001: /*
002: * The JTS Topology Suite is a collection of Java classes that
003: * implement the fundamental operations required to validate a given
004: * geo-spatial data set to a known topological specification.
005: *
006: * Copyright (C) 2001 Vivid Solutions
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
021: *
022: * For more information, contact:
023: *
024: * Vivid Solutions
025: * Suite #1A
026: * 2328 Government Street
027: * Victoria BC V8T 5G5
028: * Canada
029: *
030: * (250)385-6040
031: * www.vividsolutions.com
032: */
033: package com.vividsolutions.jts.io;
034:
035: import com.vividsolutions.jts.geom.*;
036: import com.vividsolutions.jts.util.*;
037: import com.vividsolutions.jts.io.ParseException;
038:
039: import java.io.IOException;
040: import java.io.Reader;
041: import java.io.StreamTokenizer;
042: import java.io.StringReader;
043: import java.util.ArrayList;
044:
045: /**
046: * Converts a geometry in Well-Known Text format to a {@link Geometry}.
047: * <p>
048: * <code>WKTReader</code> supports
049: * extracting <code>Geometry</code> objects from either {@link Reader}s or
050: * {@link String}s. This allows it to function as a parser to read <code>Geometry</code>
051: * objects from text blocks embedded in other data formats (e.g. XML). <P>
052: * <p>
053: * A <code>WKTReader</code> is parameterized by a <code>GeometryFactory</code>,
054: * to allow it to create <code>Geometry</code> objects of the appropriate
055: * implementation. In particular, the <code>GeometryFactory</code>
056: * determines the <code>PrecisionModel</code> and <code>SRID</code> that is
057: * used. <P>
058: *
059: * The <code>WKTReader</code> converts all input numbers to the precise
060: * internal representation.
061: *
062: * <h3>Notes:</h3>
063: * <ul>
064: * <li>The reader supports non-standard "LINEARRING" tags.
065: * <li>The reader uses Double.parseDouble to perform the conversion of ASCII
066: * numbers to floating point. This means it supports the Java
067: * syntax for floating point literals (including scientific notation).
068: * </ul>
069: *
070: * <h3>Syntax</h3>
071: * The following syntax specification describes the version of Well-Known Text
072: * supported by JTS.
073: * (The specification uses a syntax language similar to that used in
074: * the C and Java language specifications.)
075: * <p>
076: *
077: * <blockquote><pre>
078: * <i>WKTGeometry:</i> one of<i>
079: *
080: * WKTPoint WKTLineString WKTLinearRing WKTPolygon
081: * WKTMultiPoint WKTMultiLineString WKTMultiPolygon
082: * WKTGeometryCollection</i>
083: *
084: * <i>WKTPoint:</i> <b>POINT ( </b><i>Coordinate</i> <b>)</b>
085: *
086: * <i>WKTLineString:</i> <b>LINESTRING</b> <i>CoordinateSequence</i>
087: *
088: * <i>WKTLinearRing:</i> <b>LINEARRING</b> <i>CoordinateSequence</i>
089: *
090: * <i>WKTPolygon:</i> <b>POLYGON</b> <i>CoordinateSequenceList</i>
091: *
092: * <i>WKTMultiPoint:</i> <b>MULTIPOINT</b> <i>CoordinateSequence</i>
093: *
094: * <i>WKTMultiLineString:</i> <b>MULTILINESTRING</b> <i>CoordinateSequenceList</i>
095: *
096: * <i>WKTMultiPolygon:</i>
097: * <b>MULTIPOLYGON (</b> <i>CoordinateSequenceList {</i> , <i>CoordinateSequenceList }</i> <b>)</b>
098: *
099: * <i>WKTGeometryCollection: </i>
100: * <b>GEOMETRYCOLLECTION (</b> <i>WKTGeometry {</i> , <i>WKTGeometry }</i> <b>)</b>
101: *
102: * <i>CoordinateSequenceList:</i>
103: * <b>(</b> <i>CoordinateSequence {</i> <b>,</b> <i>CoordinateSequence }</i> <b>)</b>
104: *
105: * <i>CoordinateSequence:</i>
106: * <b>(</b> <i>Coordinate {</i> , <i>Coordinate }</i> <b>)</b>
107: *
108: * <i>Coordinate:
109: * Number Number Number<sub>opt</sub></i>
110: *
111: * <i>Number:</i> A Java-style floating-point number
112: *
113: * </pre></blockquote>
114: *
115: *
116: *@version 1.7
117: * @see WKTWriter
118: */
119: public class WKTReader {
120: private static final String EMPTY = "EMPTY";
121: private static final String COMMA = ",";
122: private static final String L_PAREN = "(";
123: private static final String R_PAREN = ")";
124:
125: private GeometryFactory geometryFactory;
126: private PrecisionModel precisionModel;
127: private StreamTokenizer tokenizer;
128:
129: /**
130: * Creates a reader that creates objects using the default {@link GeometryFactory}.
131: */
132: public WKTReader() {
133: this (new GeometryFactory());
134: }
135:
136: /**
137: * Creates a reader that creates objects using the given
138: * {@link GeometryFactory}.
139: *
140: *@param geometryFactory the factory used to create <code>Geometry</code>s.
141: */
142: public WKTReader(GeometryFactory geometryFactory) {
143: this .geometryFactory = geometryFactory;
144: precisionModel = geometryFactory.getPrecisionModel();
145: }
146:
147: /**
148: * Reads a Well-Known Text representation of a {@link Geometry}
149: * from a {@link String}.
150: *
151: * @param wellKnownText
152: * one or more <Geometry Tagged Text>strings (see the OpenGIS
153: * Simple Features Specification) separated by whitespace
154: * @return a <code>Geometry</code> specified by <code>wellKnownText</code>
155: * @throws ParseException
156: * if a parsing problem occurs
157: */
158: public Geometry read(String wellKnownText) throws ParseException {
159: StringReader reader = new StringReader(wellKnownText);
160: try {
161: return read(reader);
162: } finally {
163: reader.close();
164: }
165: }
166:
167: /**
168: * Reads a Well-Known Text representation of a {@link Geometry}
169: * from a {@link Reader}.
170: *
171: *@param reader a Reader which will return a <Geometry Tagged Text>
172: * string (see the OpenGIS Simple Features Specification)
173: *@return a <code>Geometry</code> read from <code>reader</code>
174: *@throws ParseException if a parsing problem occurs
175: */
176: public Geometry read(Reader reader) throws ParseException {
177: tokenizer = new StreamTokenizer(reader);
178: // set tokenizer to NOT parse numbers
179: tokenizer.resetSyntax();
180: tokenizer.wordChars('a', 'z');
181: tokenizer.wordChars('A', 'Z');
182: tokenizer.wordChars(128 + 32, 255);
183: tokenizer.wordChars('0', '9');
184: tokenizer.wordChars('-', '-');
185: tokenizer.wordChars('+', '+');
186: tokenizer.wordChars('.', '.');
187: tokenizer.whitespaceChars(0, ' ');
188: tokenizer.commentChar('#');
189:
190: try {
191: return readGeometryTaggedText();
192: } catch (IOException e) {
193: throw new ParseException(e.toString());
194: }
195: }
196:
197: /**
198: * Returns the next array of <code>Coordinate</code>s in the stream.
199: *
200: *@param tokenizer tokenizer over a stream of text in Well-known Text
201: * format. The next element returned by the stream should be L_PAREN (the
202: * beginning of "(x1 y1, x2 y2, ..., xn yn)") or EMPTY.
203: *@return the next array of <code>Coordinate</code>s in the
204: * stream, or an empty array if EMPTY is the next element returned by
205: * the stream.
206: *@throws IOException if an I/O error occurs
207: *@throws ParseException if an unexpected token was encountered
208: */
209: private Coordinate[] getCoordinates() throws IOException,
210: ParseException {
211: String nextToken = getNextEmptyOrOpener();
212: if (nextToken.equals(EMPTY)) {
213: return new Coordinate[] {};
214: }
215: ArrayList coordinates = new ArrayList();
216: coordinates.add(getPreciseCoordinate());
217: nextToken = getNextCloserOrComma();
218: while (nextToken.equals(COMMA)) {
219: coordinates.add(getPreciseCoordinate());
220: nextToken = getNextCloserOrComma();
221: }
222: Coordinate[] array = new Coordinate[coordinates.size()];
223: return (Coordinate[]) coordinates.toArray(array);
224: }
225:
226: private Coordinate getPreciseCoordinate() throws IOException,
227: ParseException {
228: Coordinate coord = new Coordinate();
229: coord.x = getNextNumber();
230: coord.y = getNextNumber();
231: if (isNumberNext()) {
232: coord.z = getNextNumber();
233: }
234: precisionModel.makePrecise(coord);
235: return coord;
236: }
237:
238: private boolean isNumberNext() throws IOException {
239: int type = tokenizer.nextToken();
240: tokenizer.pushBack();
241: return type == StreamTokenizer.TT_WORD;
242: }
243:
244: /**
245: * Parses the next number in the stream.
246: * Numbers with exponents are handled.
247: *
248: *@param tokenizer tokenizer over a stream of text in Well-known Text
249: * format. The next token must be a number.
250: *@return the next number in the stream
251: *@throws ParseException if the next token is not a valid number
252: *@throws IOException if an I/O error occurs
253: */
254: private double getNextNumber() throws IOException, ParseException {
255: int type = tokenizer.nextToken();
256: switch (type) {
257: case StreamTokenizer.TT_WORD: {
258: try {
259: return Double.parseDouble(tokenizer.sval);
260: } catch (NumberFormatException ex) {
261: throw new ParseException("Invalid number: "
262: + tokenizer.sval);
263: }
264: }
265: }
266: parseError("number");
267: return 0.0;
268: }
269:
270: /**
271: * Returns the next EMPTY or L_PAREN in the stream as uppercase text.
272: *
273: *@param tokenizer tokenizer over a stream of text in Well-known Text
274: * format. The next token must be EMPTY or L_PAREN.
275: *@return the next EMPTY or L_PAREN in the stream as uppercase
276: * text.
277: *@throws ParseException if the next token is not EMPTY or L_PAREN
278: *@throws IOException if an I/O error occurs
279: */
280: private String getNextEmptyOrOpener() throws IOException,
281: ParseException {
282: String nextWord = getNextWord();
283: if (nextWord.equals(EMPTY) || nextWord.equals(L_PAREN)) {
284: return nextWord;
285: }
286: parseError(EMPTY + " or " + L_PAREN);
287: return null;
288: }
289:
290: /**
291: * Returns the next R_PAREN or COMMA in the stream.
292: *
293: *@param tokenizer tokenizer over a stream of text in Well-known Text
294: * format. The next token must be R_PAREN or COMMA.
295: *@return the next R_PAREN or COMMA in the stream
296: *@throws ParseException if the next token is not R_PAREN or COMMA
297: *@throws IOException if an I/O error occurs
298: */
299: private String getNextCloserOrComma() throws IOException,
300: ParseException {
301: String nextWord = getNextWord();
302: if (nextWord.equals(COMMA) || nextWord.equals(R_PAREN)) {
303: return nextWord;
304: }
305: parseError(COMMA + " or " + R_PAREN);
306: return null;
307: }
308:
309: /**
310: * Returns the next R_PAREN in the stream.
311: *
312: *@param tokenizer tokenizer over a stream of text in Well-known Text
313: * format. The next token must be R_PAREN.
314: *@return the next R_PAREN in the stream
315: *@throws ParseException if the next token is not R_PAREN
316: *@throws IOException if an I/O error occurs
317: */
318: private String getNextCloser() throws IOException, ParseException {
319: String nextWord = getNextWord();
320: if (nextWord.equals(R_PAREN)) {
321: return nextWord;
322: }
323: parseError(R_PAREN);
324: return null;
325: }
326:
327: /**
328: * Returns the next word in the stream.
329: *
330: *@param tokenizer tokenizer over a stream of text in Well-known Text
331: * format. The next token must be a word.
332: *@return the next word in the stream as uppercase text
333: *@throws ParseException if the next token is not a word
334: *@throws IOException if an I/O error occurs
335: */
336: private String getNextWord() throws IOException, ParseException {
337: int type = tokenizer.nextToken();
338: switch (type) {
339: case StreamTokenizer.TT_WORD:
340:
341: String word = tokenizer.sval;
342: if (word.equalsIgnoreCase(EMPTY))
343: return EMPTY;
344: return word;
345:
346: case '(':
347: return L_PAREN;
348: case ')':
349: return R_PAREN;
350: case ',':
351: return COMMA;
352: }
353: parseError("word");
354: return null;
355: }
356:
357: /**
358: * Throws a formatted ParseException for the current token.
359: *
360: * @param expected a description of what was expected
361: * @throws ParseException
362: * @throws AssertionFailedException if an invalid token is encountered
363: */
364: private void parseError(String expected) throws ParseException {
365: // throws Asserts for tokens that should never be seen
366: if (tokenizer.ttype == StreamTokenizer.TT_NUMBER)
367: Assert.shouldNeverReachHere("Unexpected NUMBER token");
368: if (tokenizer.ttype == StreamTokenizer.TT_EOL)
369: Assert.shouldNeverReachHere("Unexpected EOL token");
370:
371: String tokenStr = tokenString();
372: throw new ParseException("Expected " + expected + " but found "
373: + tokenStr);
374: }
375:
376: /**
377: * Gets a description of the current token
378: *
379: * @return a description of the current token
380: */
381: private String tokenString() {
382: switch (tokenizer.ttype) {
383: case StreamTokenizer.TT_NUMBER:
384: return "<NUMBER>";
385: case StreamTokenizer.TT_EOL:
386: return "End-of-Line";
387: case StreamTokenizer.TT_EOF:
388: return "End-of-Stream";
389: case StreamTokenizer.TT_WORD:
390: return "'" + tokenizer.sval + "'";
391: }
392: return "'" + (char) tokenizer.ttype + "'";
393: }
394:
395: /**
396: * Creates a <code>Geometry</code> using the next token in the stream.
397: *
398: *@param tokenizer tokenizer over a stream of text in Well-known Text
399: * format. The next tokens must form a <Geometry Tagged Text>.
400: *@return a <code>Geometry</code> specified by the next token
401: * in the stream
402: *@throws ParseException if the coordinates used to create a <code>Polygon</code>
403: * shell and holes do not form closed linestrings, or if an unexpected
404: * token was encountered
405: *@throws IOException if an I/O error occurs
406: */
407: private Geometry readGeometryTaggedText() throws IOException,
408: ParseException {
409: String type = null;
410:
411: try {
412: type = getNextWord();
413: } catch (IOException e) {
414: return null;
415: } catch (ParseException e) {
416: return null;
417: }
418:
419: if (type.equals("POINT")) {
420: return readPointText();
421: } else if (type.equalsIgnoreCase("LINESTRING")) {
422: return readLineStringText();
423: } else if (type.equalsIgnoreCase("LINEARRING")) {
424: return readLinearRingText();
425: } else if (type.equalsIgnoreCase("POLYGON")) {
426: return readPolygonText();
427: } else if (type.equalsIgnoreCase("MULTIPOINT")) {
428: return readMultiPointText();
429: } else if (type.equalsIgnoreCase("MULTILINESTRING")) {
430: return readMultiLineStringText();
431: } else if (type.equalsIgnoreCase("MULTIPOLYGON")) {
432: return readMultiPolygonText();
433: } else if (type.equalsIgnoreCase("GEOMETRYCOLLECTION")) {
434: return readGeometryCollectionText();
435: }
436: throw new ParseException("Unknown geometry type: " + type);
437: }
438:
439: /**
440: * Creates a <code>Point</code> using the next token in the stream.
441: *
442: *@param tokenizer tokenizer over a stream of text in Well-known Text
443: * format. The next tokens must form a <Point Text>.
444: *@return a <code>Point</code> specified by the next token in
445: * the stream
446: *@throws IOException if an I/O error occurs
447: *@throws ParseException if an unexpected token was encountered
448: */
449: private Point readPointText() throws IOException, ParseException {
450: String nextToken = getNextEmptyOrOpener();
451: if (nextToken.equals(EMPTY)) {
452: return geometryFactory.createPoint((Coordinate) null);
453: }
454: Point point = geometryFactory
455: .createPoint(getPreciseCoordinate());
456: getNextCloser();
457: return point;
458: }
459:
460: /**
461: * Creates a <code>LineString</code> using the next token in the stream.
462: *
463: *@param tokenizer tokenizer over a stream of text in Well-known Text
464: * format. The next tokens must form a <LineString Text>.
465: *@return a <code>LineString</code> specified by the next
466: * token in the stream
467: *@throws IOException if an I/O error occurs
468: *@throws ParseException if an unexpected token was encountered
469: */
470: private LineString readLineStringText() throws IOException,
471: ParseException {
472: return geometryFactory.createLineString(getCoordinates());
473: }
474:
475: /**
476: * Creates a <code>LinearRing</code> using the next token in the stream.
477: *
478: *@param tokenizer tokenizer over a stream of text in Well-known Text
479: * format. The next tokens must form a <LineString Text>.
480: *@return a <code>LinearRing</code> specified by the next
481: * token in the stream
482: *@throws IOException if an I/O error occurs
483: *@throws ParseException if the coordinates used to create the <code>LinearRing</code>
484: * do not form a closed linestring, or if an unexpected token was
485: * encountered
486: */
487: private LinearRing readLinearRingText() throws IOException,
488: ParseException {
489: return geometryFactory.createLinearRing(getCoordinates());
490: }
491:
492: /**
493: * Creates a <code>MultiPoint</code> using the next token in the stream.
494: *
495: *@param tokenizer tokenizer over a stream of text in Well-known Text
496: * format. The next tokens must form a <MultiPoint Text>.
497: *@return a <code>MultiPoint</code> specified by the next
498: * token in the stream
499: *@throws IOException if an I/O error occurs
500: *@throws ParseException if an unexpected token was encountered
501: */
502: private MultiPoint readMultiPointText() throws IOException,
503: ParseException {
504: return geometryFactory
505: .createMultiPoint(toPoints(getCoordinates()));
506: }
507:
508: /**
509: * Creates an array of <code>Point</code>s having the given <code>Coordinate</code>
510: * s.
511: *
512: *@param coordinates the <code>Coordinate</code>s with which to create the
513: * <code>Point</code>s
514: *@return <code>Point</code>s created using this <code>WKTReader</code>
515: * s <code>GeometryFactory</code>
516: */
517: private Point[] toPoints(Coordinate[] coordinates) {
518: ArrayList points = new ArrayList();
519: for (int i = 0; i < coordinates.length; i++) {
520: points.add(geometryFactory.createPoint(coordinates[i]));
521: }
522: return (Point[]) points.toArray(new Point[] {});
523: }
524:
525: /**
526: * Creates a <code>Polygon</code> using the next token in the stream.
527: *
528: *@param tokenizer tokenizer over a stream of text in Well-known Text
529: * format. The next tokens must form a <Polygon Text>.
530: *@return a <code>Polygon</code> specified by the next token
531: * in the stream
532: *@throws ParseException if the coordinates used to create the <code>Polygon</code>
533: * shell and holes do not form closed linestrings, or if an unexpected
534: * token was encountered.
535: *@throws IOException if an I/O error occurs
536: */
537: private Polygon readPolygonText() throws IOException,
538: ParseException {
539: String nextToken = getNextEmptyOrOpener();
540: if (nextToken.equals(EMPTY)) {
541: return geometryFactory.createPolygon(geometryFactory
542: .createLinearRing(new Coordinate[] {}),
543: new LinearRing[] {});
544: }
545: ArrayList holes = new ArrayList();
546: LinearRing shell = readLinearRingText();
547: nextToken = getNextCloserOrComma();
548: while (nextToken.equals(COMMA)) {
549: LinearRing hole = readLinearRingText();
550: holes.add(hole);
551: nextToken = getNextCloserOrComma();
552: }
553: LinearRing[] array = new LinearRing[holes.size()];
554: return geometryFactory.createPolygon(shell,
555: (LinearRing[]) holes.toArray(array));
556: }
557:
558: /**
559: * Creates a <code>MultiLineString</code> using the next token in the stream.
560: *
561: *@param tokenizer tokenizer over a stream of text in Well-known Text
562: * format. The next tokens must form a <MultiLineString Text>.
563: *@return a <code>MultiLineString</code> specified by the
564: * next token in the stream
565: *@throws IOException if an I/O error occurs
566: *@throws ParseException if an unexpected token was encountered
567: */
568: private com.vividsolutions.jts.geom.MultiLineString readMultiLineStringText()
569: throws IOException, ParseException {
570: String nextToken = getNextEmptyOrOpener();
571: if (nextToken.equals(EMPTY)) {
572: return geometryFactory
573: .createMultiLineString(new LineString[] {});
574: }
575: ArrayList lineStrings = new ArrayList();
576: LineString lineString = readLineStringText();
577: lineStrings.add(lineString);
578: nextToken = getNextCloserOrComma();
579: while (nextToken.equals(COMMA)) {
580: lineString = readLineStringText();
581: lineStrings.add(lineString);
582: nextToken = getNextCloserOrComma();
583: }
584: LineString[] array = new LineString[lineStrings.size()];
585: return geometryFactory
586: .createMultiLineString((LineString[]) lineStrings
587: .toArray(array));
588: }
589:
590: /**
591: * Creates a <code>MultiPolygon</code> using the next token in the stream.
592: *
593: *@param tokenizer tokenizer over a stream of text in Well-known Text
594: * format. The next tokens must form a <MultiPolygon Text>.
595: *@return a <code>MultiPolygon</code> specified by the next
596: * token in the stream, or if if the coordinates used to create the
597: * <code>Polygon</code> shells and holes do not form closed linestrings.
598: *@throws IOException if an I/O error occurs
599: *@throws ParseException if an unexpected token was encountered
600: */
601: private MultiPolygon readMultiPolygonText() throws IOException,
602: ParseException {
603: String nextToken = getNextEmptyOrOpener();
604: if (nextToken.equals(EMPTY)) {
605: return geometryFactory.createMultiPolygon(new Polygon[] {});
606: }
607: ArrayList polygons = new ArrayList();
608: Polygon polygon = readPolygonText();
609: polygons.add(polygon);
610: nextToken = getNextCloserOrComma();
611: while (nextToken.equals(COMMA)) {
612: polygon = readPolygonText();
613: polygons.add(polygon);
614: nextToken = getNextCloserOrComma();
615: }
616: Polygon[] array = new Polygon[polygons.size()];
617: return geometryFactory.createMultiPolygon((Polygon[]) polygons
618: .toArray(array));
619: }
620:
621: /**
622: * Creates a <code>GeometryCollection</code> using the next token in the
623: * stream.
624: *
625: *@param tokenizer tokenizer over a stream of text in Well-known Text
626: * format. The next tokens must form a <GeometryCollection Text>.
627: *@return a <code>GeometryCollection</code> specified by the
628: * next token in the stream
629: *@throws ParseException if the coordinates used to create a <code>Polygon</code>
630: * shell and holes do not form closed linestrings, or if an unexpected
631: * token was encountered
632: *@throws IOException if an I/O error occurs
633: */
634: private GeometryCollection readGeometryCollectionText()
635: throws IOException, ParseException {
636: String nextToken = getNextEmptyOrOpener();
637: if (nextToken.equals(EMPTY)) {
638: return geometryFactory
639: .createGeometryCollection(new Geometry[] {});
640: }
641: ArrayList geometries = new ArrayList();
642: Geometry geometry = readGeometryTaggedText();
643: geometries.add(geometry);
644: nextToken = getNextCloserOrComma();
645: while (nextToken.equals(COMMA)) {
646: geometry = readGeometryTaggedText();
647: geometries.add(geometry);
648: nextToken = getNextCloserOrComma();
649: }
650: Geometry[] array = new Geometry[geometries.size()];
651: return geometryFactory
652: .createGeometryCollection((Geometry[]) geometries
653: .toArray(array));
654: }
655:
656: }
|