001: /*
002: * xtc - The eXTensible Compiler
003: * Copyright (C) 2004-2007 Robert Grimm
004: *
005: * This library is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU Lesser General Public License
007: * version 2.1 as published by the Free Software Foundation.
008: *
009: * This library is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: * Lesser General Public License for more details.
013: *
014: * You should have received a copy of the GNU Lesser General Public
015: * License along with this library; if not, write to the Free Software
016: * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
017: * USA.
018: */
019: package xtc.parser;
020:
021: import java.io.IOException;
022: import java.io.Reader;
023:
024: import xtc.tree.Locatable;
025: import xtc.tree.Location;
026:
027: import xtc.util.Action;
028: import xtc.util.Pair;
029:
030: /**
031: * The base class for packrat parsers.
032: *
033: * @author Robert Grimm
034: * @version $Revision: 1.17 $
035: */
036: public abstract class ParserBase {
037:
038: /** The platform's line separator. */
039: public static final String NEWLINE = System
040: .getProperty("line.separator");
041:
042: /**
043: * The start index for lines. Note that this constant mirrors
044: * {@link xtc.Constants#FIRST_LINE} to avoid parsers depending on
045: * that class.
046: */
047: public static final int FIRST_LINE = 1;
048:
049: /**
050: * The start index for columns. Note that this constant mirrors
051: * {@link xtc.Constants#FIRST_COLUMN} to avoid parsers depending on
052: * that class.
053: */
054: public static final int FIRST_COLUMN = 1;
055:
056: /**
057: * The default size for the arrays storing the memoization table's
058: * columns.
059: */
060: public static final int INIT_SIZE = 4096;
061:
062: /**
063: * The increment for the arrays storing the memoization table's
064: * columns.
065: */
066: public static final int INCR_SIZE = 4096;
067:
068: // -------------------------------------------------------------------------
069:
070: /** The reader for the character stream to be parsed. */
071: protected Reader yyReader;
072:
073: /** The number of characters consumed from the character stream. */
074: protected int yyCount;
075:
076: /** The flag for whether the end-of-file has been reached. */
077: protected boolean yyEOF;
078:
079: /** The characters consumed so far. */
080: protected char[] yyData;
081:
082: /** The memoization table columns. */
083: protected Column[] yyColumns;
084:
085: // -------------------------------------------------------------------------
086:
087: /**
088: * Create a new parser base.
089: *
090: * @param reader The reader for the character stream to be parsed.
091: * @param file The name of the file backing the character stream.
092: * @throws NullPointerException Signals a null file name.
093: */
094: public ParserBase(final Reader reader, final String file) {
095: this (reader, file, INIT_SIZE - 1);
096: }
097:
098: /**
099: * Create a new parser base.
100: *
101: * @param reader The reader for the character stream to be parsed.
102: * @param file The name of the file backing the character stream.
103: * @param size The length of the character stream.
104: * @throws NullPointerException Signals a null file name.
105: * @throws IllegalArgumentException Signals a negative file size.
106: */
107: public ParserBase(final Reader reader, final String file,
108: final int size) {
109: if (null == file) {
110: throw new NullPointerException("Null file");
111: } else if (size < 0) {
112: throw new IllegalArgumentException("Negative size: " + size);
113: }
114:
115: yyReader = reader;
116: yyCount = 0;
117: yyEOF = false;
118: yyData = new char[size + 1];
119: yyColumns = new Column[size + 1];
120:
121: Column c = newColumn();
122: c.file = file;
123: c.seenCR = false;
124: c.line = FIRST_LINE;
125: c.column = FIRST_COLUMN;
126:
127: yyColumns[0] = c;
128: }
129:
130: // -------------------------------------------------------------------------
131:
132: /**
133: * Reset this parser to the specified index. This method discards
134: * the input and all memoized intermediate results up to and
135: * excluding the specified index. The index should be determined by
136: * accessing {@link SemanticValue#index} from a previous,
137: * <i>successful</i> parse (i.e., the result must be a {@link
138: * SemanticValue semantic value}).
139: *
140: * @param index The index.
141: * @throws IndexOutOfBoundsException Signals an invalid index.
142: */
143: public final void resetTo(final int index) {
144: // Check the specified index.
145: if (0 > index) {
146: throw new IndexOutOfBoundsException("Parser index: "
147: + index);
148:
149: } else if (0 == index) {
150: // There's nothing to see here. Move on.
151: return;
152:
153: } else if (index >= yyCount) {
154: throw new IndexOutOfBoundsException("Parser index: "
155: + index);
156: }
157:
158: // Get the column at the specified index (to make sure we have the
159: // corresponding location information) and construct its
160: // replacement.
161: Column c1 = column(index);
162: Column c2 = newColumn();
163:
164: c2.file = c1.file;
165: c2.seenCR = c1.seenCR;
166: c2.line = c1.line;
167: c2.column = c1.column;
168:
169: yyColumns[0] = c2;
170:
171: // Next, shift any read-in characters.
172: final int length = yyCount - index;
173:
174: System.arraycopy(yyData, index, yyData, 0, length);
175:
176: // Next, clear the rest of the memoization table.
177: for (int i = length; i < yyCount; i++) {
178: yyData[i] = 0;
179: }
180: for (int i = 1; i < yyCount; i++) {
181: yyColumns[i] = null;
182: }
183:
184: // Finally, fix the count.
185: yyCount = length;
186:
187: // Done.
188: }
189:
190: // -------------------------------------------------------------------------
191:
192: /**
193: * Grow the memoization table by the specified increment.
194: *
195: * @param incr The increment.
196: */
197: private void growBy(int incr) {
198: char[] oldValues = yyData;
199: yyData = new char[oldValues.length + incr];
200: System.arraycopy(oldValues, 0, yyData, 0, oldValues.length);
201:
202: Column[] oldColumns = yyColumns;
203: yyColumns = new Column[oldColumns.length + incr];
204: System
205: .arraycopy(oldColumns, 0, yyColumns, 0,
206: oldColumns.length);
207: }
208:
209: // -------------------------------------------------------------------------
210:
211: /**
212: * Create a new column. A concrete implementation of this method
213: * should simply return a new memoization table column.
214: *
215: * @return A new memoization table column.
216: */
217: protected abstract Column newColumn();
218:
219: /**
220: * Get the column at the specified index. If the column at the
221: * specified index has not been created yet, it is created as a
222: * side-effect of calling this method.
223: *
224: * @param index The index.
225: * @return The corresponding column.
226: * @throws IndexOutOfBoundsException Signals an invalid index.
227: */
228: protected final Column column(final int index) {
229: // A memoized production may try to access the entry just past the
230: // current end of the table before the corresponding character has
231: // been read. Hence, we may need to grow the table.
232: if (yyColumns.length == index)
233: growBy(INCR_SIZE);
234:
235: // Note that the array access below will generate an index out of
236: // bounds exception for invalid indices.
237: Column c = yyColumns[index];
238: if (null != c)
239: return c;
240:
241: // Find the last non-null column.
242: Column last = null;
243: int start;
244: for (start = index; start >= 0; start--) {
245: last = yyColumns[start];
246: if (null != last)
247: break;
248: }
249:
250: // Now, carry the location information forward.
251: int line = last.line;
252: int column = last.column;
253: boolean seenCR = last.seenCR;
254:
255: for (int i = start; i < index; i++) {
256: switch (yyData[i]) {
257: case '\t':
258: column = ((column >> 3) + 1) << 3;
259: seenCR = false;
260: break;
261: case '\n':
262: if (!seenCR) {
263: line++;
264: column = FIRST_COLUMN;
265: }
266: seenCR = false;
267: break;
268: case '\r':
269: line++;
270: column = FIRST_COLUMN;
271: seenCR = true;
272: break;
273: default:
274: column++;
275: seenCR = false;
276: }
277: }
278:
279: // Create the new column.
280: c = newColumn();
281: c.file = last.file;
282: c.seenCR = seenCR;
283: c.line = line;
284: c.column = column;
285: yyColumns[index] = c;
286:
287: return c;
288: }
289:
290: // -------------------------------------------------------------------------
291:
292: /**
293: * Parse a character at the specified index.
294: *
295: * @param index The index.
296: * @return The character or -1 if the end-of-file has been reached.
297: * @throws IOException
298: * Signals an exceptional condition while accessing the character
299: * stream.
300: */
301: protected final int character(final int index) throws IOException {
302: // Have we seen the end-of-file?
303: if (yyEOF) {
304: if (index < yyCount - 1) {
305: return yyData[index];
306: } else if (index < yyCount) {
307: return -1;
308: } else {
309: throw new IndexOutOfBoundsException("Parser index: "
310: + index);
311: }
312: }
313:
314: // Have we already read the desired character?
315: if (index < yyCount) {
316: return yyData[index];
317: } else if (index != yyCount) {
318: throw new IndexOutOfBoundsException("Parser index: "
319: + index);
320: }
321:
322: // Read another character.
323: final int c = yyReader.read();
324: final int incr = (-1 == c) ? 1 : INCR_SIZE;
325:
326: // Do we have enough space?
327: if (yyData.length <= yyCount) {
328: growBy(incr);
329: }
330:
331: if (-1 == c) {
332: // Remember the end-of-file.
333: yyEOF = true;
334:
335: } else {
336: // Remember the character.
337: yyData[index] = (char) c;
338: }
339: yyCount++;
340:
341: // Done.
342: return c;
343: }
344:
345: /**
346: * Get the difference between the specified indices.
347: *
348: * @param start The start index.
349: * @param end The end index.
350: * @return The difference as a string.
351: */
352: protected final String difference(final int start, final int end) {
353: return (start == end) ? "" : new String(yyData, start, end
354: - start);
355: }
356:
357: /**
358: * Determine whether the specified index represents the end-of-file.
359: *
360: * @param index The index.
361: * @return <code>true</code> if the specified index represents EOF.
362: */
363: public final boolean isEOF(final int index) {
364: return yyEOF && (index == yyCount - 1);
365: }
366:
367: /**
368: * Get the line at the specified index.
369: *
370: * @param index The index.
371: * @return The corresponding line, without any line terminating
372: * characters.
373: * @throws IndexOutOfBoundsException Signals an invalid index.
374: * @throws IOException Signals an I/O error.
375: */
376: public final String lineAt(int index) throws IOException {
377: if (0 > index) {
378: throw new IndexOutOfBoundsException("Parser index: "
379: + index);
380: }
381:
382: // Normalize index for line terminating positions.
383: if ((0 < index) && ('\n' == character(index))
384: && ('\r' == character(index - 1))) {
385: index--;
386: }
387:
388: int start = index;
389: int end = index;
390: int c;
391:
392: // Find the end of the line.
393: c = character(end);
394: while ((-1 != c) && ('\r' != c) && ('\n' != c)) {
395: end++;
396: c = character(end);
397: }
398:
399: // Find the start of the line.
400: while (true) {
401: if (0 == start) {
402: break;
403: }
404: c = character(start - 1);
405: if (('\r' == c) || ('\n' == c)) {
406: break;
407: }
408: start--;
409: }
410:
411: // Done.
412: return difference(start, end);
413: }
414:
415: // -------------------------------------------------------------------------
416:
417: /**
418: * Get the location for the specified index.
419: *
420: * @param index The index.
421: * @return The corresponding location.
422: */
423: public final Location location(final int index) {
424: final Column c = column(index);
425:
426: return new Location(c.file, c.line, c.column);
427: }
428:
429: /**
430: * Set the location for the specified index. This method updates
431: * the internal location based on, for example, a line marker
432: * recognized by the parser.
433: *
434: * <p />This method must be called before any nodes are created for
435: * positions at or beyond the specified index — unless the
436: * specified file, line, and column are the same as the internal
437: * location for the index. The line number may be one less than the
438: * start index for lines ({@link #FIRST_LINE}), to account for a
439: * line marker being present in the input. The column number is
440: * generally be expected to be the start index for columns ({@link
441: * #FIRST_COLUMN}), again accounting for a line marker being present
442: * in the input.
443: *
444: * @param index The index.
445: * @param file The new file name.
446: * @param line The new line number.
447: * @param column The new column number.
448: * @throws NullPointerException Signals a null file name.
449: * @throws IllegalArgumentException Signals an invalid line or
450: * column number.
451: * @throws IndexOutOfBoundsException Signals an invalid index.
452: * @throws IllegalStateException Signals that the index comes at or
453: * before any memoized results.
454: */
455: protected final void setLocation(final int index,
456: final String file, final int line, final int column) {
457: // Check the file, line, and column.
458: if (null == file) {
459: throw new NullPointerException("Null file");
460: } else if (FIRST_LINE - 1 > line) {
461: throw new IllegalArgumentException("Invalid line number: "
462: + line);
463: } else if (FIRST_COLUMN > column) {
464: throw new IllegalArgumentException(
465: "Invalid column number: " + column);
466: }
467:
468: // Make sure the index is valid.
469: if (index < 0 || yyCount <= index) {
470: throw new IndexOutOfBoundsException("Parser index: "
471: + index);
472: }
473:
474: // Detect repeated calls for the same location.
475: Column c = yyColumns[index];
476: if (null != c) {
477: if (file.equals(c.file) && line == c.line
478: && column == c.column) {
479: // We ignore repeated calls for the same index and location.
480: return;
481: } else if (0 != index) {
482: // The first column always exists, so we can't signal for a 0 index.
483: throw new IllegalStateException("Location at index "
484: + index + " is already committed");
485: }
486: }
487:
488: // Check that no further columns have been allocated.
489: for (int i = index + 1; i < yyCount; i++) {
490: if (null != yyColumns[i]) {
491: throw new IllegalStateException("Location at index "
492: + index + " is already committed");
493: }
494: }
495:
496: // Actually update the internal location. Note that we call
497: // column() instead of allocating the column directly to correctly
498: // carry forward the seenCR flag.
499: c = column(index);
500: c.file = file;
501: c.line = line;
502: c.column = column;
503: }
504:
505: /**
506: * Set the location for the specified locatable object. This method
507: * is equivalent to:<pre>
508: * if ((null != locatable) && (! locatable.hasLocation())) {
509: * locatable.setLocation(location(index));
510: * }
511: * </pre>
512: *
513: * @param locatable The locatable object.
514: * @param index The index.
515: */
516: public final void setLocation(final Locatable locatable,
517: final int index) {
518: if ((null != locatable) && (!locatable.hasLocation())) {
519: Column c = column(index);
520: locatable
521: .setLocation(new Location(c.file, c.line, c.column));
522: }
523: }
524:
525: // -------------------------------------------------------------------------
526:
527: /**
528: * Apply the specified actions on the specified seed. This method
529: * applies all {@link xtc.util.Action actions} on the specified
530: * list, using the result of the previous action as the argument to
531: * the next action. The argument to the first action is the
532: * specified seed. If the specified list is empty, this method
533: * simply returns the specified seed.
534: *
535: * @param actions The actions to apply.
536: * @param seed The initial argument.
537: * @return The result of applying the actions.
538: */
539: protected final <T> T apply(Pair<Action<T>> actions, T seed) {
540: while (!actions.isEmpty()) {
541: seed = actions.head().run(seed);
542: actions = actions.tail();
543: }
544: return seed;
545: }
546:
547: /**
548: * Apply the specified actions on the specified seed while also
549: * setting the results' locations. This method applies all {@link
550: * xtc.util.Action actions} on the specified list, using the result
551: * of the previous action as the argument to the next action. For
552: * the result of each application, it also sets the location. The
553: * argument to the first action is the specified seed. If the
554: * specified list is empty, this method simply returns the specified
555: * seed.
556: *
557: * @param actions The actions to apply.
558: * @param seed The initial argument.
559: * @param index The index representing the current parser location.
560: * @return The result of applying the actions.
561: */
562: protected final <T extends Locatable> T apply(
563: Pair<Action<T>> actions, T seed, final int index) {
564: if (!actions.isEmpty()) {
565: final Location loc = location(index);
566:
567: do {
568: seed = actions.head().run(seed);
569: seed.setLocation(loc);
570: actions = actions.tail();
571: } while (!actions.isEmpty());
572: }
573:
574: return seed;
575: }
576:
577: // -------------------------------------------------------------------------
578:
579: /**
580: * Format the specified parse error. The specified error must have
581: * been created by this parser.
582: *
583: * @param error The error.
584: * @return The corresponding error message.
585: * @throws IOException Signals an I/O error while creating the error
586: * message.
587: */
588: public final String format(ParseError error) throws IOException {
589: final StringBuilder buf = new StringBuilder();
590:
591: // The error's location.
592: Column c = null;
593: if (-1 != error.index) {
594: c = column(error.index);
595: buf.append(c.file);
596: buf.append(':');
597: buf.append(c.line);
598: buf.append(':');
599: buf.append(c.column);
600: buf.append(": ");
601: }
602:
603: // The error's actual message.
604: buf.append("error: ");
605: buf.append(error.msg);
606:
607: // The error's line with a position marker.
608: if (-1 != error.index) {
609: final String line = lineAt(error.index);
610: final int size = line.length();
611:
612: buf.append(NEWLINE);
613: for (int i = 0; i < size; i++)
614: buf.append(line.charAt(i));
615: buf.append(NEWLINE);
616: for (int i = FIRST_COLUMN; i < c.column; i++)
617: buf.append(' ');
618: buf.append('^');
619: buf.append(NEWLINE);
620: }
621:
622: // Done.
623: return buf.toString();
624: }
625:
626: /**
627: * Signal the specified parse error as a parse exception. The
628: * specified error must have been created by this parser.
629: *
630: * @param error The parse error.
631: * @throws ParseException Signals the error.
632: * @throws IOException Signals an I/O error while creating the
633: * exception's detail message.
634: */
635: public final void signal(ParseError error) throws ParseException,
636: IOException {
637: throw new ParseException(format(error));
638: }
639:
640: /**
641: * Extract the specified result's value. If the result is a {@link
642: * SemanticValue}, this method returns the actual value; if it is a
643: * {@link ParseError}, it signals a parse exception with the
644: * corresponding message. The specified result must have been
645: * created by this parser.
646: *
647: * @param r The result.
648: * @return The corresponding value.
649: * @throws ParseException Signals that the result represents a parse
650: * error.
651: * @throws IOException Signals an I/O error while creating the parse
652: * error's detail message.
653: */
654: public final Object value(Result r) throws ParseException,
655: IOException {
656: if (!r.hasValue())
657: signal(r.parseError());
658: return r.semanticValue();
659: }
660:
661: // -------------------------------------------------------------------------
662:
663: /**
664: * Get the next few characters from the specified index.
665: *
666: * @param index The index.
667: * @return The next few characters.
668: */
669: protected final String peek(final int index) {
670: int limit = yyEOF ? yyCount - 1 : yyCount;
671: if (index >= limit)
672: return "";
673: limit = Math.min(index + 20, limit);
674: return new String(yyData, index, limit - index);
675: }
676:
677: // -------------------------------------------------------------------------
678:
679: /**
680: * Cast the specified object. This method is used to avoid spurious
681: * compiler warnings for parsers utilizing generic types.
682: *
683: * @param o The object.
684: * @return The cast object.
685: */
686: @SuppressWarnings("unchecked")
687: protected static final <T> T cast(Object o) {
688: return (T) o;
689: }
690:
691: /**
692: * Cast the list starting at the specified pair. This method is
693: * used to avoid spurious compiler warnings for parsers utilizing
694: * generic types.
695: *
696: * @param p The list.
697: * @return The cast list.
698: */
699: @SuppressWarnings("unchecked")
700: protected static final <T> Pair<T> cast(Pair<?> p) {
701: return (Pair<T>) p;
702: }
703:
704: }
|