001: package org.antlr.runtime;
002:
003: import java.util.ArrayList;
004: import java.util.HashMap;
005: import java.util.List;
006: import java.util.Map;
007:
008: /** A generic recognizer that can handle recognizers generated from
009: * lexer, parser, and tree grammars. This is all the parsing
010: * support code essentially; most of it is error recovery stuff and
011: * backtracking.
012: */
public abstract class BaseRecognizer {
    /** Memoization sentinel: the rule was tried at this start index and failed. */
    public static final int MEMO_RULE_FAILED = -2;
    /** Memoization sentinel: no memo entry exists for this rule/start index. */
    public static final int MEMO_RULE_UNKNOWN = -1;
    /** Initial capacity of the {@link #following} stack; it doubles when full. */
    public static final int INITIAL_FOLLOW_STACK_SIZE = 100;

    /** Boxed form of MEMO_RULE_FAILED, reusable to avoid repeated allocation. */
    public static final Integer MEMO_RULE_FAILED_I = new Integer(
            MEMO_RULE_FAILED);

    // copies from Token object for convenience in actions
    public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
    public static final int HIDDEN = Token.HIDDEN_CHANNEL;

    /** Method name skipped when reconstructing rule-invocation stacks. */
    public static final String NEXT_TOKEN_RULE_NAME = "nextToken";

    /** Track the set of token types that can follow any rule invocation.
     * Stack grows upwards. When it hits the max, it grows 2x in size
     * and keeps going. Entries 0.._fsp are live.
     */
    protected BitSet[] following = new BitSet[INITIAL_FOLLOW_STACK_SIZE];
    /** Index of the top of the {@link #following} stack; -1 when empty. */
    protected int _fsp = -1;

    /** This is true when we see an error and before having successfully
     * matched a token. Prevents generation of more than one error message
     * per error.
     */
    protected boolean errorRecovery = false;

    /** The index into the input stream where the last error occurred.
     * This is used to prevent infinite loops where an error is found
     * but no token is consumed during recovery...another error is found,
     * ad nauseam. This is a failsafe mechanism to guarantee that at least
     * one token/tree node is consumed for two errors.
     */
    protected int lastErrorIndex = -1;

    /** In lieu of a return value, this indicates that a rule or token
     * has failed to match. Reset to false upon valid token match.
     */
    protected boolean failed = false;

    /** If 0, no backtracking is going on and it is safe to execute actions.
     * If >0 then it's the level of backtracking.
     */
    protected int backtracking = 0;

    /** An array[size num rules] of Map<Integer,Integer> that tracks
     * the stop token index for each rule. ruleMemo[ruleIndex] is
     * the memoization table for ruleIndex. For key ruleStartIndex, you
     * get back the stop token for associated rule or MEMO_RULE_FAILED.
     *
     * This is only used if rule memoization is on (which it is by default).
     */
    protected Map[] ruleMemo;
/** Reset the parser's state; subclasses must rewind the input stream */
068: public void reset() {
069: // wack everything related to error recovery
070: _fsp = -1;
071: errorRecovery = false;
072: lastErrorIndex = -1;
073: failed = false;
074: // wack everything related to backtracking and memoization
075: backtracking = 0;
076: for (int i = 0; ruleMemo != null && i < ruleMemo.length; i++) { // wipe cache
077: ruleMemo[i] = null;
078: }
079: }
080:
081: /** Match current input symbol against ttype. Upon error, do one token
082: * insertion or deletion if possible. You can override to not recover
083: * here and bail out of the current production to the normal error
084: * exception catch (at the end of the method) by just throwing
085: * MismatchedTokenException upon input.LA(1)!=ttype.
086: */
087: public void match(IntStream input, int ttype, BitSet follow)
088: throws RecognitionException {
089: if (input.LA(1) == ttype) {
090: input.consume();
091: errorRecovery = false;
092: failed = false;
093: return;
094: }
095: if (backtracking > 0) {
096: failed = true;
097: return;
098: }
099: mismatch(input, ttype, follow);
100: return;
101: }
102:
103: public void matchAny(IntStream input) {
104: errorRecovery = false;
105: failed = false;
106: input.consume();
107: }
108:
109: /** factor out what to do upon token mismatch so tree parsers can behave
110: * differently. Override this method in your parser to do things
111: * like bailing out after the first error; just throw the mte object
112: * instead of calling the recovery method.
113: */
114: protected void mismatch(IntStream input, int ttype, BitSet follow)
115: throws RecognitionException {
116: MismatchedTokenException mte = new MismatchedTokenException(
117: ttype, input);
118: recoverFromMismatchedToken(input, mte, ttype, follow);
119: }
120:
121: /** Report a recognition problem.
122: *
123: * This method sets errorRecovery to indicate the parser is recovering
124: * not parsing. Once in recovery mode, no errors are generated.
125: * To get out of recovery mode, the parser must successfully match
126: * a token (after a resync). So it will go:
127: *
128: * 1. error occurs
129: * 2. enter recovery mode, report error
130: * 3. consume until token found in resynch set
131: * 4. try to resume parsing
132: * 5. next match() will reset errorRecovery mode
133: */
134: public void reportError(RecognitionException e) {
135: // if we've already reported an error and have not matched a token
136: // yet successfully, don't report any errors.
137: if (errorRecovery) {
138: //System.err.print("[SPURIOUS] ");
139: return;
140: }
141: errorRecovery = true;
142:
143: displayRecognitionError(this .getTokenNames(), e);
144: }
145:
146: public void displayRecognitionError(String[] tokenNames,
147: RecognitionException e) {
148: String hdr = getErrorHeader(e);
149: String msg = getErrorMessage(e, tokenNames);
150: emitErrorMessage(hdr + " " + msg);
151: }
152:
153: /** What error message should be generated for the various
154: * exception types?
155: *
156: * Not very object-oriented code, but I like having all error message
157: * generation within one method rather than spread among all of the
158: * exception classes. This also makes it much easier for the exception
159: * handling because the exception classes do not have to have pointers back
160: * to this object to access utility routines and so on. Also, changing
161: * the message for an exception type would be difficult because you
162: * would have to subclassing exception, but then somehow get ANTLR
163: * to make those kinds of exception objects instead of the default.
164: * This looks weird, but trust me--it makes the most sense in terms
165: * of flexibility.
166: *
167: * For grammar debugging, you will want to override this to add
168: * more information such as the stack frame with
169: * getRuleInvocationStack(e, this.getClass().getName()) and,
170: * for no viable alts, the decision description and state etc...
171: *
172: * Override this to change the message generated for one or more
173: * exception types.
174: */
175: public String getErrorMessage(RecognitionException e,
176: String[] tokenNames) {
177: String msg = null;
178: if (e instanceof MismatchedTokenException) {
179: MismatchedTokenException mte = (MismatchedTokenException) e;
180: String tokenName = "<unknown>";
181: if (mte.expecting == Token.EOF) {
182: tokenName = "EOF";
183: } else {
184: tokenName = tokenNames[mte.expecting];
185: }
186: msg = "mismatched input " + getTokenErrorDisplay(e.token)
187: + " expecting " + tokenName;
188: } else if (e instanceof MismatchedTreeNodeException) {
189: MismatchedTreeNodeException mtne = (MismatchedTreeNodeException) e;
190: String tokenName = "<unknown>";
191: if (mtne.expecting == Token.EOF) {
192: tokenName = "EOF";
193: } else {
194: tokenName = tokenNames[mtne.expecting];
195: }
196: msg = "mismatched tree node: " + mtne.node + " expecting "
197: + tokenName;
198: } else if (e instanceof NoViableAltException) {
199: NoViableAltException nvae = (NoViableAltException) e;
200: // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
201: // and "(decision="+nvae.decisionNumber+") and
202: // "state "+nvae.stateNumber
203: msg = "no viable alternative at input "
204: + getTokenErrorDisplay(e.token);
205: } else if (e instanceof EarlyExitException) {
206: EarlyExitException eee = (EarlyExitException) e;
207: // for development, can add "(decision="+eee.decisionNumber+")"
208: msg = "required (...)+ loop did not match anything at input "
209: + getTokenErrorDisplay(e.token);
210: } else if (e instanceof MismatchedSetException) {
211: MismatchedSetException mse = (MismatchedSetException) e;
212: msg = "mismatched input " + getTokenErrorDisplay(e.token)
213: + " expecting set " + mse.expecting;
214: } else if (e instanceof MismatchedNotSetException) {
215: MismatchedNotSetException mse = (MismatchedNotSetException) e;
216: msg = "mismatched input " + getTokenErrorDisplay(e.token)
217: + " expecting set " + mse.expecting;
218: } else if (e instanceof FailedPredicateException) {
219: FailedPredicateException fpe = (FailedPredicateException) e;
220: msg = "rule " + fpe.ruleName + " failed predicate: {"
221: + fpe.predicateText + "}?";
222: }
223: return msg;
224: }
225:
226: /** What is the error header, normally line/character position information? */
public String getErrorHeader(RecognitionException e) {
    // position comes straight off the exception captured at error time
    return "line " + e.line + ":" + e.charPositionInLine;
}
230:
231: /** How should a token be displayed in an error message? The default
232: * is to display just the text, but during development you might
233: * want to have a lot of information spit out. Override in that case
234: * to use t.toString() (which, for CommonToken, dumps everything about
235: * the token). This is better than forcing you to override a method in
236: * your token objects because you don't have to go modify your lexer
237: * so that it creates a new Java type.
238: */
239: public String getTokenErrorDisplay(Token t) {
240: String s = t.getText();
241: if (s == null) {
242: if (t.getType() == Token.EOF) {
243: s = "<EOF>";
244: } else {
245: s = "<" + t.getType() + ">";
246: }
247: }
248: s = s.replaceAll("\n", "\\\\n");
249: s = s.replaceAll("\r", "\\\\r");
250: s = s.replaceAll("\t", "\\\\t");
251: return "'" + s + "'";
252: }
253:
254: /** Override this method to change where error messages go */
public void emitErrorMessage(String msg) {
    // default sink is stderr; override to redirect messages elsewhere
    System.err.println(msg);
}
258:
259: /** Recover from an error found on the input stream. Mostly this is
260: * NoViableAlt exceptions, but could be a mismatched token that
261: * the match() routine could not recover from.
262: */
263: public void recover(IntStream input, RecognitionException re) {
264: if (lastErrorIndex == input.index()) {
265: // uh oh, another error at same token index; must be a case
266: // where LT(1) is in the recovery token set so nothing is
267: // consumed; consume a single token so at least to prevent
268: // an infinite loop; this is a failsafe.
269: input.consume();
270: }
271: lastErrorIndex = input.index();
272: BitSet followSet = computeErrorRecoverySet();
273: beginResync();
274: consumeUntil(input, followSet);
275: endResync();
276: }
277:
278: /** A hook to listen in on the token consumption during error recovery.
* The DebugParser subclasses this to fire events to the listener.
280: */
public void beginResync() {
    // default: no-op; DebugParser overrides to fire a resync-start event
}

public void endResync() {
    // default: no-op; DebugParser overrides to fire a resync-end event
}
286:
287: /* Compute the error recovery set for the current rule. During
288: * rule invocation, the parser pushes the set of tokens that can
289: * follow that rule reference on the stack; this amounts to
290: * computing FIRST of what follows the rule reference in the
291: * enclosing rule. This local follow set only includes tokens
292: * from within the rule; i.e., the FIRST computation done by
293: * ANTLR stops at the end of a rule.
294: *
295: * EXAMPLE
296: *
297: * When you find a "no viable alt exception", the input is not
298: * consistent with any of the alternatives for rule r. The best
299: * thing to do is to consume tokens until you see something that
300: * can legally follow a call to r *or* any rule that called r.
301: * You don't want the exact set of viable next tokens because the
302: * input might just be missing a token--you might consume the
303: * rest of the input looking for one of the missing tokens.
304: *
305: * Consider grammar:
306: *
307: * a : '[' b ']'
308: * | '(' b ')'
309: * ;
310: * b : c '^' INT ;
311: * c : ID
312: * | INT
313: * ;
314: *
315: * At each rule invocation, the set of tokens that could follow
316: * that rule is pushed on a stack. Here are the various "local"
317: * follow sets:
318: *
319: * FOLLOW(b1_in_a) = FIRST(']') = ']'
320: * FOLLOW(b2_in_a) = FIRST(')') = ')'
321: * FOLLOW(c_in_b) = FIRST('^') = '^'
322: *
323: * Upon erroneous input "[]", the call chain is
324: *
325: * a -> b -> c
326: *
327: * and, hence, the follow context stack is:
328: *
329: * depth local follow set after call to rule
330: * 0 <EOF> a (from main())
331: * 1 ']' b
332: * 3 '^' c
333: *
334: * Notice that ')' is not included, because b would have to have
335: * been called from a different context in rule a for ')' to be
336: * included.
337: *
338: * For error recovery, we cannot consider FOLLOW(c)
339: * (context-sensitive or otherwise). We need the combined set of
340: * all context-sensitive FOLLOW sets--the set of all tokens that
341: * could follow any reference in the call chain. We need to
342: * resync to one of those tokens. Note that FOLLOW(c)='^' and if
343: * we resync'd to that token, we'd consume until EOF. We need to
344: * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
345: * In this case, for input "[]", LA(1) is in this set so we would
346: * not consume anything and after printing an error rule c would
347: * return normally. It would not find the required '^' though.
348: * At this point, it gets a mismatched token error and throws an
349: * exception (since LA(1) is not in the viable following token
350: * set). The rule exception handler tries to recover, but finds
351: * the same recovery set and doesn't consume anything. Rule b
352: * exits normally returning to rule a. Now it finds the ']' (and
353: * with the successful match exits errorRecovery mode).
354: *
* So, you can see that the parser walks up the call chain looking
* for the token that was a member of the recovery set.
357: *
358: * Errors are not generated in errorRecovery mode.
359: *
360: * ANTLR's error recovery mechanism is based upon original ideas:
361: *
362: * "Algorithms + Data Structures = Programs" by Niklaus Wirth
363: *
364: * and
365: *
366: * "A note on error recovery in recursive descent parsers":
367: * http://portal.acm.org/citation.cfm?id=947902.947905
368: *
369: * Later, Josef Grosch had some good ideas:
370: *
371: * "Efficient and Comfortable Error Recovery in Recursive Descent
372: * Parsers":
373: * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
374: *
375: * Like Grosch I implemented local FOLLOW sets that are combined
376: * at run-time upon error to avoid overhead during parsing.
377: */
protected BitSet computeErrorRecoverySet() {
    // union of every FOLLOW set on the stack (exact=false: not context-limited)
    return combineFollows(false);
}
381:
382: /** Compute the context-sensitive FOLLOW set for current rule.
383: * This is set of token types that can follow a specific rule
384: * reference given a specific call chain. You get the set of
385: * viable tokens that can possibly come next (lookahead depth 1)
386: * given the current call chain. Contrast this with the
387: * definition of plain FOLLOW for rule r:
388: *
389: * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
390: *
391: * where x in T* and alpha, beta in V*; T is set of terminals and
392: * V is the set of terminals and nonterminals. In other words,
393: * FOLLOW(r) is the set of all tokens that can possibly follow
394: * references to r in *any* sentential form (context). At
395: * runtime, however, we know precisely which context applies as
396: * we have the call chain. We may compute the exact (rather
397: * than covering superset) set of following tokens.
398: *
399: * For example, consider grammar:
400: *
401: * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
402: * | "return" expr '.'
403: * ;
404: * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
405: * atom : INT // FOLLOW(atom)=={'+',')',';','.'}
406: * | '(' expr ')'
407: * ;
408: *
409: * The FOLLOW sets are all inclusive whereas context-sensitive
410: * FOLLOW sets are precisely what could follow a rule reference.
* For input "i=(3);", here is the derivation:
412: *
413: * stat => ID '=' expr ';'
414: * => ID '=' atom ('+' atom)* ';'
415: * => ID '=' '(' expr ')' ('+' atom)* ';'
416: * => ID '=' '(' atom ')' ('+' atom)* ';'
417: * => ID '=' '(' INT ')' ('+' atom)* ';'
418: * => ID '=' '(' INT ')' ';'
419: *
420: * At the "3" token, you'd have a call chain of
421: *
422: * stat -> expr -> atom -> expr -> atom
423: *
424: * What can follow that specific nested ref to atom? Exactly ')'
425: * as you can see by looking at the derivation of this specific
426: * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
427: *
428: * You want the exact viable token set when recovering from a
429: * token mismatch. Upon token mismatch, if LA(1) is member of
430: * the viable next token set, then you know there is most likely
431: * a missing token in the input stream. "Insert" one by just not
432: * throwing an exception.
433: */
protected BitSet computeContextSensitiveRuleFOLLOW() {
    // exact=true: stop combining at the first rule that consumes its
    // follow locally, yielding the precise context-sensitive set
    return combineFollows(true);
}
437:
438: protected BitSet combineFollows(boolean exact) {
439: int top = _fsp;
440: BitSet followSet = new BitSet();
441: for (int i = top; i >= 0; i--) {
442: BitSet localFollowSet = (BitSet) following[i];
443: /*
444: System.out.println("local follow depth "+i+"="+
445: localFollowSet.toString(getTokenNames())+")");
446: */
447: followSet.orInPlace(localFollowSet);
448: if (exact && !localFollowSet.member(Token.EOR_TOKEN_TYPE)) {
449: break;
450: }
451: }
452: followSet.remove(Token.EOR_TOKEN_TYPE);
453: return followSet;
454: }
455:
456: /** Attempt to recover from a single missing or extra token.
457: *
458: * EXTRA TOKEN
459: *
* LA(1) is not what we are looking for. If LA(2) has the right token,
* however, then assume LA(1) is some extra spurious token. Delete it
* and use LA(2) as if we were doing a normal match(), which advances
* the input.
464: *
465: * MISSING TOKEN
466: *
467: * If current token is consistent with what could come after
468: * ttype then it is ok to "insert" the missing token, else throw
469: * exception For example, Input "i=(3;" is clearly missing the
470: * ')'. When the parser returns from the nested call to expr, it
471: * will have call chain:
472: *
473: * stat -> expr -> atom
474: *
475: * and it will be trying to match the ')' at this point in the
476: * derivation:
477: *
478: * => ID '=' '(' INT ')' ('+' atom)* ';'
479: * ^
480: * match() will see that ';' doesn't match ')' and report a
481: * mismatched token error. To recover, it sees that LA(1)==';'
482: * is in the set of tokens that can follow the ')' token
483: * reference in rule atom. It can assume that you forgot the ')'.
484: */
485: public void recoverFromMismatchedToken(IntStream input,
486: RecognitionException e, int ttype, BitSet follow)
487: throws RecognitionException {
488: System.err.println("BR.recoverFromMismatchedToken");
489: // if next token is what we are looking for then "delete" this token
490: if (input.LA(2) == ttype) {
491: reportError(e);
492: /*
493: System.err.println("recoverFromMismatchedToken deleting "+input.LT(1)+
494: " since "+input.LT(2)+" is what we want");
495: */
496: beginResync();
497: input.consume(); // simply delete extra token
498: endResync();
499: input.consume(); // move past ttype token as if all were ok
500: return;
501: }
502: if (!recoverFromMismatchedElement(input, e, follow)) {
503: throw e;
504: }
505: }
506:
507: public void recoverFromMismatchedSet(IntStream input,
508: RecognitionException e, BitSet follow)
509: throws RecognitionException {
510: // TODO do single token deletion like above for Token mismatch
511: if (!recoverFromMismatchedElement(input, e, follow)) {
512: throw e;
513: }
514: }
515:
516: /** This code is factored out from mismatched token and mismatched set
517: * recovery. It handles "single token insertion" error recovery for
518: * both. No tokens are consumed to recover from insertions. Return
519: * true if recovery was possible else return false.
520: */
521: protected boolean recoverFromMismatchedElement(IntStream input,
522: RecognitionException e, BitSet follow) {
523: if (follow == null) {
524: // we have no information about the follow; we can only consume
525: // a single token and hope for the best
526: return false;
527: }
528: //System.out.println("recoverFromMismatchedElement");
529: // compute what can follow this grammar element reference
530: if (follow.member(Token.EOR_TOKEN_TYPE)) {
531: BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
532: follow = follow.or(viableTokensFollowingThisRule);
533: follow.remove(Token.EOR_TOKEN_TYPE);
534: }
535: // if current token is consistent with what could come after set
536: // then it is ok to "insert" the missing token, else throw exception
537: //System.out.println("viable tokens="+follow.toString(getTokenNames())+")");
538: if (follow.member(input.LA(1))) {
539: //System.out.println("LT(1)=="+input.LT(1)+" is consistent with what follows; inserting...");
540: reportError(e);
541: return true;
542: }
543: //System.err.println("nothing to do; throw exception");
544: return false;
545: }
546:
547: public void consumeUntil(IntStream input, int tokenType) {
548: //System.out.println("consumeUntil "+tokenType);
549: int ttype = input.LA(1);
550: while (ttype != Token.EOF && ttype != tokenType) {
551: input.consume();
552: ttype = input.LA(1);
553: }
554: }
555:
556: /** Consume tokens until one matches the given token set */
557: public void consumeUntil(IntStream input, BitSet set) {
558: //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
559: int ttype = input.LA(1);
560: while (ttype != Token.EOF && !set.member(ttype)) {
561: //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
562: input.consume();
563: ttype = input.LA(1);
564: }
565: }
566:
567: /** Push a rule's follow set using our own hardcoded stack */
568: protected void pushFollow(BitSet fset) {
569: if ((_fsp + 1) >= following.length) {
570: BitSet[] f = new BitSet[following.length * 2];
571: System.arraycopy(following, 0, f, 0, following.length - 1);
572: following = f;
573: }
574: following[++_fsp] = fset;
575: }
576:
577: /** Return List<String> of the rules in your parser instance
578: * leading up to a call to this method. You could override if
579: * you want more details such as the file/line info of where
580: * in the parser java code a rule is invoked.
581: *
582: * This is very useful for error messages and for context-sensitive
583: * error recovery.
584: */
585: public List getRuleInvocationStack() {
586: String parserClassName = getClass().getName();
587: return getRuleInvocationStack(new Throwable(), parserClassName);
588: }
589:
590: /** A more general version of getRuleInvocationStack where you can
591: * pass in, for example, a RecognitionException to get it's rule
592: * stack trace. This routine is shared with all recognizers, hence,
593: * static.
594: *
595: * TODO: move to a utility class or something; weird having lexer call this
596: */
597: public static List getRuleInvocationStack(Throwable e,
598: String recognizerClassName) {
599: List rules = new ArrayList();
600: StackTraceElement[] stack = e.getStackTrace();
601: int i = 0;
602: for (i = stack.length - 1; i >= 0; i--) {
603: StackTraceElement t = stack[i];
604: if (t.getClassName().startsWith("org.antlr.runtime.")) {
605: continue; // skip support code such as this method
606: }
607: if (t.getMethodName().equals(NEXT_TOKEN_RULE_NAME)) {
608: continue;
609: }
610: if (!t.getClassName().equals(recognizerClassName)) {
611: continue; // must not be part of this parser
612: }
613: rules.add(t.getMethodName());
614: }
615: return rules;
616: }
617:
/** Current syntactic-predicate nesting depth; 0 means not backtracking. */
public int getBacktrackingLevel() {
    return backtracking;
}
621:
622: /** Used to print out token names like ID during debugging and
623: * error reporting. The generated parsers implement a method
624: * that overrides this to point to their String[] tokenNames.
625: */
public String[] getTokenNames() {
    return null; // base implementation; generated recognizers override
}
629:
630: /** For debugging and other purposes, might want the grammar name.
631: * Have ANTLR generate an implementation for this method.
632: */
public String getGrammarFileName() {
    return null; // base implementation; generated recognizers override
}
636:
637: /** A convenience method for use most often with template rewrites.
638: * Convert a List<Token> to List<String>
639: */
640: public List toStrings(List tokens) {
641: if (tokens == null)
642: return null;
643: List strings = new ArrayList(tokens.size());
644: for (int i = 0; i < tokens.size(); i++) {
645: strings.add(((Token) tokens.get(i)).getText());
646: }
647: return strings;
648: }
649:
650: /** Convert a List<RuleReturnScope> to List<StringTemplate> by copying
651: * out the .st property. Useful when converting from
652: * list labels to template attributes:
653: *
654: * a : ids+=rule -> foo(ids={toTemplates($ids)})
655: * ;
656: * TJP: this is not needed anymore. $ids is a List of templates
657: * when output=template
658: *
659: public List toTemplates(List retvals) {
660: if ( retvals==null ) return null;
661: List strings = new ArrayList(retvals.size());
662: for (int i=0; i<retvals.size(); i++) {
663: strings.add(((RuleReturnScope)retvals.get(i)).getTemplate());
664: }
665: return strings;
666: }
667: */
668:
669: /** Given a rule number and a start token index number, return
670: * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
671: * start index. If this rule has parsed input starting from the
672: * start index before, then return where the rule stopped parsing.
673: * It returns the index of the last token matched by the rule.
674: *
675: * For now we use a hashtable and just the slow Object-based one.
676: * Later, we can make a special one for ints and also one that
677: * tosses out data after we commit past input position i.
678: */
679: public int getRuleMemoization(int ruleIndex, int ruleStartIndex) {
680: if (ruleMemo[ruleIndex] == null) {
681: ruleMemo[ruleIndex] = new HashMap();
682: }
683: Integer stopIndexI = (Integer) ruleMemo[ruleIndex]
684: .get(new Integer(ruleStartIndex));
685: if (stopIndexI == null) {
686: return MEMO_RULE_UNKNOWN;
687: }
688: return stopIndexI.intValue();
689: }
690:
691: /** Has this rule already parsed input at the current index in the
692: * input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
693: * If we attempted but failed to parse properly before, return
694: * MEMO_RULE_FAILED.
695: *
696: * This method has a side-effect: if we have seen this input for
697: * this rule and successfully parsed before, then seek ahead to
698: * 1 past the stop token matched for this rule last time.
699: */
700: public boolean alreadyParsedRule(IntStream input, int ruleIndex) {
701: int stopIndex = getRuleMemoization(ruleIndex, input.index());
702: if (stopIndex == MEMO_RULE_UNKNOWN) {
703: return false;
704: }
705: if (stopIndex == MEMO_RULE_FAILED) {
706: //System.out.println("rule "+ruleIndex+" will never succeed");
707: failed = true;
708: } else {
709: //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+failed);
710: input.seek(stopIndex + 1); // jump to one past stop token
711: }
712: return true;
713: }
714:
715: /** Record whether or not this rule parsed the input at this position
716: * successfully. Use a standard java hashtable for now.
717: */
718: public void memoize(IntStream input, int ruleIndex,
719: int ruleStartIndex) {
720: int stopTokenIndex = failed ? MEMO_RULE_FAILED
721: : input.index() - 1;
722: if (ruleMemo[ruleIndex] != null) {
723: ruleMemo[ruleIndex].put(new Integer(ruleStartIndex),
724: new Integer(stopTokenIndex));
725: }
726: }
727:
728: /** Assume failure in case a rule bails out with an exception.
729: * Reset to rule stop index if successful.
730: public void memoizeFailure(int ruleIndex, int ruleStartIndex) {
731: ruleMemo[ruleIndex].put(
732: new Integer(ruleStartIndex), MEMO_RULE_FAILED_I
733: );
734: }
735: */
736:
737: /** After successful completion of a rule, record success for this
738: * rule and that it can skip ahead next time it attempts this
739: * rule for this input position.
740: public void memoizeSuccess(IntStream input,
741: int ruleIndex,
742: int ruleStartIndex)
743: {
744: ruleMemo[ruleIndex].put(
745: new Integer(ruleStartIndex), new Integer(input.index()-1)
746: );
747: }
748: */
749:
750: /** return how many rule/input-index pairs there are in total.
751: * TODO: this includes synpreds. :(
752: */
753: public int getRuleMemoizationCacheSize() {
754: int n = 0;
755: for (int i = 0; ruleMemo != null && i < ruleMemo.length; i++) {
756: Map ruleMap = ruleMemo[i];
757: if (ruleMap != null) {
758: n += ruleMap.size(); // how many input indexes are recorded?
759: }
760: }
761: return n;
762: }
763:
764: public void traceIn(String ruleName, int ruleIndex,
765: Object inputSymbol) {
766: System.out.print("enter " + ruleName + " " + inputSymbol);
767: if (failed) {
768: System.out.println(" failed=" + failed);
769: }
770: if (backtracking > 0) {
771: System.out.print(" backtracking=" + backtracking);
772: }
773: System.out.println();
774: }
775:
776: public void traceOut(String ruleName, int ruleIndex,
777: Object inputSymbol) {
778: System.out.print("exit " + ruleName + " " + inputSymbol);
779: if (failed) {
780: System.out.println(" failed=" + failed);
781: }
782: if (backtracking > 0) {
783: System.out.print(" backtracking=" + backtracking);
784: }
785: System.out.println();
786: }
787:
788: /** A syntactic predicate. Returns true/false depending on whether
789: * the specified grammar fragment matches the current input stream.
790: * This resets the failed instance var afterwards.
791: public boolean synpred(IntStream input, GrammarFragmentPtr fragment) {
792: //int i = input.index();
793: //System.out.println("begin backtracking="+backtracking+" @"+i+"="+((CommonTokenStream)input).LT(1));
794: backtracking++;
795: beginBacktrack(backtracking);
796: int start = input.mark();
797: try {fragment.invoke();}
798: catch (RecognitionException re) {
799: System.err.println("impossible: "+re);
800: }
801: boolean success = !failed;
802: input.rewind(start);
803: endBacktrack(backtracking, success);
804: backtracking--;
805: //System.out.println("end backtracking="+backtracking+": "+(failed?"FAILED":"SUCCEEDED")+" @"+input.index()+" should be "+i);
806: failed=false;
807: return success;
808: }
809: */
810: }
|