Source Code Cross Referenced for BaseRecognizer.java in  » Parser » antlr-3.0.1 » org » antlr » runtime » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Parser » antlr 3.0.1 » org.antlr.runtime 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package org.antlr.runtime;
002:
003:        import java.util.ArrayList;
004:        import java.util.HashMap;
005:        import java.util.List;
006:        import java.util.Map;
007:
008:        /** A generic recognizer that can handle recognizers generated from
009:         *  lexer, parser, and tree grammars.  This is all the parsing
010:         *  support code essentially; most of it is error recovery stuff and
011:         *  backtracking.
012:         */
013:        public abstract class BaseRecognizer {
014:            public static final int MEMO_RULE_FAILED = -2;
015:            public static final int MEMO_RULE_UNKNOWN = -1;
016:            public static final int INITIAL_FOLLOW_STACK_SIZE = 100;
017:
018:            public static final Integer MEMO_RULE_FAILED_I = new Integer(
019:                    MEMO_RULE_FAILED);
020:
021:            // copies from Token object for convenience in actions
022:            public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
023:            public static final int HIDDEN = Token.HIDDEN_CHANNEL;
024:
025:            public static final String NEXT_TOKEN_RULE_NAME = "nextToken";
026:
027:            /** Track the set of token types that can follow any rule invocation.
028:             *  Stack grows upwards.  When it hits the max, it grows 2x in size
029:             *  and keeps going.
030:             */
031:            protected BitSet[] following = new BitSet[INITIAL_FOLLOW_STACK_SIZE];
032:            protected int _fsp = -1;
033:
034:            /** This is true when we see an error and before having successfully
035:             *  matched a token.  Prevents generation of more than one error message
036:             *  per error.
037:             */
038:            protected boolean errorRecovery = false;
039:
040:            /** The index into the input stream where the last error occurred.
041:             * 	This is used to prevent infinite loops where an error is found
042:             *  but no token is consumed during recovery...another error is found,
043:             *  ad naseum.  This is a failsafe mechanism to guarantee that at least
044:             *  one token/tree node is consumed for two errors.
045:             */
046:            protected int lastErrorIndex = -1;
047:
048:            /** In lieu of a return value, this indicates that a rule or token
049:             *  has failed to match.  Reset to false upon valid token match.
050:             */
051:            protected boolean failed = false;
052:
053:            /** If 0, no backtracking is going on.  Safe to exec actions etc...
054:             *  If >0 then it's the level of backtracking.
055:             */
056:            protected int backtracking = 0;
057:
058:            /** An array[size num rules] of Map<Integer,Integer> that tracks
059:             *  the stop token index for each rule.  ruleMemo[ruleIndex] is
060:             *  the memoization table for ruleIndex.  For key ruleStartIndex, you
061:             *  get back the stop token for associated rule or MEMO_RULE_FAILED.
062:             *
063:             *  This is only used if rule memoization is on (which it is by default).
064:             */
065:            protected Map[] ruleMemo;
066:
067:            /** reset the parser's state; subclasses must rewinds the input stream */
068:            public void reset() {
069:                // wack everything related to error recovery
070:                _fsp = -1;
071:                errorRecovery = false;
072:                lastErrorIndex = -1;
073:                failed = false;
074:                // wack everything related to backtracking and memoization
075:                backtracking = 0;
076:                for (int i = 0; ruleMemo != null && i < ruleMemo.length; i++) { // wipe cache
077:                    ruleMemo[i] = null;
078:                }
079:            }
080:
081:            /** Match current input symbol against ttype.  Upon error, do one token
082:             *  insertion or deletion if possible.  You can override to not recover
083:             *  here and bail out of the current production to the normal error
084:             *  exception catch (at the end of the method) by just throwing
085:             *  MismatchedTokenException upon input.LA(1)!=ttype.
086:             */
087:            public void match(IntStream input, int ttype, BitSet follow)
088:                    throws RecognitionException {
089:                if (input.LA(1) == ttype) {
090:                    input.consume();
091:                    errorRecovery = false;
092:                    failed = false;
093:                    return;
094:                }
095:                if (backtracking > 0) {
096:                    failed = true;
097:                    return;
098:                }
099:                mismatch(input, ttype, follow);
100:                return;
101:            }
102:
103:            public void matchAny(IntStream input) {
104:                errorRecovery = false;
105:                failed = false;
106:                input.consume();
107:            }
108:
109:            /** factor out what to do upon token mismatch so tree parsers can behave
110:             *  differently.  Override this method in your parser to do things
111:             *  like bailing out after the first error; just throw the mte object
112:             *  instead of calling the recovery method.
113:             */
114:            protected void mismatch(IntStream input, int ttype, BitSet follow)
115:                    throws RecognitionException {
116:                MismatchedTokenException mte = new MismatchedTokenException(
117:                        ttype, input);
118:                recoverFromMismatchedToken(input, mte, ttype, follow);
119:            }
120:
121:            /** Report a recognition problem.
122:             *
123:             *  This method sets errorRecovery to indicate the parser is recovering
124:             *  not parsing.  Once in recovery mode, no errors are generated.
125:             *  To get out of recovery mode, the parser must successfully match
126:             *  a token (after a resync).  So it will go:
127:             *
128:             * 		1. error occurs
129:             * 		2. enter recovery mode, report error
130:             * 		3. consume until token found in resynch set
131:             * 		4. try to resume parsing
132:             * 		5. next match() will reset errorRecovery mode
133:             */
134:            public void reportError(RecognitionException e) {
135:                // if we've already reported an error and have not matched a token
136:                // yet successfully, don't report any errors.
137:                if (errorRecovery) {
138:                    //System.err.print("[SPURIOUS] ");
139:                    return;
140:                }
141:                errorRecovery = true;
142:
143:                displayRecognitionError(this .getTokenNames(), e);
144:            }
145:
146:            public void displayRecognitionError(String[] tokenNames,
147:                    RecognitionException e) {
148:                String hdr = getErrorHeader(e);
149:                String msg = getErrorMessage(e, tokenNames);
150:                emitErrorMessage(hdr + " " + msg);
151:            }
152:
153:            /** What error message should be generated for the various
154:             *  exception types?
155:             *
156:             *  Not very object-oriented code, but I like having all error message
157:             *  generation within one method rather than spread among all of the
158:             *  exception classes. This also makes it much easier for the exception
159:             *  handling because the exception classes do not have to have pointers back
160:             *  to this object to access utility routines and so on. Also, changing
161:             *  the message for an exception type would be difficult because you
162:             *  would have to subclassing exception, but then somehow get ANTLR
163:             *  to make those kinds of exception objects instead of the default.
164:             *  This looks weird, but trust me--it makes the most sense in terms
165:             *  of flexibility.
166:             *
167:             *  For grammar debugging, you will want to override this to add
168:             *  more information such as the stack frame with
169:             *  getRuleInvocationStack(e, this.getClass().getName()) and,
170:             *  for no viable alts, the decision description and state etc...
171:             *
172:             *  Override this to change the message generated for one or more
173:             *  exception types.
174:             */
175:            public String getErrorMessage(RecognitionException e,
176:                    String[] tokenNames) {
177:                String msg = null;
178:                if (e instanceof  MismatchedTokenException) {
179:                    MismatchedTokenException mte = (MismatchedTokenException) e;
180:                    String tokenName = "<unknown>";
181:                    if (mte.expecting == Token.EOF) {
182:                        tokenName = "EOF";
183:                    } else {
184:                        tokenName = tokenNames[mte.expecting];
185:                    }
186:                    msg = "mismatched input " + getTokenErrorDisplay(e.token)
187:                            + " expecting " + tokenName;
188:                } else if (e instanceof  MismatchedTreeNodeException) {
189:                    MismatchedTreeNodeException mtne = (MismatchedTreeNodeException) e;
190:                    String tokenName = "<unknown>";
191:                    if (mtne.expecting == Token.EOF) {
192:                        tokenName = "EOF";
193:                    } else {
194:                        tokenName = tokenNames[mtne.expecting];
195:                    }
196:                    msg = "mismatched tree node: " + mtne.node + " expecting "
197:                            + tokenName;
198:                } else if (e instanceof  NoViableAltException) {
199:                    NoViableAltException nvae = (NoViableAltException) e;
200:                    // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
201:                    // and "(decision="+nvae.decisionNumber+") and
202:                    // "state "+nvae.stateNumber
203:                    msg = "no viable alternative at input "
204:                            + getTokenErrorDisplay(e.token);
205:                } else if (e instanceof  EarlyExitException) {
206:                    EarlyExitException eee = (EarlyExitException) e;
207:                    // for development, can add "(decision="+eee.decisionNumber+")"
208:                    msg = "required (...)+ loop did not match anything at input "
209:                            + getTokenErrorDisplay(e.token);
210:                } else if (e instanceof  MismatchedSetException) {
211:                    MismatchedSetException mse = (MismatchedSetException) e;
212:                    msg = "mismatched input " + getTokenErrorDisplay(e.token)
213:                            + " expecting set " + mse.expecting;
214:                } else if (e instanceof  MismatchedNotSetException) {
215:                    MismatchedNotSetException mse = (MismatchedNotSetException) e;
216:                    msg = "mismatched input " + getTokenErrorDisplay(e.token)
217:                            + " expecting set " + mse.expecting;
218:                } else if (e instanceof  FailedPredicateException) {
219:                    FailedPredicateException fpe = (FailedPredicateException) e;
220:                    msg = "rule " + fpe.ruleName + " failed predicate: {"
221:                            + fpe.predicateText + "}?";
222:                }
223:                return msg;
224:            }
225:
226:            /** What is the error header, normally line/character position information? */
227:            public String getErrorHeader(RecognitionException e) {
228:                return "line " + e.line + ":" + e.charPositionInLine;
229:            }
230:
231:            /** How should a token be displayed in an error message? The default
232:             *  is to display just the text, but during development you might
233:             *  want to have a lot of information spit out.  Override in that case
234:             *  to use t.toString() (which, for CommonToken, dumps everything about
235:             *  the token). This is better than forcing you to override a method in
236:             *  your token objects because you don't have to go modify your lexer
237:             *  so that it creates a new Java type.
238:             */
239:            public String getTokenErrorDisplay(Token t) {
240:                String s = t.getText();
241:                if (s == null) {
242:                    if (t.getType() == Token.EOF) {
243:                        s = "<EOF>";
244:                    } else {
245:                        s = "<" + t.getType() + ">";
246:                    }
247:                }
248:                s = s.replaceAll("\n", "\\\\n");
249:                s = s.replaceAll("\r", "\\\\r");
250:                s = s.replaceAll("\t", "\\\\t");
251:                return "'" + s + "'";
252:            }
253:
254:            /** Override this method to change where error messages go */
255:            public void emitErrorMessage(String msg) {
256:                System.err.println(msg);
257:            }
258:
259:            /** Recover from an error found on the input stream.  Mostly this is
260:             *  NoViableAlt exceptions, but could be a mismatched token that
261:             *  the match() routine could not recover from.
262:             */
263:            public void recover(IntStream input, RecognitionException re) {
264:                if (lastErrorIndex == input.index()) {
265:                    // uh oh, another error at same token index; must be a case
266:                    // where LT(1) is in the recovery token set so nothing is
267:                    // consumed; consume a single token so at least to prevent
268:                    // an infinite loop; this is a failsafe.
269:                    input.consume();
270:                }
271:                lastErrorIndex = input.index();
272:                BitSet followSet = computeErrorRecoverySet();
273:                beginResync();
274:                consumeUntil(input, followSet);
275:                endResync();
276:            }
277:
278:            /** A hook to listen in on the token consumption during error recovery.
279:             *  The DebugParser subclasses this to fire events to the listenter.
280:             */
281:            public void beginResync() {
282:            }
283:
284:            public void endResync() {
285:            }
286:
287:            /*  Compute the error recovery set for the current rule.  During
288:             *  rule invocation, the parser pushes the set of tokens that can
289:             *  follow that rule reference on the stack; this amounts to
290:             *  computing FIRST of what follows the rule reference in the
291:             *  enclosing rule. This local follow set only includes tokens
292:             *  from within the rule; i.e., the FIRST computation done by
293:             *  ANTLR stops at the end of a rule.
294:             *
295:             *  EXAMPLE
296:             *
297:             *  When you find a "no viable alt exception", the input is not
298:             *  consistent with any of the alternatives for rule r.  The best
299:             *  thing to do is to consume tokens until you see something that
300:             *  can legally follow a call to r *or* any rule that called r.
301:             *  You don't want the exact set of viable next tokens because the
302:             *  input might just be missing a token--you might consume the
303:             *  rest of the input looking for one of the missing tokens.
304:             *
305:             *  Consider grammar:
306:             *
307:             *  a : '[' b ']'
308:             *    | '(' b ')'
309:             *    ;
310:             *  b : c '^' INT ;
311:             *  c : ID
312:             *    | INT
313:             *    ;
314:             *
315:             *  At each rule invocation, the set of tokens that could follow
316:             *  that rule is pushed on a stack.  Here are the various "local"
317:             *  follow sets:
318:             *
319:             *  FOLLOW(b1_in_a) = FIRST(']') = ']'
320:             *  FOLLOW(b2_in_a) = FIRST(')') = ')'
321:             *  FOLLOW(c_in_b) = FIRST('^') = '^'
322:             *
323:             *  Upon erroneous input "[]", the call chain is
324:             *
325:             *  a -> b -> c
326:             *
327:             *  and, hence, the follow context stack is:
328:             *
329:             *  depth  local follow set     after call to rule
330:             *    0         <EOF>                    a (from main())
331:             *    1          ']'                     b
332:             *    3          '^'                     c
333:             *
334:             *  Notice that ')' is not included, because b would have to have
335:             *  been called from a different context in rule a for ')' to be
336:             *  included.
337:             *
338:             *  For error recovery, we cannot consider FOLLOW(c)
339:             *  (context-sensitive or otherwise).  We need the combined set of
340:             *  all context-sensitive FOLLOW sets--the set of all tokens that
341:             *  could follow any reference in the call chain.  We need to
342:             *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
343:             *  we resync'd to that token, we'd consume until EOF.  We need to
344:             *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
345:             *  In this case, for input "[]", LA(1) is in this set so we would
346:             *  not consume anything and after printing an error rule c would
347:             *  return normally.  It would not find the required '^' though.
348:             *  At this point, it gets a mismatched token error and throws an
349:             *  exception (since LA(1) is not in the viable following token
350:             *  set).  The rule exception handler tries to recover, but finds
351:             *  the same recovery set and doesn't consume anything.  Rule b
352:             *  exits normally returning to rule a.  Now it finds the ']' (and
353:             *  with the successful match exits errorRecovery mode).
354:             *
355:             *  So, you cna see that the parser walks up call chain looking
356:             *  for the token that was a member of the recovery set.
357:             *
358:             *  Errors are not generated in errorRecovery mode.
359:             *
360:             *  ANTLR's error recovery mechanism is based upon original ideas:
361:             *
362:             *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
363:             *
364:             *  and
365:             *
366:             *  "A note on error recovery in recursive descent parsers":
367:             *  http://portal.acm.org/citation.cfm?id=947902.947905
368:             *
369:             *  Later, Josef Grosch had some good ideas:
370:             *
371:             *  "Efficient and Comfortable Error Recovery in Recursive Descent
372:             *  Parsers":
373:             *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
374:             *
375:             *  Like Grosch I implemented local FOLLOW sets that are combined
376:             *  at run-time upon error to avoid overhead during parsing.
377:             */
378:            protected BitSet computeErrorRecoverySet() {
379:                return combineFollows(false);
380:            }
381:
382:            /** Compute the context-sensitive FOLLOW set for current rule.
383:             *  This is set of token types that can follow a specific rule
384:             *  reference given a specific call chain.  You get the set of
385:             *  viable tokens that can possibly come next (lookahead depth 1)
386:             *  given the current call chain.  Contrast this with the
387:             *  definition of plain FOLLOW for rule r:
388:             *
389:             *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
390:             *
391:             *  where x in T* and alpha, beta in V*; T is set of terminals and
392:             *  V is the set of terminals and nonterminals.  In other words,
393:             *  FOLLOW(r) is the set of all tokens that can possibly follow
394:             *  references to r in *any* sentential form (context).  At
395:             *  runtime, however, we know precisely which context applies as
396:             *  we have the call chain.  We may compute the exact (rather
397:             *  than covering superset) set of following tokens.
398:             *
399:             *  For example, consider grammar:
400:             *
401:             *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
402:             *       | "return" expr '.'
403:             *       ;
404:             *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
405:             *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
406:             *       | '(' expr ')'
407:             *       ;
408:             *
409:             *  The FOLLOW sets are all inclusive whereas context-sensitive
410:             *  FOLLOW sets are precisely what could follow a rule reference.
411:             *  For input input "i=(3);", here is the derivation:
412:             *
413:             *  stat => ID '=' expr ';'
414:             *       => ID '=' atom ('+' atom)* ';'
415:             *       => ID '=' '(' expr ')' ('+' atom)* ';'
416:             *       => ID '=' '(' atom ')' ('+' atom)* ';'
417:             *       => ID '=' '(' INT ')' ('+' atom)* ';'
418:             *       => ID '=' '(' INT ')' ';'
419:             *
420:             *  At the "3" token, you'd have a call chain of
421:             *
422:             *    stat -> expr -> atom -> expr -> atom
423:             *
424:             *  What can follow that specific nested ref to atom?  Exactly ')'
425:             *  as you can see by looking at the derivation of this specific
426:             *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
427:             *
428:             *  You want the exact viable token set when recovering from a
429:             *  token mismatch.  Upon token mismatch, if LA(1) is member of
430:             *  the viable next token set, then you know there is most likely
431:             *  a missing token in the input stream.  "Insert" one by just not
432:             *  throwing an exception.
433:             */
434:            protected BitSet computeContextSensitiveRuleFOLLOW() {
435:                return combineFollows(true);
436:            }
437:
438:            protected BitSet combineFollows(boolean exact) {
439:                int top = _fsp;
440:                BitSet followSet = new BitSet();
441:                for (int i = top; i >= 0; i--) {
442:                    BitSet localFollowSet = (BitSet) following[i];
443:                    /*
444:                    System.out.println("local follow depth "+i+"="+
445:                    				   localFollowSet.toString(getTokenNames())+")");
446:                     */
447:                    followSet.orInPlace(localFollowSet);
448:                    if (exact && !localFollowSet.member(Token.EOR_TOKEN_TYPE)) {
449:                        break;
450:                    }
451:                }
452:                followSet.remove(Token.EOR_TOKEN_TYPE);
453:                return followSet;
454:            }
455:
456:            /** Attempt to recover from a single missing or extra token.
457:             *
458:             *  EXTRA TOKEN
459:             *
460:             *  LA(1) is not what we are looking for.  If LA(2) has the right token,
461:             *  however, then assume LA(1) is some extra spurious token.  Delete it
462:             *  and LA(2) as if we were doing a normal match(), which advances the
463:             *  input.
464:             *
465:             *  MISSING TOKEN
466:             *
467:             *  If current token is consistent with what could come after
468:             *  ttype then it is ok to "insert" the missing token, else throw
469:             *  exception For example, Input "i=(3;" is clearly missing the
470:             *  ')'.  When the parser returns from the nested call to expr, it
471:             *  will have call chain:
472:             *
473:             *    stat -> expr -> atom
474:             *
475:             *  and it will be trying to match the ')' at this point in the
476:             *  derivation:
477:             *
478:             *       => ID '=' '(' INT ')' ('+' atom)* ';'
479:             *                          ^
480:             *  match() will see that ';' doesn't match ')' and report a
481:             *  mismatched token error.  To recover, it sees that LA(1)==';'
482:             *  is in the set of tokens that can follow the ')' token
483:             *  reference in rule atom.  It can assume that you forgot the ')'.
484:             */
485:            public void recoverFromMismatchedToken(IntStream input,
486:                    RecognitionException e, int ttype, BitSet follow)
487:                    throws RecognitionException {
488:                System.err.println("BR.recoverFromMismatchedToken");
489:                // if next token is what we are looking for then "delete" this token
490:                if (input.LA(2) == ttype) {
491:                    reportError(e);
492:                    /*
493:                    System.err.println("recoverFromMismatchedToken deleting "+input.LT(1)+
494:                    				   " since "+input.LT(2)+" is what we want");
495:                     */
496:                    beginResync();
497:                    input.consume(); // simply delete extra token
498:                    endResync();
499:                    input.consume(); // move past ttype token as if all were ok
500:                    return;
501:                }
502:                if (!recoverFromMismatchedElement(input, e, follow)) {
503:                    throw e;
504:                }
505:            }
506:
507:            public void recoverFromMismatchedSet(IntStream input,
508:                    RecognitionException e, BitSet follow)
509:                    throws RecognitionException {
510:                // TODO do single token deletion like above for Token mismatch
511:                if (!recoverFromMismatchedElement(input, e, follow)) {
512:                    throw e;
513:                }
514:            }
515:
516:            /** This code is factored out from mismatched token and mismatched set
517:             *  recovery.  It handles "single token insertion" error recovery for
518:             *  both.  No tokens are consumed to recover from insertions.  Return
519:             *  true if recovery was possible else return false.
520:             */
521:            protected boolean recoverFromMismatchedElement(IntStream input,
522:                    RecognitionException e, BitSet follow) {
523:                if (follow == null) {
524:                    // we have no information about the follow; we can only consume
525:                    // a single token and hope for the best
526:                    return false;
527:                }
528:                //System.out.println("recoverFromMismatchedElement");
529:                // compute what can follow this grammar element reference
530:                if (follow.member(Token.EOR_TOKEN_TYPE)) {
531:                    BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
532:                    follow = follow.or(viableTokensFollowingThisRule);
533:                    follow.remove(Token.EOR_TOKEN_TYPE);
534:                }
535:                // if current token is consistent with what could come after set
536:                // then it is ok to "insert" the missing token, else throw exception
537:                //System.out.println("viable tokens="+follow.toString(getTokenNames())+")");
538:                if (follow.member(input.LA(1))) {
539:                    //System.out.println("LT(1)=="+input.LT(1)+" is consistent with what follows; inserting...");
540:                    reportError(e);
541:                    return true;
542:                }
543:                //System.err.println("nothing to do; throw exception");
544:                return false;
545:            }
546:
547:            public void consumeUntil(IntStream input, int tokenType) {
548:                //System.out.println("consumeUntil "+tokenType);
549:                int ttype = input.LA(1);
550:                while (ttype != Token.EOF && ttype != tokenType) {
551:                    input.consume();
552:                    ttype = input.LA(1);
553:                }
554:            }
555:
556:            /** Consume tokens until one matches the given token set */
557:            public void consumeUntil(IntStream input, BitSet set) {
558:                //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
559:                int ttype = input.LA(1);
560:                while (ttype != Token.EOF && !set.member(ttype)) {
561:                    //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
562:                    input.consume();
563:                    ttype = input.LA(1);
564:                }
565:            }
566:
567:            /** Push a rule's follow set using our own hardcoded stack */
568:            protected void pushFollow(BitSet fset) {
569:                if ((_fsp + 1) >= following.length) {
570:                    BitSet[] f = new BitSet[following.length * 2];
571:                    System.arraycopy(following, 0, f, 0, following.length - 1);
572:                    following = f;
573:                }
574:                following[++_fsp] = fset;
575:            }
576:
577:            /** Return List<String> of the rules in your parser instance
578:             *  leading up to a call to this method.  You could override if
579:             *  you want more details such as the file/line info of where
580:             *  in the parser java code a rule is invoked.
581:             *
582:             *  This is very useful for error messages and for context-sensitive
583:             *  error recovery.
584:             */
585:            public List getRuleInvocationStack() {
586:                String parserClassName = getClass().getName();
587:                return getRuleInvocationStack(new Throwable(), parserClassName);
588:            }
589:
590:            /** A more general version of getRuleInvocationStack where you can
591:             *  pass in, for example, a RecognitionException to get it's rule
592:             *  stack trace.  This routine is shared with all recognizers, hence,
593:             *  static.
594:             *
595:             *  TODO: move to a utility class or something; weird having lexer call this
596:             */
597:            public static List getRuleInvocationStack(Throwable e,
598:                    String recognizerClassName) {
599:                List rules = new ArrayList();
600:                StackTraceElement[] stack = e.getStackTrace();
601:                int i = 0;
602:                for (i = stack.length - 1; i >= 0; i--) {
603:                    StackTraceElement t = stack[i];
604:                    if (t.getClassName().startsWith("org.antlr.runtime.")) {
605:                        continue; // skip support code such as this method
606:                    }
607:                    if (t.getMethodName().equals(NEXT_TOKEN_RULE_NAME)) {
608:                        continue;
609:                    }
610:                    if (!t.getClassName().equals(recognizerClassName)) {
611:                        continue; // must not be part of this parser
612:                    }
613:                    rules.add(t.getMethodName());
614:                }
615:                return rules;
616:            }
617:
            /** Current backtracking depth; 0 means we are not backtracking. */
            public int getBacktrackingLevel() {
                return backtracking;
            }
621:
            /** Used to print out token names like ID during debugging and
             *  error reporting.  The generated parsers implement a method
             *  that overrides this to point to their String[] tokenNames.
             *
             *  @return null in this base class; generated subclasses return
             *          their real token-name table.
             */
            public String[] getTokenNames() {
                return null;
            }
629:
            /** For debugging and other purposes, might want the grammar name.
             *  Have ANTLR generate an implementation for this method.
             *
             *  @return null in this base class; generated subclasses return
             *          the grammar file name.
             */
            public String getGrammarFileName() {
                return null;
            }
636:
637:            /** A convenience method for use most often with template rewrites.
638:             *  Convert a List<Token> to List<String>
639:             */
640:            public List toStrings(List tokens) {
641:                if (tokens == null)
642:                    return null;
643:                List strings = new ArrayList(tokens.size());
644:                for (int i = 0; i < tokens.size(); i++) {
645:                    strings.add(((Token) tokens.get(i)).getText());
646:                }
647:                return strings;
648:            }
649:
650:            /** Convert a List<RuleReturnScope> to List<StringTemplate> by copying
651:             *  out the .st property.  Useful when converting from
652:             *  list labels to template attributes:
653:             *
654:             *    a : ids+=rule -> foo(ids={toTemplates($ids)})
655:             *      ;
656:             *  TJP: this is not needed anymore.  $ids is a List of templates
657:             *  when output=template
658:             * 
659:            public List toTemplates(List retvals) {
660:            	if ( retvals==null ) return null;
661:            	List strings = new ArrayList(retvals.size());
662:            	for (int i=0; i<retvals.size(); i++) {
663:            		strings.add(((RuleReturnScope)retvals.get(i)).getTemplate());
664:            	}
665:            	return strings;
666:            }
667:             */
668:
669:            /** Given a rule number and a start token index number, return
670:             *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
671:             *  start index.  If this rule has parsed input starting from the
672:             *  start index before, then return where the rule stopped parsing.
673:             *  It returns the index of the last token matched by the rule.
674:             *
675:             *  For now we use a hashtable and just the slow Object-based one.
676:             *  Later, we can make a special one for ints and also one that
677:             *  tosses out data after we commit past input position i.
678:             */
679:            public int getRuleMemoization(int ruleIndex, int ruleStartIndex) {
680:                if (ruleMemo[ruleIndex] == null) {
681:                    ruleMemo[ruleIndex] = new HashMap();
682:                }
683:                Integer stopIndexI = (Integer) ruleMemo[ruleIndex]
684:                        .get(new Integer(ruleStartIndex));
685:                if (stopIndexI == null) {
686:                    return MEMO_RULE_UNKNOWN;
687:                }
688:                return stopIndexI.intValue();
689:            }
690:
691:            /** Has this rule already parsed input at the current index in the
692:             *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
693:             *  If we attempted but failed to parse properly before, return
694:             *  MEMO_RULE_FAILED.
695:             *
696:             *  This method has a side-effect: if we have seen this input for
697:             *  this rule and successfully parsed before, then seek ahead to
698:             *  1 past the stop token matched for this rule last time.
699:             */
700:            public boolean alreadyParsedRule(IntStream input, int ruleIndex) {
701:                int stopIndex = getRuleMemoization(ruleIndex, input.index());
702:                if (stopIndex == MEMO_RULE_UNKNOWN) {
703:                    return false;
704:                }
705:                if (stopIndex == MEMO_RULE_FAILED) {
706:                    //System.out.println("rule "+ruleIndex+" will never succeed");
707:                    failed = true;
708:                } else {
709:                    //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+failed);
710:                    input.seek(stopIndex + 1); // jump to one past stop token
711:                }
712:                return true;
713:            }
714:
715:            /** Record whether or not this rule parsed the input at this position
716:             *  successfully.  Use a standard java hashtable for now.
717:             */
718:            public void memoize(IntStream input, int ruleIndex,
719:                    int ruleStartIndex) {
720:                int stopTokenIndex = failed ? MEMO_RULE_FAILED
721:                        : input.index() - 1;
722:                if (ruleMemo[ruleIndex] != null) {
723:                    ruleMemo[ruleIndex].put(new Integer(ruleStartIndex),
724:                            new Integer(stopTokenIndex));
725:                }
726:            }
727:
728:            /** Assume failure in case a rule bails out with an exception.
729:             *  Reset to rule stop index if successful.
730:            public void memoizeFailure(int ruleIndex, int ruleStartIndex) {
731:            	ruleMemo[ruleIndex].put(
732:            		new Integer(ruleStartIndex), MEMO_RULE_FAILED_I
733:            	);
734:            }
735:             */
736:
737:            /** After successful completion of a rule, record success for this
738:             *  rule and that it can skip ahead next time it attempts this
739:             *  rule for this input position.
740:            public void memoizeSuccess(IntStream input,
741:            						   int ruleIndex,
742:            						   int ruleStartIndex)
743:            {
744:            	ruleMemo[ruleIndex].put(
745:            		new Integer(ruleStartIndex), new Integer(input.index()-1)
746:            	);
747:            }
748:             */
749:
750:            /** return how many rule/input-index pairs there are in total.
751:             *  TODO: this includes synpreds. :(
752:             */
753:            public int getRuleMemoizationCacheSize() {
754:                int n = 0;
755:                for (int i = 0; ruleMemo != null && i < ruleMemo.length; i++) {
756:                    Map ruleMap = ruleMemo[i];
757:                    if (ruleMap != null) {
758:                        n += ruleMap.size(); // how many input indexes are recorded?
759:                    }
760:                }
761:                return n;
762:            }
763:
764:            public void traceIn(String ruleName, int ruleIndex,
765:                    Object inputSymbol) {
766:                System.out.print("enter " + ruleName + " " + inputSymbol);
767:                if (failed) {
768:                    System.out.println(" failed=" + failed);
769:                }
770:                if (backtracking > 0) {
771:                    System.out.print(" backtracking=" + backtracking);
772:                }
773:                System.out.println();
774:            }
775:
776:            public void traceOut(String ruleName, int ruleIndex,
777:                    Object inputSymbol) {
778:                System.out.print("exit " + ruleName + " " + inputSymbol);
779:                if (failed) {
780:                    System.out.println(" failed=" + failed);
781:                }
782:                if (backtracking > 0) {
783:                    System.out.print(" backtracking=" + backtracking);
784:                }
785:                System.out.println();
786:            }
787:
788:            /** A syntactic predicate.  Returns true/false depending on whether
789:             *  the specified grammar fragment matches the current input stream.
790:             *  This resets the failed instance var afterwards.
791:            public boolean synpred(IntStream input, GrammarFragmentPtr fragment) {
792:            	//int i = input.index();
793:            	//System.out.println("begin backtracking="+backtracking+" @"+i+"="+((CommonTokenStream)input).LT(1));
794:            	backtracking++;
795:            	beginBacktrack(backtracking);
796:            	int start = input.mark();
797:            	try {fragment.invoke();}
798:            	catch (RecognitionException re) {
799:            		System.err.println("impossible: "+re);
800:            	}
801:            	boolean success = !failed;
802:            	input.rewind(start);
803:            	endBacktrack(backtracking, success);
804:            	backtracking--;
805:            	//System.out.println("end backtracking="+backtracking+": "+(failed?"FAILED":"SUCCEEDED")+" @"+input.index()+" should be "+i);
806:            	failed=false;
807:            	return success;
808:            }
809:             */
810:        }
www.java2java.com | Contact Us
Copyright 2009 - 2012 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.