0001: package antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.jGuru.com
0005: * Software rights: http://www.antlr.org/RIGHTS.html
0006: *
0007: * $Id: LLkAnalyzer.java,v 1.1 2004/01/21 19:18:32 rgrimm Exp $
0008: */
0009:
0010: import antlr.collections.impl.BitSet;
0011: import antlr.collections.impl.Vector;
0012:
/**A linear-approximate LL(k) grammar analyzer.
0014: *
0015: * All lookahead elements are sets of token types.
0016: *
0017: * @author Terence Parr, John Lilley
0018: * @see antlr.Grammar
0019: * @see antlr.Lookahead
0020: */
0021: public class LLkAnalyzer implements LLkGrammarAnalyzer {
    // Enabled via the grammar's "analyzerDebug" option (see setGrammar)
0023: public boolean DEBUG_ANALYZER = false;
0024: private AlternativeBlock currentBlock;
0025: protected Tool tool = null;
0026: protected Grammar grammar = null;
0027: // True if analyzing a lexical grammar
0028: protected boolean lexicalAnalysis = false;
0029: // Used for formatting bit sets in default (Java) format
0030: CharFormatter charFormatter = new JavaCharFormatter();
0031:
0032: /** Create an LLk analyzer */
0033: public LLkAnalyzer(Tool tool_) {
0034: tool = tool_;
0035: }
0036:
0037: /** Return true if someone used the '.' wildcard default idiom.
0038: * Either #(. children) or '.' as an alt by itself.
0039: */
0040: protected boolean altUsesWildcardDefault(Alternative alt) {
0041: AlternativeElement head = alt.head;
0042: // if element is #(. blah) then check to see if el is root
0043: if (head instanceof TreeElement
0044: && ((TreeElement) head).root instanceof WildcardElement) {
0045: return true;
0046: }
0047: if (head instanceof WildcardElement
0048: && head.next instanceof BlockEndElement) {
0049: return true;
0050: }
0051: return false;
0052: }
0053:
0054: /**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
0055: * @return true if the block is deterministic
0056: */
0057: public boolean deterministic(AlternativeBlock blk) {
0058: /** The lookahead depth for this decision */
0059: int k = 1; // start at k=1
0060: if (DEBUG_ANALYZER)
0061: System.out.println("deterministic(" + blk + ")");
0062: boolean det = true;
0063: int nalts = blk.alternatives.size();
0064: AlternativeBlock saveCurrentBlock = currentBlock;
0065: Alternative wildcardAlt = null;
0066: currentBlock = blk;
0067:
0068: /* don't allow nongreedy (...) blocks */
0069: if (blk.greedy == false && !(blk instanceof OneOrMoreBlock)
0070: && !(blk instanceof ZeroOrMoreBlock)) {
            tool.warning(
                    "Being nongreedy only makes sense for (...)+ and (...)*",
                    grammar.getFilename(), blk.getLine(), blk.getColumn());
0076: }
0077:
0078: // SPECIAL CASE: only one alternative. We don't need to check the
0079: // determinism, but other code expects the lookahead cache to be
0080: // set for the single alt.
0081: if (nalts == 1) {
0082: AlternativeElement e = blk.getAlternativeAt(0).head;
0083: currentBlock.alti = 0;
0084: blk.getAlternativeAt(0).cache[1] = e.look(1);
0085: blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
0086: currentBlock = saveCurrentBlock;
0087: return true; // always deterministic for one alt
0088: }
0089:
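        /* Pairwise comparison using linear-approximate lookahead: for each depth
         * d in 1..maxk we intersect the *set* of tokens alt i can start with at
         * depth d against the same set for alt j, rather than comparing full
         * k-sequences.  An illustrative (hypothetical) decision:
         *     ( A B | A C )   needs k=2: the depth-1 sets are both {A}, while the
         *                     depth-2 sets {B} vs {C} are disjoint.
         * The approximation can report conflicts that full LL(k) would not, e.g.
         * comparing (A B | B A) against A A: both the depth-1 and depth-2 sets
         * intersect ({A,B} vs {A}) even though the actual 2-sequences differ.
         */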
0090: outer: for (int i = 0; i < nalts - 1; i++) {
0091: currentBlock.alti = i;
0092: currentBlock.analysisAlt = i; // which alt are we analyzing?
0093: currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
0094: // but we need the alt number.
0095: inner:
0096: // compare against other alternatives with lookahead depth k
0097: for (int j = i + 1; j < nalts; j++) {
0098: currentBlock.altj = j;
0099: if (DEBUG_ANALYZER)
0100: System.out.println("comparing " + i
0101: + " against alt " + j);
0102: currentBlock.analysisAlt = j; // which alt are we analyzing?
0103: k = 1; // always attempt minimum lookahead possible.
0104:
0105: // check to see if there is a lookahead depth that distinguishes
0106: // between alternatives i and j.
0107: Lookahead[] r = new Lookahead[grammar.maxk + 1];
0108: boolean haveAmbiguity;
0109: do {
0110: haveAmbiguity = false;
0111: if (DEBUG_ANALYZER)
0112: System.out.println("checking depth " + k + "<="
0113: + grammar.maxk);
0114: Lookahead p, q;
0115: p = getAltLookahead(blk, i, k);
0116: q = getAltLookahead(blk, j, k);
0117:
0118: // compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
0119: // Lookahead must be disjoint.
0120: if (DEBUG_ANALYZER)
0121: System.out.println("p is "
0122: + p.toString(",", charFormatter,
0123: grammar));
0124: if (DEBUG_ANALYZER)
0125: System.out.println("q is "
0126: + q.toString(",", charFormatter,
0127: grammar));
0128: // r[i] = p.fset.and(q.fset);
0129: r[k] = p.intersection(q);
0130: if (DEBUG_ANALYZER)
0131: System.out.println("intersection at depth " + k
0132: + " is " + r[k].toString());
0133: if (!r[k].nil()) {
0134: haveAmbiguity = true;
0135: k++;
0136: }
0137: // go until no more lookahead to use or no intersection
0138: } while (haveAmbiguity && k <= grammar.maxk);
0139:
0140: Alternative ai = blk.getAlternativeAt(i);
0141: Alternative aj = blk.getAlternativeAt(j);
0142: if (haveAmbiguity) {
0143: det = false;
0144: ai.lookaheadDepth = NONDETERMINISTIC;
0145: aj.lookaheadDepth = NONDETERMINISTIC;
0146:
0147: /* if ith alt starts with a syntactic predicate, computing the
0148: * lookahead is still done for code generation, but messages
0149: * should not be generated when comparing against alt j.
0150: * Alternatives with syn preds that are unnecessary do
0151: * not result in syn pred try-blocks.
0152: */
0153: if (ai.synPred != null) {
0154: if (DEBUG_ANALYZER) {
0155: System.out.println("alt " + i
0156: + " has a syn pred");
0157: }
0158: // The alt with the (...)=> block is nondeterministic for sure.
0159: // If the (...)=> conflicts with alt j, j is nondeterministic.
0160: // This prevents alt j from being in any switch statements.
0161: // move on to next alternative=>no possible ambiguity!
0162: // continue inner;
0163: }
0164:
0165: /* if ith alt starts with a semantic predicate, computing the
0166: * lookahead is still done for code generation, but messages
0167: * should not be generated when comparing against alt j.
0168: */
0169: else if (ai.semPred != null) {
0170: if (DEBUG_ANALYZER) {
0171: System.out.println("alt " + i
0172: + " has a sem pred");
0173: }
0174: }
0175:
                    /* If the jth alt is exactly the wildcard or the wildcard root of a tree,
                     * then remove the elements of alt i's lookahead from alt j's lookahead.
                     * Don't issue an ambiguity warning.
                     */
0180: else if (altUsesWildcardDefault(aj)) {
0181: // System.out.println("removing pred sets");
0182: // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
0183: wildcardAlt = aj;
0184: }
0185:
0186: /* If the user specified warnWhenFollowAmbig=false, then we
0187: * can turn off this warning IFF one of the alts is empty;
0188: * that is, it points immediately at the end block.
0189: */
0190: else if (!blk.warnWhenFollowAmbig
0191: && (ai.head instanceof BlockEndElement || aj.head instanceof BlockEndElement)) {
0192: // System.out.println("ai.head pts to "+ai.head.getClass());
0193: // System.out.println("aj.head pts to "+aj.head.getClass());
0194: }
0195:
0196: /* If they have the generateAmbigWarnings option off for the block
0197: * then don't generate a warning.
0198: */
0199: else if (!blk.generateAmbigWarnings) {
0200: }
0201:
                    /* If greedy=true and exactly one alt is empty, shut off the warning. */
0203: else if (blk.greedySet
0204: && blk.greedy
0205: && ((ai.head instanceof BlockEndElement && !(aj.head instanceof BlockEndElement)) || (aj.head instanceof BlockEndElement && !(ai.head instanceof BlockEndElement)))) {
0206: // System.out.println("greedy set to true; one alt empty");
0207: }
0208:
                    /* We have no choice but to report a nondeterminism */
0210: else {
0211: tool.errorHandler.warnAltAmbiguity(grammar,
0212: blk, // the block
0213: lexicalAnalysis, // true if lexical
0214: grammar.maxk, // depth of ambiguity
0215: r, // set of linear ambiguities
0216: i, // first ambiguous alternative
0217: j // second ambiguous alternative
0218: );
0219: }
0220: } else {
0221: // a lookahead depth, k, was found where i and j do not conflict
0222: ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
0223: aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
0224: }
0225: }
0226: }
0227:
0228: // finished with block.
0229:
0230: // If had wildcard default clause idiom, remove competing lookahead
0231: /*
0232: if ( wildcardAlt!=null ) {
0233: removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
0234: }
0235: */
0236:
0237: currentBlock = saveCurrentBlock;
0238: return det;
0239: }
0240:
    /**Is this (...)+ block LL(k)? Fill in alternative cache for this block.
0242: * @return true if the block is deterministic
0243: */
0244: public boolean deterministic(OneOrMoreBlock blk) {
0245: if (DEBUG_ANALYZER)
0246: System.out.println("deterministic(...)+(" + blk + ")");
0247: AlternativeBlock saveCurrentBlock = currentBlock;
0248: currentBlock = blk;
0249: boolean blkOk = deterministic((AlternativeBlock) blk);
0250: // block has been checked, now check that what follows does not conflict
0251: // with the lookahead of the (...)+ block.
0252: boolean det = deterministicImpliedPath(blk);
0253: currentBlock = saveCurrentBlock;
0254: return det && blkOk;
0255: }
0256:
    /**Is this (...)* block LL(k)? Fill in alternative cache for this block.
0258: * @return true if the block is deterministic
0259: */
0260: public boolean deterministic(ZeroOrMoreBlock blk) {
0261: if (DEBUG_ANALYZER)
0262: System.out.println("deterministic(...)*(" + blk + ")");
0263: AlternativeBlock saveCurrentBlock = currentBlock;
0264: currentBlock = blk;
0265: boolean blkOk = deterministic((AlternativeBlock) blk);
0266: // block has been checked, now check that what follows does not conflict
0267: // with the lookahead of the (...)* block.
0268: boolean det = deterministicImpliedPath(blk);
0269: currentBlock = saveCurrentBlock;
0270: return det && blkOk;
0271: }
0272:
0273: /**Is this (...)* or (...)+ block LL(k)?
0274: * @return true if the block is deterministic
0275: */
0276: public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
0277: /** The lookahead depth for this decision considering implied exit path */
0278: int k;
0279: boolean det = true;
0280: Vector alts = blk.getAlternatives();
0281: int nalts = alts.size();
0282: currentBlock.altj = -1; // comparing against implicit optional/exit alt
0283:
0284: if (DEBUG_ANALYZER)
0285: System.out.println("deterministicImpliedPath");
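        /* Each alternative is compared against the implicit exit path (what can
         * follow the loop).  For example, in a hypothetical loop ( A )* A the
         * token A both continues the loop and follows it, so the depth-1
         * intersection is non-empty and the decision is nondeterministic at k=1.
         */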
0286: for (int i = 0; i < nalts; i++) { // check follow against all alts
0287: Alternative alt = blk.getAlternativeAt(i);
0288:
0289: if (alt.head instanceof BlockEndElement) {
                tool.warning(
                        "empty alternative makes no sense in (...)* or (...)+",
                        grammar.getFilename(), blk.getLine(), blk.getColumn());
0295: }
0296:
            k = 1; // assume each alt is LL(1) with respect to the exit branch
0298: // check to see if there is a lookahead depth that distinguishes
0299: // between alternative i and the exit branch.
0300: Lookahead[] r = new Lookahead[grammar.maxk + 1];
0301: boolean haveAmbiguity;
0302: do {
0303: haveAmbiguity = false;
0304: if (DEBUG_ANALYZER)
0305: System.out.println("checking depth " + k + "<="
0306: + grammar.maxk);
0307: Lookahead p;
0308: Lookahead follow = blk.next.look(k);
0309: blk.exitCache[k] = follow;
0310: currentBlock.alti = i;
0311: p = getAltLookahead(blk, i, k);
0312:
0313: if (DEBUG_ANALYZER)
0314: System.out.println("follow is "
0315: + follow.toString(",", charFormatter,
0316: grammar));
0317: if (DEBUG_ANALYZER)
0318: System.out.println("p is "
0319: + p.toString(",", charFormatter, grammar));
0320: //r[k] = follow.fset.and(p.fset);
0321: r[k] = follow.intersection(p);
0322: if (DEBUG_ANALYZER)
0323: System.out.println("intersection at depth " + k
0324: + " is " + r[k]);
0325: if (!r[k].nil()) {
0326: haveAmbiguity = true;
0327: k++;
0328: }
0329: // go until no more lookahead to use or no intersection
0330: } while (haveAmbiguity && k <= grammar.maxk);
0331:
0332: if (haveAmbiguity) {
0333: det = false;
0334: alt.lookaheadDepth = NONDETERMINISTIC;
0335: blk.exitLookaheadDepth = NONDETERMINISTIC;
0336: Alternative ambigAlt = blk
0337: .getAlternativeAt(currentBlock.alti);
0338:
0339: /* If the user specified warnWhenFollowAmbig=false, then we
0340: * can turn off this warning.
0341: */
0342: if (!blk.warnWhenFollowAmbig) {
0343: }
0344:
0345: /* If they have the generateAmbigWarnings option off for the block
0346: * then don't generate a warning.
0347: */
0348: else if (!blk.generateAmbigWarnings) {
0349: }
0350:
0351: /* If greedy=true and alt not empty, shut off warning */
0352: else if (blk.greedy == true && blk.greedySet
0353: && !(ambigAlt.head instanceof BlockEndElement)) {
0354: if (DEBUG_ANALYZER)
0355: System.out.println("greedy loop");
0356: }
0357:
0358: /* If greedy=false then shut off warning...will have
0359: * to add "if FOLLOW break"
0360: * block during code gen to compensate for removal of warning.
0361: */
0362: else if (blk.greedy == false
0363: && !(ambigAlt.head instanceof BlockEndElement)) {
0364: if (DEBUG_ANALYZER)
0365: System.out.println("nongreedy loop");
0366: // if FOLLOW not single k-string (|set[k]| can
0367: // be > 1 actually) then must warn them that
0368: // loop may terminate incorrectly.
0369: // For example, ('a'..'d')+ ("ad"|"cb")
0370: if (!lookaheadEquivForApproxAndFullAnalysis(
0371: blk.exitCache, grammar.maxk)) {
                        tool.warning(
                                new String[] {
                                        "nongreedy block may exit incorrectly due",
                                        "\tto limitations of linear approximate lookahead (first k-1 sets",
                                        "\tin lookahead not singleton)." },
                                grammar.getFilename(), blk.getLine(), blk.getColumn());
0381: }
0382: }
0383:
0384: // no choice but to generate a warning
0385: else {
0386: tool.errorHandler.warnAltExitAmbiguity(grammar,
0387: blk, // the block
0388: lexicalAnalysis, // true if lexical
0389: grammar.maxk, // depth of ambiguity
0390: r, // set of linear ambiguities
0391: i // ambiguous alternative
0392: );
0393: }
0394: } else {
0395: alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
0396: blk.exitLookaheadDepth = Math.max(
0397: blk.exitLookaheadDepth, k);
0398: }
0399: }
0400: return det;
0401: }
0402:
    /**Compute the lookahead set of whatever follows references to
     * the rule associated with this rule-end block.
     */
0406: public Lookahead FOLLOW(int k, RuleEndElement end) {
0407: // what rule are we trying to compute FOLLOW of?
0408: RuleBlock rb = (RuleBlock) end.block;
0409: // rule name is different in lexer
0410: String rule;
0411: if (lexicalAnalysis) {
0412: rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
0413: } else {
0414: rule = rb.getRuleName();
0415: }
0416:
0417: if (DEBUG_ANALYZER)
0418: System.out.println("FOLLOW(" + k + "," + rule + ")");
0419:
0420: // are we in the midst of computing this FOLLOW already?
0421: if (end.lock[k]) {
0422: if (DEBUG_ANALYZER)
0423: System.out.println("FOLLOW cycle to " + rule);
0424: return new Lookahead(rule);
0425: }
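        /* Cycles arise for mutually recursive rules.  With hypothetical rules
         *     a : A b ;
         *     b : B a | C ;
         * FOLLOW(b) needs FOLLOW(a) (b is last in a) and FOLLOW(a) needs FOLLOW(b)
         * (a is last in the first alt of b); the lock above breaks the recursion by
         * returning a Lookahead that records the cycle by rule name.
         */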
0426:
        // Check to see if there is a cached value
0428: if (end.cache[k] != null) {
0429: if (DEBUG_ANALYZER) {
0430: System.out.println("cache entry FOLLOW("
0431: + k
0432: + ") for "
0433: + rule
0434: + ": "
0435: + end.cache[k].toString(",", charFormatter,
0436: grammar));
0437: }
0438: // if the cache is a complete computation then simply return entry
0439: if (end.cache[k].cycle == null) {
0440: return (Lookahead) end.cache[k].clone();
0441: }
0442: // A cache entry exists, but it is a reference to a cyclic computation.
0443: RuleSymbol rs = (RuleSymbol) grammar
0444: .getSymbol(end.cache[k].cycle);
0445: RuleEndElement re = rs.getBlock().endNode;
            // The other entry may not exist because it was still being
            // computed when this cycle cache entry was created.
0448: if (re.cache[k] == null) {
0449: // return the cycle...that's all we can do at the moment.
0450: return (Lookahead) end.cache[k].clone();
0451: } else {
0452: // replace this cache entry with the entry from the referenced computation.
0453: // Eventually, this percolates a complete (no cycle reference) cache entry
0454: // to this node (or at least gets it closer and closer). This is not
0455: // crucial, but makes cache lookup faster as we might have to look up
0456: // lots of cycle references before finding a complete reference.
0457: end.cache[k] = re.cache[k];
0458: // Return the cache entry associated with the cycle reference.
0459: return (Lookahead) re.cache[k].clone();
0460: }
0461: }
0462:
0463: end.lock[k] = true; // prevent FOLLOW computation cycles
0464:
0465: Lookahead p = new Lookahead();
0466:
0467: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rule);
0468:
0469: // Walk list of references to this rule to compute FOLLOW
0470: for (int i = 0; i < rs.numReferences(); i++) {
0471: RuleRefElement rr = rs.getReference(i);
0472: if (DEBUG_ANALYZER)
0473: System.out.println("next[" + rule + "] is "
0474: + rr.next.toString());
0475: Lookahead q = rr.next.look(k);
0476: if (DEBUG_ANALYZER)
0477: System.out.println("FIRST of next[" + rule
0478: + "] ptr is " + q.toString());
            /* If there is a cycle and that cycle is to the rule for
             * this end block, the cycle is to this same rule. Remove the
             * cycle indication--the lookahead is complete.
             */
0483: if (q.cycle != null && q.cycle.equals(rule)) {
0484: q.cycle = null; // don't want cycle to yourself!
0485: }
0486: // add the lookahead into the current FOLLOW computation set
0487: p.combineWith(q);
0488: if (DEBUG_ANALYZER)
0489: System.out.println("combined FOLLOW[" + rule + "] is "
0490: + p.toString());
0491: }
0492:
0493: end.lock[k] = false; // we're not doing FOLLOW anymore
0494:
        // If no rules follow this one, it may be a start symbol or called by a start symbol;
        // set the follow to be end of file.
0497: if (p.fset.nil() && p.cycle == null) {
0498: if (grammar instanceof TreeWalkerGrammar) {
0499: // Tree grammars don't see EOF, they see end of sibling list or
0500: // "NULL TREE LOOKAHEAD".
0501: p.fset.add(Token.NULL_TREE_LOOKAHEAD);
0502: } else if (grammar instanceof LexerGrammar) {
0503: // Lexical grammars use Epsilon to indicate that the end of rule has been hit
0504: // EOF would be misleading; any character can follow a token rule not just EOF
0505: // as in a grammar (where a start symbol is followed by EOF). There is no
0506: // sequence info in a lexer between tokens to indicate what is the last token
0507: // to be seen.
0508: // p.fset.add(EPSILON_TYPE);
0509: p.setEpsilon();
0510: } else {
0511: p.fset.add(Token.EOF_TYPE);
0512: }
0513: }
0514:
0515: // Cache the result of the FOLLOW computation
0516: if (DEBUG_ANALYZER) {
0517: System.out.println("saving FOLLOW(" + k + ") for " + rule
0518: + ": " + p.toString(",", charFormatter, grammar));
0519: }
0520: end.cache[k] = (Lookahead) p.clone();
0521:
0522: return p;
0523: }
0524:
0525: private Lookahead getAltLookahead(AlternativeBlock blk, int alt,
0526: int k) {
0527: Lookahead p;
0528: Alternative a = blk.getAlternativeAt(alt);
0529: AlternativeElement e = a.head;
0530: //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
0531: if (a.cache[k] == null) {
0532: p = e.look(k);
0533: a.cache[k] = p;
0534: } else {
0535: p = a.cache[k];
0536: }
0537: return p;
0538: }
0539:
0540: /**Actions are ignored */
0541: public Lookahead look(int k, ActionElement action) {
0542: if (DEBUG_ANALYZER)
0543: System.out.println("lookAction(" + k + "," + action + ")");
0544: return action.next.look(k);
0545: }
0546:
0547: /**Combine the lookahead computed for each alternative */
0548: public Lookahead look(int k, AlternativeBlock blk) {
0549: if (DEBUG_ANALYZER)
0550: System.out.println("lookAltBlk(" + k + "," + blk + ")");
0551: AlternativeBlock saveCurrentBlock = currentBlock;
0552: currentBlock = blk;
0553: Lookahead p = new Lookahead();
0554: for (int i = 0; i < blk.alternatives.size(); i++) {
0555: if (DEBUG_ANALYZER)
0556: System.out.println("alt " + i + " of " + blk);
0557: // must set analysis alt
0558: currentBlock.analysisAlt = i;
0559: Alternative alt = blk.getAlternativeAt(i);
0560: AlternativeElement elem = alt.head;
0561: if (DEBUG_ANALYZER) {
0562: if (alt.head == alt.tail) {
0563: System.out.println("alt " + i + " is empty");
0564: }
0565: }
0566: Lookahead q = elem.look(k);
0567: p.combineWith(q);
0568: }
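        /* A ~(...) subrule is handled by inverting the combined set.  For a
         * hypothetical lexer subrule ~('a'|'b') this yields the character
         * vocabulary minus {'a','b'}; for a parser subrule the complement is
         * taken over the token types MIN_USER_TYPE..maxTokenType.
         */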
0569: if (k == 1 && blk.not
0570: && subruleCanBeInverted(blk, lexicalAnalysis)) {
0571: // Invert the lookahead set
0572: if (lexicalAnalysis) {
0573: BitSet b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0574: .clone();
0575: int[] elems = p.fset.toArray();
0576: for (int j = 0; j < elems.length; j++) {
0577: b.remove(elems[j]);
0578: }
0579: p.fset = b;
0580: } else {
0581: p.fset.notInPlace(Token.MIN_USER_TYPE,
0582: grammar.tokenManager.maxTokenType());
0583: }
0584: }
0585: currentBlock = saveCurrentBlock;
0586: return p;
0587: }
0588:
0589: /**Compute what follows this place-holder node and possibly
0590: * what begins the associated loop unless the
0591: * node is locked.
0592: * <p>
0593: * if we hit the end of a loop, we have to include
0594: * what tokens can begin the loop as well. If the start
0595: * node is locked, then we simply found an empty path
0596: * through this subrule while analyzing it. If the
0597: * start node is not locked, then this node was hit
0598: * during a FOLLOW operation and the FIRST of this
0599: * block must be included in that lookahead computation.
0600: */
0601: public Lookahead look(int k, BlockEndElement end) {
0602: if (DEBUG_ANALYZER)
0603: System.out.println("lookBlockEnd(" + k + ", " + end.block
0604: + "); lock is " + end.lock[k]);
0605: if (end.lock[k]) {
0606: // computation in progress => the tokens we would have
0607: // computed (had we not been locked) will be included
0608: // in the set by that computation with the lock on this
0609: // node.
0610: return new Lookahead();
0611: }
0612:
0613: Lookahead p;
0614:
0615: /* Hitting the end of a loop means you can see what begins the loop */
0616: if (end.block instanceof ZeroOrMoreBlock
0617: || end.block instanceof OneOrMoreBlock) {
0618: // compute what can start the block,
0619: // but lock end node so we don't do it twice in same
0620: // computation.
0621: end.lock[k] = true;
0622: p = look(k, end.block);
0623: end.lock[k] = false;
0624: } else {
0625: p = new Lookahead();
0626: }
0627:
0628: /* Tree blocks do not have any follow because they are children
0629: * of what surrounds them. For example, A #(B C) D results in
0630: * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
0631: * indicates that nothing can follow the last node of tree #(B C)
0632: */
0633: if (end.block instanceof TreeElement) {
0634: p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
0635: }
0636:
0637: /* Syntactic predicates such as ( (A)? )=> have no follow per se.
0638: * We cannot accurately say what would be matched following a
0639: * syntactic predicate (you MIGHT be ok if you said it was whatever
0640: * followed the alternative predicted by the predicate). Hence,
0641: * (like end-of-token) we return Epsilon to indicate "unknown
0642: * lookahead."
0643: */
0644: else if (end.block instanceof SynPredBlock) {
0645: p.setEpsilon();
0646: }
0647:
0648: // compute what can follow the block
0649: else {
0650: Lookahead q = end.block.next.look(k);
0651: p.combineWith(q);
0652: }
0653:
0654: return p;
0655: }
0656:
0657: /**Return this char as the lookahead if k=1.
0658: * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
0659: * <p>
0660: * If the atom has the <tt>not</tt> flag on, then
0661: * create the set complement of the tokenType
0662: * which is the set of all characters referenced
0663: * in the grammar with this char turned off.
0664: * Also remove characters from the set that
0665: * are currently allocated for predicting
0666: * previous alternatives. This avoids ambiguity
0667: * messages and is more properly what is meant.
0668: * ( 'a' | ~'a' ) implies that the ~'a' is the
0669: * "else" clause.
0670: * <p>
0671: * NOTE: we do <b>NOT</b> include exit path in
0672: * the exclusion set. E.g.,
0673: * ( 'a' | ~'a' )* 'b'
0674: * should exit upon seeing a 'b' during the loop.
0675: */
0676: public Lookahead look(int k, CharLiteralElement atom) {
0677: if (DEBUG_ANALYZER)
0678: System.out.println("lookCharLiteral(" + k + "," + atom
0679: + ")");
0680: // Skip until analysis hits k==1
0681: if (k > 1) {
0682: return atom.next.look(k - 1);
0683: }
0684: if (lexicalAnalysis) {
0685: if (atom.not) {
0686: BitSet b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0687: .clone();
0688: if (DEBUG_ANALYZER)
0689: System.out.println("charVocab is " + b.toString());
0690: // remove stuff predicted by preceding alts and follow of block
0691: removeCompetingPredictionSets(b, atom);
0692: if (DEBUG_ANALYZER)
                    System.out.println("charVocab after removal of prior alt lookahead "
                            + b.toString());
0696: // now remove element that is stated not to be in the set
0697: b.clear(atom.getType());
0698: return new Lookahead(b);
0699: } else {
0700: return Lookahead.of(atom.getType());
0701: }
0702: } else {
0703: // Should have been avoided by MakeGrammar
0704: tool.panic("Character literal reference found in parser");
0705: // ... so we make the compiler happy
0706: return Lookahead.of(atom.getType());
0707: }
0708: }
0709:
0710: public Lookahead look(int k, CharRangeElement r) {
0711: if (DEBUG_ANALYZER)
0712: System.out.println("lookCharRange(" + k + "," + r + ")");
0713: // Skip until analysis hits k==1
0714: if (k > 1) {
0715: return r.next.look(k - 1);
0716: }
0717: BitSet p = BitSet.of(r.begin);
0718: for (int i = r.begin + 1; i <= r.end; i++) {
0719: p.add(i);
0720: }
0721: return new Lookahead(p);
0722: }
0723:
0724: public Lookahead look(int k, GrammarAtom atom) {
0725: if (DEBUG_ANALYZER)
0726: System.out.println("look(" + k + "," + atom + "["
0727: + atom.getType() + "])");
0728:
0729: if (lexicalAnalysis) {
0730: // MakeGrammar should have created a rule reference instead
0731: tool.panic("token reference found in lexer");
0732: }
0733: // Skip until analysis hits k==1
0734: if (k > 1) {
0735: return atom.next.look(k - 1);
0736: }
0737: Lookahead l = Lookahead.of(atom.getType());
0738: if (atom.not) {
0739: // Invert the lookahead set against the token vocabulary
0740: int maxToken = grammar.tokenManager.maxTokenType();
0741: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0742: // remove stuff predicted by preceding alts and follow of block
0743: removeCompetingPredictionSets(l.fset, atom);
0744: }
0745: return l;
0746: }
0747:
0748: /**The lookahead of a (...)+ block is the combined lookahead of
0749: * all alternatives and, if an empty path is found, the lookahead
0750: * of what follows the block.
0751: */
0752: public Lookahead look(int k, OneOrMoreBlock blk) {
0753: if (DEBUG_ANALYZER)
0754: System.out.println("look+" + k + "," + blk + ")");
0755: Lookahead p = look(k, (AlternativeBlock) blk);
0756: return p;
0757: }
0758:
0759: /**Combine the lookahead computed for each alternative.
0760: * Lock the node so that no other computation may come back
0761: * on itself--infinite loop. This also implies infinite left-recursion
0762: * in the grammar (or an error in this algorithm ;)).
0763: */
0764: public Lookahead look(int k, RuleBlock blk) {
0765: if (DEBUG_ANALYZER)
0766: System.out.println("lookRuleBlk(" + k + "," + blk + ")");
0767: Lookahead p = look(k, (AlternativeBlock) blk);
0768: return p;
0769: }
0770:
0771: /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
0772: * <p>
0773: * TJP says 8/12/99: not true anymore:
0774: * Lexical rules never compute follow. They set epsilon and
0775: * the code generator gens code to check for any character.
0776: * The code generator must remove the tokens used to predict
0777: * any previous alts in the same block.
0778: * <p>
0779: * When the last node of a rule is reached and noFOLLOW,
0780: * it implies that a "local" FOLLOW will be computed
0781: * after this call. I.e.,
0782: * <pre>
0783: * a : b A;
0784: * b : B | ;
0785: * c : b C;
0786: * </pre>
0787: * Here, when computing the look of rule b from rule a,
0788: * we want only {B,EPSILON_TYPE} so that look(b A) will
0789: * be {B,A} not {B,A,C}.
0790: * <p>
0791: * if the end block is not locked and the FOLLOW is
0792: * wanted, the algorithm must compute the lookahead
0793: * of what follows references to this rule. If
0794: * end block is locked, FOLLOW will return an empty set
0795: * with a cycle to the rule associated with this end block.
0796: */
0797: public Lookahead look(int k, RuleEndElement end) {
0798: if (DEBUG_ANALYZER)
0799: System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW="
0800: + end.noFOLLOW + "; lock is " + end.lock[k]);
0801: if (/*lexicalAnalysis ||*/end.noFOLLOW) {
0802: Lookahead p = new Lookahead();
0803: p.setEpsilon();
0804: p.epsilonDepth = BitSet.of(k);
0805: return p;
0806: }
0807: Lookahead p = FOLLOW(k, end);
0808: return p;
0809: }
0810:
0811: /**Compute the lookahead contributed by a rule reference.
0812: *
0813: * <p>
0814: * When computing ruleref lookahead, we don't want the FOLLOW
0815: * computation done if an empty path exists for the rule.
     * The FOLLOW is too loose a set...we want to include
     * only the "local" FOLLOW, i.e., what can follow this
     * particular reference to the rule. In other words, we use
0819: * context information to reduce the complexity of the
0820: * analysis and strengthen the parser.
0821: *
0822: * The noFOLLOW flag is used as a means of restricting
0823: * the FOLLOW to a "local" FOLLOW. This variable is
0824: * orthogonal to the <tt>lock</tt> variable that prevents
0825: * infinite recursion. noFOLLOW does not care about what k is.
0826: */
0827: public Lookahead look(int k, RuleRefElement rr) {
0828: if (DEBUG_ANALYZER)
0829: System.out.println("lookRuleRef(" + k + "," + rr + ")");
0830: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
0831: if (rs == null || !rs.defined) {
            tool.error("no definition of rule " + rr.targetRule,
                    grammar.getFilename(), rr.getLine(), rr.getColumn());
0836: return new Lookahead();
0837: }
0838: RuleBlock rb = rs.getBlock();
0839: RuleEndElement end = rb.endNode;
0840: boolean saveEnd = end.noFOLLOW;
0841: end.noFOLLOW = true;
0842: // go off to the rule and get the lookahead (w/o FOLLOW)
0843: Lookahead p = look(k, rr.targetRule);
0844: if (DEBUG_ANALYZER)
            System.out.println("back from rule ref to " + rr.targetRule);
0847: // restore state of end block
0848: end.noFOLLOW = saveEnd;
0849:
0850: // check for infinite recursion. If a cycle is returned: trouble!
0851: if (p.cycle != null) {
0852: tool.error("infinite recursion to rule " + p.cycle
0853: + " from rule " + rr.enclosingRuleName, grammar
0854: .getFilename(), rr.getLine(), rr.getColumn());
0855: }
0856:
0857: // is the local FOLLOW required?
0858: if (p.containsEpsilon()) {
0859: if (DEBUG_ANALYZER)
0860: System.out.println("rule ref to " + rr.targetRule
0861: + " has eps, depth: " + p.epsilonDepth);
0862:
0863: // remove epsilon
0864: p.resetEpsilon();
0865: // fset.clear(EPSILON_TYPE);
0866:
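            /* This is the "local" FOLLOW described in the look(k, RuleEndElement)
             * javadoc: with the rules a : b A ;  b : B | ; the empty alt of b
             * contributes epsilon, which is replaced here by the lookahead of what
             * follows *this* reference (A), giving {B,A} rather than the global
             * FOLLOW(b).
             */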
0867: // for each lookahead depth that saw epsilon
0868: int[] depths = p.epsilonDepth.toArray();
0869: p.epsilonDepth = null; // clear all epsilon stuff
0870: for (int i = 0; i < depths.length; i++) {
0871: int rk = k - (k - depths[i]);
0872: Lookahead q = rr.next.look(rk); // see comments in Lookahead
0873: p.combineWith(q);
0874: }
0875: // note: any of these look() computations for local follow can
0876: // set EPSILON in the set again if the end of this rule is found.
0877: }
0878:
0879: return p;
0880: }
0881:
0882: public Lookahead look(int k, StringLiteralElement atom) {
0883: if (DEBUG_ANALYZER)
0884: System.out.println("lookStringLiteral(" + k + "," + atom
0885: + ")");
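        /* In a lexer the k-th lookahead of a string literal is simply its k-th
         * character.  For a hypothetical literal "if": look(1)={'i'}, look(2)={'f'},
         * and look(3) falls through to whatever follows the literal.
         */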
0886: if (lexicalAnalysis) {
0887: // need more lookahead than string can provide?
0888: if (k > atom.processedAtomText.length()) {
0889: return atom.next.look(k
0890: - atom.processedAtomText.length());
0891: } else {
0892: // get char at lookahead depth k, from the processed literal text
0893: return Lookahead.of(atom.processedAtomText
0894: .charAt(k - 1));
0895: }
0896: } else {
0897: // Skip until analysis hits k==1
0898: if (k > 1) {
0899: return atom.next.look(k - 1);
0900: }
0901: Lookahead l = Lookahead.of(atom.getType());
0902: if (atom.not) {
0903: // Invert the lookahead set against the token vocabulary
0904: int maxToken = grammar.tokenManager.maxTokenType();
0905: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0906: }
0907: return l;
0908: }
0909: }
0910:
0911: /**The lookahead of a (...)=> block is the lookahead of
0912: * what follows the block. By definition, the syntactic
0913: * predicate block defies static analysis (you want to try it
0914: * out at run-time). The LOOK of (a)=>A B is A for LL(1)
0915: * ### is this even called?
0916: */
0917: public Lookahead look(int k, SynPredBlock blk) {
0918: if (DEBUG_ANALYZER)
0919: System.out.println("look=>(" + k + "," + blk + ")");
0920: return blk.next.look(k);
0921: }
0922:
0923: public Lookahead look(int k, TokenRangeElement r) {
0924: if (DEBUG_ANALYZER)
0925: System.out.println("lookTokenRange(" + k + "," + r + ")");
0926: // Skip until analysis hits k==1
0927: if (k > 1) {
0928: return r.next.look(k - 1);
0929: }
0930: BitSet p = BitSet.of(r.begin);
0931: for (int i = r.begin + 1; i <= r.end; i++) {
0932: p.add(i);
0933: }
0934: return new Lookahead(p);
0935: }
0936:
0937: public Lookahead look(int k, TreeElement t) {
0938: if (DEBUG_ANALYZER)
0939: System.out.println("look(" + k + "," + t.root + "["
0940: + t.root.getType() + "])");
0941: if (k > 1) {
0942: return t.next.look(k - 1);
0943: }
0944: Lookahead l = null;
0945: if (t.root instanceof WildcardElement) {
0946: l = t.root.look(1); // compute FIRST set minus previous rows
0947: } else {
0948: l = Lookahead.of(t.root.getType());
0949: if (t.root.not) {
0950: // Invert the lookahead set against the token vocabulary
0951: int maxToken = grammar.tokenManager.maxTokenType();
0952: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0953: }
0954: }
0955: return l;
0956: }
0957:
0958: public Lookahead look(int k, WildcardElement wc) {
0959: if (DEBUG_ANALYZER)
0960: System.out.println("look(" + k + "," + wc + ")");
0961:
0962: // Skip until analysis hits k==1
0963: if (k > 1) {
0964: return wc.next.look(k - 1);
0965: }
0966:
0967: BitSet b;
0968: if (lexicalAnalysis) {
0969: // Copy the character vocabulary
0970: b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0971: .clone();
0972: } else {
0973: b = new BitSet(1);
0974: // Invert the lookahead set against the token vocabulary
0975: int maxToken = grammar.tokenManager.maxTokenType();
0976: b.notInPlace(Token.MIN_USER_TYPE, maxToken);
0977: if (DEBUG_ANALYZER)
0978: System.out.println("look(" + k + "," + wc
0979: + ") after not: " + b);
0980: }
0981:
0982: // Remove prediction sets from competing alternatives
0983: // removeCompetingPredictionSets(b, wc);
0984:
0985: return new Lookahead(b);
0986: }
0987:
0988: /** The (...)* element is the combined lookahead of the alternatives and what can
0989: * follow the loop.
0990: */
0991: public Lookahead look(int k, ZeroOrMoreBlock blk) {
0992: if (DEBUG_ANALYZER)
0993: System.out.println("look*(" + k + "," + blk + ")");
0994: Lookahead p = look(k, (AlternativeBlock) blk);
0995: Lookahead q = blk.next.look(k);
0996: p.combineWith(q);
0997: return p;
0998: }
0999:
1000: /**Compute the combined lookahead for all productions of a rule.
1001: * If the lookahead returns with epsilon, at least one epsilon
1002: * path exists (one that consumes no tokens). The noFOLLOW
     * flag being set for this rule-end block indicates that a
     * rule reference invoked this rule.
1005: *
1006: * Currently only look(RuleRef) calls this. There is no need
1007: * for the code generator to call this.
1008: */
1009: public Lookahead look(int k, String rule) {
1010: if (DEBUG_ANALYZER)
1011: System.out.println("lookRuleName(" + k + "," + rule + ")");
1012: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rule);
1013: RuleBlock rb = rs.getBlock();
1014:
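        /* A hit on rb.lock[k] below means FIRST(rule) at this depth depends on
         * itself, i.e. the grammar is left-recursive.  For a hypothetical rule
         *     a : a A | B ;
         * the returned Lookahead carries the cycle, and look(RuleRefElement)
         * reports "infinite recursion".
         */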
1015: if (rb.lock[k]) {
1016: if (DEBUG_ANALYZER)
1017: System.out.println("infinite recursion to rule "
1018: + rb.getRuleName());
1019: return new Lookahead(rule);
1020: }
1021:
1022: // have we computed it before?
1023: if (rb.cache[k] != null) {
1024: if (DEBUG_ANALYZER) {
1025: System.out.println("found depth "
1026: + k
1027: + " result in FIRST "
1028: + rule
1029: + " cache: "
1030: + rb.cache[k].toString(",", charFormatter,
1031: grammar));
1032: }
1033: return (Lookahead) rb.cache[k].clone();
1034: }
1035:
1036: rb.lock[k] = true;
1037: Lookahead p = look(k, (RuleBlock) rb);
1038: rb.lock[k] = false;
1039:
1040: // cache results
1041: rb.cache[k] = (Lookahead) p.clone();
1042: if (DEBUG_ANALYZER) {
            System.out.println("saving depth " + k + " result in FIRST " + rule
                    + " cache: "
                    + rb.cache[k].toString(",", charFormatter, grammar));
1051: }
1052: return p;
1053: }
1054:
    /** If the first k-1 sets are singleton sets, the approximate
1056: * lookahead analysis is equivalent to full lookahead analysis.
1057: */
1058: public static boolean lookaheadEquivForApproxAndFullAnalysis(
1059: Lookahead[] bset, int k) {
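        /* Roughly: approximate lookahead treats the k per-depth sets as a cross
         * product of sequences.  If each of the first k-1 sets is a singleton,
         * that product contains exactly the true sequences; a non-singleton
         * earlier set (e.g. {A,B} followed by {C,D}) admits spurious sequences
         * such as A D that full LL(k) analysis might rule out.
         */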
1060: // first k-1 sets degree 1?
1061: for (int i = 1; i <= k - 1; i++) {
1062: BitSet look = bset[i].fset;
1063: if (look.degree() > 1) {
1064: return false;
1065: }
1066: }
1067: return true;
1068: }
1069:
    /** Remove the prediction sets of preceding alternatives
     * and the follow set, but *only* if this element is the first element
     * of the alternative. The class members currentBlock and
     * currentBlock.analysisAlt must be set correctly.
     * @param b The prediction bitset to be modified
     * @param el The element of interest
     */
1077: private void removeCompetingPredictionSets(BitSet b,
1078: AlternativeElement el) {
1079: // Only do this if the element is the first element of the alt,
1080: // because we are making an implicit assumption that k==1.
1081: GrammarElement head = currentBlock
1082: .getAlternativeAt(currentBlock.analysisAlt).head;
1083: // if element is #(. blah) then check to see if el is root
1084: if (head instanceof TreeElement) {
1085: if (((TreeElement) head).root != el) {
1086: return;
1087: }
1088: } else if (el != head) {
1089: return;
1090: }
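        /* Subtract what earlier alternatives already predict.  E.g. in
         * ( 'a' | ~'a' ), the 'a' predicted by the first alt is removed from the
         * inverted set of the second, so ~'a' acts as the "else" clause (see the
         * look(CharLiteralElement) javadoc above).
         */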
1091: for (int i = 0; i < currentBlock.analysisAlt; i++) {
1092: AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1093: b.subtractInPlace(e.look(1).fset);
1094: }
1095: }
1096:
    /** Remove the prediction sets of preceding alternatives
     * for lookahead depths 1..k. The class member currentBlock
     * must be set correctly.
     * @param look The prediction lookahead to be modified
     * @param el The element of interest
     * @param k How deep into the lookahead to modify
     */
1104: private void removeCompetingPredictionSetsFromWildcard(
1105: Lookahead[] look, AlternativeElement el, int k) {
1106: for (int d = 1; d <= k; d++) {
1107: for (int i = 0; i < currentBlock.analysisAlt; i++) {
1108: AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1109: look[d].fset.subtractInPlace(e.look(d).fset);
1110: }
1111: }
1112: }
1113:
1114: /** reset the analyzer so it looks like a new one */
1115: private void reset() {
1116: grammar = null;
1117: DEBUG_ANALYZER = false;
1118: currentBlock = null;
1119: lexicalAnalysis = false;
1120: }
1121:
1122: /** Set the grammar for the analyzer */
1123: public void setGrammar(Grammar g) {
1124: if (grammar != null) {
1125: reset();
1126: }
1127: grammar = g;
1128:
1129: // Is this lexical?
1130: lexicalAnalysis = (grammar instanceof LexerGrammar);
1131: DEBUG_ANALYZER = grammar.analyzerDebug;
1132: }
1133:
1134: public boolean subruleCanBeInverted(AlternativeBlock blk,
1135: boolean forLexer) {
1136: if (blk instanceof ZeroOrMoreBlock
1137: || blk instanceof OneOrMoreBlock
1138: || blk instanceof SynPredBlock) {
1139: return false;
1140: }
1141: // Cannot invert an empty subrule
1142: if (blk.alternatives.size() == 0) {
1143: return false;
1144: }
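        /* Only "flat" subrules qualify.  For example, ~('a'|'b'|'c') can be
         * inverted because every alternative is a single char/token element,
         * while a subrule containing a multi-element alternative, a predicate,
         * or an exception spec cannot.
         */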
1145: // The block must only contain alternatives with a single element,
1146: // where each element is a char, token, char range, or token range.
1147: for (int i = 0; i < blk.alternatives.size(); i++) {
1148: Alternative alt = blk.getAlternativeAt(i);
1149: // Cannot have anything interesting in the alternative ...
1150: if (alt.synPred != null || alt.semPred != null
1151: || alt.exceptionSpec != null) {
1152: return false;
1153: }
1154: // ... and there must be one simple element
1155: AlternativeElement elt = alt.head;
            if (!(elt instanceof CharLiteralElement
                    || elt instanceof TokenRefElement
                    || elt instanceof CharRangeElement
                    || elt instanceof TokenRangeElement
                    || (elt instanceof StringLiteralElement && !forLexer))
                    || !(elt.next instanceof BlockEndElement)
                    || elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE) {
1162: return false;
1163: }
1164: }
1165: return true;
1166: }
1167: }