0001: package antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.cs.usfca.edu
0005: * Software rights: http://www.antlr.org/license.html
0006: */
0007:
0008: import antlr.collections.impl.BitSet;
0009: import antlr.collections.impl.Vector;
0010:
0011: /**A linear-approximate LL(k) grammar analyzer.
0012: *
0013: * All lookahead elements are sets of token types.
0014: *
0015: * @author Terence Parr, John Lilley
0016: * @see antlr.Grammar
0017: * @see antlr.Lookahead
0018: */
0019: public class LLkAnalyzer implements LLkGrammarAnalyzer {
0020: // Set to true via the grammar's "analyzerDebug" option (see setGrammar)
0021: public boolean DEBUG_ANALYZER = false;
0022: private AlternativeBlock currentBlock;
0023: protected Tool tool = null;
0024: protected Grammar grammar = null;
0025: // True if analyzing a lexical grammar
0026: protected boolean lexicalAnalysis = false;
0027: // Used for formatting bit sets in default (Java) format
0028: CharFormatter charFormatter = new JavaCharFormatter();
0029:
0030: /** Create an LLk analyzer */
0031: public LLkAnalyzer(Tool tool_) {
0032: tool = tool_;
0033: }
0034:
0035: /** Return true if someone used the '.' wildcard default idiom.
0036: * Either #(. children) or '.' as an alt by itself.
0037: */
0038: protected boolean altUsesWildcardDefault(Alternative alt) {
0039: AlternativeElement head = alt.head;
0040: // if element is #(. blah) then check to see if el is root
0041: if (head instanceof TreeElement
0042: && ((TreeElement) head).root instanceof WildcardElement) {
0043: return true;
0044: }
0045: if (head instanceof WildcardElement
0046: && head.next instanceof BlockEndElement) {
0047: return true;
0048: }
0049: return false;
0050: }
0051:
0052: /**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
0053: * @return true if the block is deterministic
0054: */
0055: public boolean deterministic(AlternativeBlock blk) {
0056: /* The lookahead depth for this decision */
0057: int k = 1; // start at k=1
0058: if (DEBUG_ANALYZER)
0059: System.out.println("deterministic(" + blk + ")");
0060: boolean det = true;
0061: int nalts = blk.alternatives.size();
0062: AlternativeBlock saveCurrentBlock = currentBlock;
0063: Alternative wildcardAlt = null;
0064: currentBlock = blk;
0065:
0066: /* nongreedy only makes sense for (...)+ and (...)* loops; warn otherwise */
0067: if (blk.greedy == false && !(blk instanceof OneOrMoreBlock)
0068: && !(blk instanceof ZeroOrMoreBlock)) {
0069: tool
0070: .warning(
0071: "Being nongreedy only makes sense for (...)+ and (...)*",
0072: grammar.getFilename(), blk.getLine(), blk
0073: .getColumn());
0074: }
0075:
0076: // SPECIAL CASE: only one alternative. We don't need to check the
0077: // determinism, but other code expects the lookahead cache to be
0078: // set for the single alt.
0079: if (nalts == 1) {
0080: AlternativeElement e = blk.getAlternativeAt(0).head;
0081: currentBlock.alti = 0;
0082: blk.getAlternativeAt(0).cache[1] = e.look(1);
0083: blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
0084: currentBlock = saveCurrentBlock;
0085: return true; // always deterministic for one alt
0086: }
0087:
0088: outer: for (int i = 0; i < nalts - 1; i++) {
0089: currentBlock.alti = i;
0090: currentBlock.analysisAlt = i; // which alt are we analyzing?
0091: currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
0092: // but we need the alt number.
0093: inner:
0094: // compare against other alternatives with lookahead depth k
0095: for (int j = i + 1; j < nalts; j++) {
0096: currentBlock.altj = j;
0097: if (DEBUG_ANALYZER)
0098: System.out.println("comparing " + i
0099: + " against alt " + j);
0100: currentBlock.analysisAlt = j; // which alt are we analyzing?
0101: k = 1; // always attempt minimum lookahead possible.
0102:
0103: // check to see if there is a lookahead depth that distinguishes
0104: // between alternatives i and j.
0105: Lookahead[] r = new Lookahead[grammar.maxk + 1];
0106: boolean haveAmbiguity;
0107: do {
0108: haveAmbiguity = false;
0109: if (DEBUG_ANALYZER)
0110: System.out.println("checking depth " + k + "<="
0111: + grammar.maxk);
0112: Lookahead p, q;
0113: p = getAltLookahead(blk, i, k);
0114: q = getAltLookahead(blk, j, k);
0115:
0116: // compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
0117: // Lookahead must be disjoint.
0118: if (DEBUG_ANALYZER)
0119: System.out.println("p is "
0120: + p.toString(",", charFormatter,
0121: grammar));
0122: if (DEBUG_ANALYZER)
0123: System.out.println("q is "
0124: + q.toString(",", charFormatter,
0125: grammar));
0126: // r[i] = p.fset.and(q.fset);
0127: r[k] = p.intersection(q);
0128: if (DEBUG_ANALYZER)
0129: System.out.println("intersection at depth " + k
0130: + " is " + r[k].toString());
0131: if (!r[k].nil()) {
0132: haveAmbiguity = true;
0133: k++;
0134: }
0135: // go until no more lookahead to use or no intersection
0136: } while (haveAmbiguity && k <= grammar.maxk);
0137:
0138: Alternative ai = blk.getAlternativeAt(i);
0139: Alternative aj = blk.getAlternativeAt(j);
0140: if (haveAmbiguity) {
0141: det = false;
0142: ai.lookaheadDepth = NONDETERMINISTIC;
0143: aj.lookaheadDepth = NONDETERMINISTIC;
0144:
0145: /* if ith alt starts with a syntactic predicate, computing the
0146: * lookahead is still done for code generation, but messages
0147: * should not be generated when comparing against alt j.
0148: * Alternatives with syn preds that are unnecessary do
0149: * not result in syn pred try-blocks.
0150: */
0151: if (ai.synPred != null) {
0152: if (DEBUG_ANALYZER) {
0153: System.out.println("alt " + i
0154: + " has a syn pred");
0155: }
0156: // The alt with the (...)=> block is nondeterministic for sure.
0157: // If the (...)=> conflicts with alt j, j is nondeterministic.
0158: // This prevents alt j from being in any switch statements.
0159: // move on to next alternative=>no possible ambiguity!
0160: // continue inner;
0161: }
0162:
0163: /* if ith alt starts with a semantic predicate, computing the
0164: * lookahead is still done for code generation, but messages
0165: * should not be generated when comparing against alt j.
0166: */
0167: else if (ai.semPred != null) {
0168: if (DEBUG_ANALYZER) {
0169: System.out.println("alt " + i
0170: + " has a sem pred");
0171: }
0172: }
0173:
0174: /* if jth alt is exactly the wildcard or wildcard root of tree,
0175: * then remove elements from alt i lookahead from alt j's lookahead.
0176: * Don't do an ambiguity warning.
0177: */
0178: else if (altUsesWildcardDefault(aj)) {
0179: // System.out.println("removing pred sets");
0180: // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
0181: wildcardAlt = aj;
0182: }
0183:
0184: /* If the user specified warnWhenFollowAmbig=false, then we
0185: * can turn off this warning IFF one of the alts is empty;
0186: * that is, it points immediately at the end block.
0187: */
0188: else if (!blk.warnWhenFollowAmbig
0189: && (ai.head instanceof BlockEndElement || aj.head instanceof BlockEndElement)) {
0190: // System.out.println("ai.head pts to "+ai.head.getClass());
0191: // System.out.println("aj.head pts to "+aj.head.getClass());
0192: }
0193:
0194: /* If they have the generateAmbigWarnings option off for the block
0195: * then don't generate a warning.
0196: */
0197: else if (!blk.generateAmbigWarnings) {
0198: }
0199:
0200: /* If greedy=true and exactly *one* alt is empty, shut off the warning. */
0201: else if (blk.greedySet
0202: && blk.greedy
0203: && ((ai.head instanceof BlockEndElement && !(aj.head instanceof BlockEndElement)) || (aj.head instanceof BlockEndElement && !(ai.head instanceof BlockEndElement)))) {
0204: // System.out.println("greedy set to true; one alt empty");
0205: }
0206:
0207: /* We have no choice but to report a nondeterminism */
0208: else {
0209: tool.errorHandler.warnAltAmbiguity(grammar,
0210: blk, // the block
0211: lexicalAnalysis, // true if lexical
0212: grammar.maxk, // depth of ambiguity
0213: r, // set of linear ambiguities
0214: i, // first ambiguous alternative
0215: j // second ambiguous alternative
0216: );
0217: }
0218: } else {
0219: // a lookahead depth, k, was found where i and j do not conflict
0220: ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
0221: aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
0222: }
0223: }
0224: }
0225:
0226: // finished with block.
0227:
0228: // If had wildcard default clause idiom, remove competing lookahead
0229: /*
0230: if ( wildcardAlt!=null ) {
0231: removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
0232: }
0233: */
0234:
0235: currentBlock = saveCurrentBlock;
0236: return det;
0237: }
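/* Worked example for deterministic(AlternativeBlock) above (hypothetical
 * grammar fragment; rule and token names are illustrative only):
 *
 *     decl : ID COLON typeName | ID ASSIGN expr ;
 *
 * At k=1 both alternatives predict {ID}, so r[1] is non-nil and k is bumped
 * to 2.  At k=2 the sets {COLON} and {ASSIGN} are disjoint, the do-loop exits
 * with haveAmbiguity==false, and both alternatives get lookaheadDepth=2.
 * Had the sets still intersected at k==grammar.maxk, both alternatives would
 * be marked NONDETERMINISTIC and, unless a predicate or block option
 * suppresses it, warnAltAmbiguity would be issued.
 */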
0238:
0239: /**Is this (...)+ block LL(k)? Fill in alternative cache for this block.
0240: * @return true if the block is deterministic
0241: */
0242: public boolean deterministic(OneOrMoreBlock blk) {
0243: if (DEBUG_ANALYZER)
0244: System.out.println("deterministic(...)+(" + blk + ")");
0245: AlternativeBlock saveCurrentBlock = currentBlock;
0246: currentBlock = blk;
0247: boolean blkOk = deterministic((AlternativeBlock) blk);
0248: // block has been checked, now check that what follows does not conflict
0249: // with the lookahead of the (...)+ block.
0250: boolean det = deterministicImpliedPath(blk);
0251: currentBlock = saveCurrentBlock;
0252: return det && blkOk;
0253: }
0254:
0255: /**Is this (...)* block LL(k)? Fill in alternative cache for this block.
0256: * @return true if the block is deterministic
0257: */
0258: public boolean deterministic(ZeroOrMoreBlock blk) {
0259: if (DEBUG_ANALYZER)
0260: System.out.println("deterministic(...)*(" + blk + ")");
0261: AlternativeBlock saveCurrentBlock = currentBlock;
0262: currentBlock = blk;
0263: boolean blkOk = deterministic((AlternativeBlock) blk);
0264: // block has been checked, now check that what follows does not conflict
0265: // with the lookahead of the (...)* block.
0266: boolean det = deterministicImpliedPath(blk);
0267: currentBlock = saveCurrentBlock;
0268: return det && blkOk;
0269: }
0270:
0271: /**Is this (...)* or (...)+ block LL(k)?
0272: * @return true if the block is deterministic
0273: */
0274: public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
0275: /* The lookahead depth for this decision considering implied exit path */
0276: int k;
0277: boolean det = true;
0278: Vector alts = blk.getAlternatives();
0279: int nalts = alts.size();
0280: currentBlock.altj = -1; // comparing against implicit optional/exit alt
0281:
0282: if (DEBUG_ANALYZER)
0283: System.out.println("deterministicImpliedPath");
0284: for (int i = 0; i < nalts; i++) { // check follow against all alts
0285: Alternative alt = blk.getAlternativeAt(i);
0286:
0287: if (alt.head instanceof BlockEndElement) {
0288: tool
0289: .warning(
0290: "empty alternative makes no sense in (...)* or (...)+",
0291: grammar.getFilename(), blk.getLine(),
0292: blk.getColumn());
0293: }
0294:
0295: k = 1; // assume each alt is LL(1) with exit branch
0296: // check to see if there is a lookahead depth that distinguishes
0297: // between alternative i and the exit branch.
0298: Lookahead[] r = new Lookahead[grammar.maxk + 1];
0299: boolean haveAmbiguity;
0300: do {
0301: haveAmbiguity = false;
0302: if (DEBUG_ANALYZER)
0303: System.out.println("checking depth " + k + "<="
0304: + grammar.maxk);
0305: Lookahead p;
0306: Lookahead follow = blk.next.look(k);
0307: blk.exitCache[k] = follow;
0308: currentBlock.alti = i;
0309: p = getAltLookahead(blk, i, k);
0310:
0311: if (DEBUG_ANALYZER)
0312: System.out.println("follow is "
0313: + follow.toString(",", charFormatter,
0314: grammar));
0315: if (DEBUG_ANALYZER)
0316: System.out.println("p is "
0317: + p.toString(",", charFormatter, grammar));
0318: //r[k] = follow.fset.and(p.fset);
0319: r[k] = follow.intersection(p);
0320: if (DEBUG_ANALYZER)
0321: System.out.println("intersection at depth " + k
0322: + " is " + r[k]);
0323: if (!r[k].nil()) {
0324: haveAmbiguity = true;
0325: k++;
0326: }
0327: // go until no more lookahead to use or no intersection
0328: } while (haveAmbiguity && k <= grammar.maxk);
0329:
0330: if (haveAmbiguity) {
0331: det = false;
0332: alt.lookaheadDepth = NONDETERMINISTIC;
0333: blk.exitLookaheadDepth = NONDETERMINISTIC;
0334: Alternative ambigAlt = blk
0335: .getAlternativeAt(currentBlock.alti);
0336:
0337: /* If the user specified warnWhenFollowAmbig=false, then we
0338: * can turn off this warning.
0339: */
0340: if (!blk.warnWhenFollowAmbig) {
0341: }
0342:
0343: /* If they have the generateAmbigWarnings option off for the block
0344: * then don't generate a warning.
0345: */
0346: else if (!blk.generateAmbigWarnings) {
0347: }
0348:
0349: /* If greedy=true and alt not empty, shut off warning */
0350: else if (blk.greedy == true && blk.greedySet
0351: && !(ambigAlt.head instanceof BlockEndElement)) {
0352: if (DEBUG_ANALYZER)
0353: System.out.println("greedy loop");
0354: }
0355:
0356: /* If greedy=false then shut off warning...will have
0357: * to add "if FOLLOW break"
0358: * block during code gen to compensate for removal of warning.
0359: */
0360: else if (blk.greedy == false
0361: && !(ambigAlt.head instanceof BlockEndElement)) {
0362: if (DEBUG_ANALYZER)
0363: System.out.println("nongreedy loop");
0364: // if FOLLOW not single k-string (|set[k]| can
0365: // be > 1 actually) then must warn them that
0366: // loop may terminate incorrectly.
0367: // For example, ('a'..'d')+ ("ad"|"cb")
0368: if (!lookaheadEquivForApproxAndFullAnalysis(
0369: blk.exitCache, grammar.maxk)) {
0370: tool
0371: .warning(
0372: new String[] {
0373: "nongreedy block may exit incorrectly due",
0374: "\tto limitations of linear approximate lookahead (first k-1 sets",
0375: "\tin lookahead not singleton)." },
0376: grammar.getFilename(), blk
0377: .getLine(), blk
0378: .getColumn());
0379: }
0380: }
0381:
0382: // no choice but to generate a warning
0383: else {
0384: tool.errorHandler.warnAltExitAmbiguity(grammar,
0385: blk, // the block
0386: lexicalAnalysis, // true if lexical
0387: grammar.maxk, // depth of ambiguity
0388: r, // set of linear ambiguities
0389: i // ambiguous alternative
0390: );
0391: }
0392: } else {
0393: alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
0394: blk.exitLookaheadDepth = Math.max(
0395: blk.exitLookaheadDepth, k);
0396: }
0397: }
0398: return det;
0399: }
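/* Illustrative case for deterministicImpliedPath above (hypothetical
 * fragment):
 *
 *     list : ( ID )* ID SEMI ;
 *
 * The loop alternative and the implied exit path both predict {ID} at depth 1,
 * so r[1] is non-nil; if grammar.maxk permits, depth 2 compares {ID} against
 * {SEMI}, which are disjoint, and the exit decision becomes LL(2).  With
 * maxk==1 the exit would instead be marked NONDETERMINISTIC and
 * warnAltExitAmbiguity reported, unless the greedy or warnWhenFollowAmbig
 * options suppress the warning.
 */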
0400:
0401: /**Compute the lookahead set of whatever follows references to
0402: * the rule associated with the given end-of-rule element.
0403: */
0404: public Lookahead FOLLOW(int k, RuleEndElement end) {
0405: // what rule are we trying to compute FOLLOW of?
0406: RuleBlock rb = (RuleBlock) end.block;
0407: // rule name is different in lexer
0408: String rule;
0409: if (lexicalAnalysis) {
0410: rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
0411: } else {
0412: rule = rb.getRuleName();
0413: }
0414:
0415: if (DEBUG_ANALYZER)
0416: System.out.println("FOLLOW(" + k + "," + rule + ")");
0417:
0418: // are we in the midst of computing this FOLLOW already?
0419: if (end.lock[k]) {
0420: if (DEBUG_ANALYZER)
0421: System.out.println("FOLLOW cycle to " + rule);
0422: return new Lookahead(rule);
0423: }
0424:
0425: // Check to see if there is a cached value
0426: if (end.cache[k] != null) {
0427: if (DEBUG_ANALYZER) {
0428: System.out.println("cache entry FOLLOW("
0429: + k
0430: + ") for "
0431: + rule
0432: + ": "
0433: + end.cache[k].toString(",", charFormatter,
0434: grammar));
0435: }
0436: // if the cache is a complete computation then simply return entry
0437: if (end.cache[k].cycle == null) {
0438: return (Lookahead) end.cache[k].clone();
0439: }
0440: // A cache entry exists, but it is a reference to a cyclic computation.
0441: RuleSymbol rs = (RuleSymbol) grammar
0442: .getSymbol(end.cache[k].cycle);
0443: RuleEndElement re = rs.getBlock().endNode;
0444: // The other entry may not exist because it was still being
0445: // computed when this cycle cache entry was created.
0446: if (re.cache[k] == null) {
0447: // return the cycle...that's all we can do at the moment.
0448: return (Lookahead) end.cache[k].clone();
0449: } else {
0450: if (DEBUG_ANALYZER) {
0451: System.out.println("combining FOLLOW("
0452: + k
0453: + ") for "
0454: + rule
0455: + ": from "
0456: + end.cache[k].toString(",", charFormatter,
0457: grammar)
0458: + " with FOLLOW for "
0459: + ((RuleBlock) re.block).getRuleName()
0460: + ": "
0461: + re.cache[k].toString(",", charFormatter,
0462: grammar));
0463: }
0464: // combine results from other rule's FOLLOW
0465: if (re.cache[k].cycle == null) {
0466: // current rule depends on another rule's FOLLOW and
0467: // it is complete with no cycle; just kill our cycle and
0468: // combine full result from other rule's FOLLOW
0469: end.cache[k].combineWith(re.cache[k]);
0470: end.cache[k].cycle = null; // kill cycle as we're complete
0471: } else {
0472: // the FOLLOW cache for other rule has a cycle also.
0473: // Here is where we bubble up a cycle. We better recursively
0474: // wipe out cycles (partial computations). I'm a little nervous
0475: // that we might leave a cycle here, however.
0476: Lookahead refFOLLOW = FOLLOW(k, re);
0477: end.cache[k].combineWith(refFOLLOW);
0478: // all cycles should be gone, but if not, record ref to cycle
0479: end.cache[k].cycle = refFOLLOW.cycle;
0480: }
0481: if (DEBUG_ANALYZER) {
0482: System.out.println("saving FOLLOW("
0483: + k
0484: + ") for "
0485: + rule
0486: + ": from "
0487: + end.cache[k].toString(",", charFormatter,
0488: grammar));
0489: }
0490: // Return the updated cache entry associated
0491: // with the cycle reference.
0492: return (Lookahead) end.cache[k].clone();
0493: }
0494: }
0495:
0496: end.lock[k] = true; // prevent FOLLOW computation cycles
0497:
0498: Lookahead p = new Lookahead();
0499:
0500: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rule);
0501:
0502: // Walk list of references to this rule to compute FOLLOW
0503: for (int i = 0; i < rs.numReferences(); i++) {
0504: RuleRefElement rr = rs.getReference(i);
0505: if (DEBUG_ANALYZER)
0506: System.out.println("next[" + rule + "] is "
0507: + rr.next.toString());
0508: Lookahead q = rr.next.look(k);
0509: if (DEBUG_ANALYZER)
0510: System.out.println("FIRST of next[" + rule
0511: + "] ptr is " + q.toString());
0512: /* If there is a cycle then if the cycle is to the rule for
0513: * this end block, you have a cycle to yourself. Remove the
0514: * cycle indication--the lookahead is complete.
0515: */
0516: if (q.cycle != null && q.cycle.equals(rule)) {
0517: q.cycle = null; // don't want cycle to yourself!
0518: }
0519: // add the lookahead into the current FOLLOW computation set
0520: p.combineWith(q);
0521: if (DEBUG_ANALYZER)
0522: System.out.println("combined FOLLOW[" + rule + "] is "
0523: + p.toString());
0524: }
0525:
0526: end.lock[k] = false; // we're not doing FOLLOW anymore
0527:
0528: // if no rules follow this, it can be a start symbol or called by a start sym.
0529: // set the follow to be end of file.
0530: if (p.fset.nil() && p.cycle == null) {
0531: if (grammar instanceof TreeWalkerGrammar) {
0532: // Tree grammars don't see EOF, they see end of sibling list or
0533: // "NULL TREE LOOKAHEAD".
0534: p.fset.add(Token.NULL_TREE_LOOKAHEAD);
0535: } else if (grammar instanceof LexerGrammar) {
0536: // Lexical grammars use Epsilon to indicate that the end of rule has been hit
0537: // EOF would be misleading; any character can follow a token rule not just EOF
0538: // as in a grammar (where a start symbol is followed by EOF). There is no
0539: // sequence info in a lexer between tokens to indicate what is the last token
0540: // to be seen.
0541: // p.fset.add(EPSILON_TYPE);
0542: p.setEpsilon();
0543: } else {
0544: p.fset.add(Token.EOF_TYPE);
0545: }
0546: }
0547:
0548: // Cache the result of the FOLLOW computation
0549: if (DEBUG_ANALYZER) {
0550: System.out.println("saving FOLLOW(" + k + ") for " + rule
0551: + ": " + p.toString(",", charFormatter, grammar));
0552: }
0553: end.cache[k] = (Lookahead) p.clone();
0554:
0555: return p;
0556: }
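/* Example of the FOLLOW computation above (hypothetical rules):
 *
 *     a : b A ;
 *     c : b C ;
 *     b : B ;
 *
 * FOLLOW(1, end-of-b) walks both references to b and combines look(1) of
 * whatever follows each reference, yielding {A, C}.  If b had no references
 * at all (a start rule), the set would fall back to EOF for parsers,
 * NULL_TREE_LOOKAHEAD for tree walkers, or epsilon for lexer rules, as
 * handled at the end of the method.
 */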
0557:
0558: private Lookahead getAltLookahead(AlternativeBlock blk, int alt,
0559: int k) {
0560: Lookahead p;
0561: Alternative a = blk.getAlternativeAt(alt);
0562: AlternativeElement e = a.head;
0563: //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
0564: if (a.cache[k] == null) {
0565: p = e.look(k);
0566: a.cache[k] = p;
0567: } else {
0568: p = a.cache[k];
0569: }
0570: return p;
0571: }
0572:
0573: /**Actions are ignored */
0574: public Lookahead look(int k, ActionElement action) {
0575: if (DEBUG_ANALYZER)
0576: System.out.println("lookAction(" + k + "," + action + ")");
0577: return action.next.look(k);
0578: }
0579:
0580: /**Combine the lookahead computed for each alternative */
0581: public Lookahead look(int k, AlternativeBlock blk) {
0582: if (DEBUG_ANALYZER)
0583: System.out.println("lookAltBlk(" + k + "," + blk + ")");
0584: AlternativeBlock saveCurrentBlock = currentBlock;
0585: currentBlock = blk;
0586: Lookahead p = new Lookahead();
0587: for (int i = 0; i < blk.alternatives.size(); i++) {
0588: if (DEBUG_ANALYZER)
0589: System.out.println("alt " + i + " of " + blk);
0590: // must set analysis alt
0591: currentBlock.analysisAlt = i;
0592: Alternative alt = blk.getAlternativeAt(i);
0593: AlternativeElement elem = alt.head;
0594: if (DEBUG_ANALYZER) {
0595: if (alt.head == alt.tail) {
0596: System.out.println("alt " + i + " is empty");
0597: }
0598: }
0599: Lookahead q = elem.look(k);
0600: p.combineWith(q);
0601: }
0602: if (k == 1 && blk.not
0603: && subruleCanBeInverted(blk, lexicalAnalysis)) {
0604: // Invert the lookahead set
0605: if (lexicalAnalysis) {
0606: BitSet b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0607: .clone();
0608: int[] elems = p.fset.toArray();
0609: for (int j = 0; j < elems.length; j++) {
0610: b.remove(elems[j]);
0611: }
0612: p.fset = b;
0613: } else {
0614: p.fset.notInPlace(Token.MIN_USER_TYPE,
0615: grammar.tokenManager.maxTokenType());
0616: }
0617: }
0618: currentBlock = saveCurrentBlock;
0619: return p;
0620: }
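/* For the inversion branch above: a lexer subrule such as ~('a'|'b')
 * (hypothetical) first combines the alternatives' lookahead into {'a','b'}
 * and, because blk.not is set and the subrule is invertible, replaces it
 * with charVocabulary minus {'a','b'}; a parser-level ~(A|B) is instead
 * complemented over the token range MIN_USER_TYPE..maxTokenType().
 */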
0621:
0622: /**Compute what follows this place-holder node and possibly
0623: * what begins the associated loop unless the
0624: * node is locked.
0625: * <p>
0626: * if we hit the end of a loop, we have to include
0627: * what tokens can begin the loop as well. If the start
0628: * node is locked, then we simply found an empty path
0629: * through this subrule while analyzing it. If the
0630: * start node is not locked, then this node was hit
0631: * during a FOLLOW operation and the FIRST of this
0632: * block must be included in that lookahead computation.
0633: */
0634: public Lookahead look(int k, BlockEndElement end) {
0635: if (DEBUG_ANALYZER)
0636: System.out.println("lookBlockEnd(" + k + ", " + end.block
0637: + "); lock is " + end.lock[k]);
0638: if (end.lock[k]) {
0639: // computation in progress => the tokens we would have
0640: // computed (had we not been locked) will be included
0641: // in the set by that computation with the lock on this
0642: // node.
0643: return new Lookahead();
0644: }
0645:
0646: Lookahead p;
0647:
0648: /* Hitting the end of a loop means you can see what begins the loop */
0649: if (end.block instanceof ZeroOrMoreBlock
0650: || end.block instanceof OneOrMoreBlock) {
0651: // compute what can start the block,
0652: // but lock end node so we don't do it twice in same
0653: // computation.
0654: end.lock[k] = true;
0655: p = look(k, end.block);
0656: end.lock[k] = false;
0657: } else {
0658: p = new Lookahead();
0659: }
0660:
0661: /* Tree blocks do not have any follow because they are children
0662: * of what surrounds them. For example, A #(B C) D results in
0663: * look() at the end of the TreeElement returning NULL_TREE_LOOKAHEAD, which
0664: * indicates that nothing can follow the last node of the tree #(B C).
0665: */
0666: if (end.block instanceof TreeElement) {
0667: p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
0668: }
0669:
0670: /* Syntactic predicates such as ( (A)? )=> have no follow per se.
0671: * We cannot accurately say what would be matched following a
0672: * syntactic predicate (you MIGHT be ok if you said it was whatever
0673: * followed the alternative predicted by the predicate). Hence,
0674: * (like end-of-token) we return Epsilon to indicate "unknown
0675: * lookahead."
0676: */
0677: else if (end.block instanceof SynPredBlock) {
0678: p.setEpsilon();
0679: }
0680:
0681: // compute what can follow the block
0682: else {
0683: Lookahead q = end.block.next.look(k);
0684: p.combineWith(q);
0685: }
0686:
0687: return p;
0688: }
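/* Concrete illustration of the loop case above (hypothetical fragment):
 * in ( A )* B, the lookahead computed at the loop's block-end node combines
 * what can begin the loop again, {A}, with what follows the block, {B}, so
 * an element at the end of a loop iteration sees {A, B} as what may come
 * next.
 */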
0689:
0690: /**Return this char as the lookahead if k=1.
0691: * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
0692: * <p>
0693: * If the atom has the <tt>not</tt> flag on, then
0694: * create the set complement of the tokenType
0695: * which is the set of all characters referenced
0696: * in the grammar with this char turned off.
0697: * Also remove characters from the set that
0698: * are currently allocated for predicting
0699: * previous alternatives. This avoids ambiguity
0700: * messages and is more properly what is meant.
0701: * ( 'a' | ~'a' ) implies that the ~'a' is the
0702: * "else" clause.
0703: * <p>
0704: * NOTE: we do <b>NOT</b> include exit path in
0705: * the exclusion set. E.g.,
0706: * ( 'a' | ~'a' )* 'b'
0707: * should exit upon seeing a 'b' during the loop.
0708: */
0709: public Lookahead look(int k, CharLiteralElement atom) {
0710: if (DEBUG_ANALYZER)
0711: System.out.println("lookCharLiteral(" + k + "," + atom
0712: + ")");
0713: // Skip until analysis hits k==1
0714: if (k > 1) {
0715: return atom.next.look(k - 1);
0716: }
0717: if (lexicalAnalysis) {
0718: if (atom.not) {
0719: BitSet b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0720: .clone();
0721: if (DEBUG_ANALYZER)
0722: System.out.println("charVocab is " + b.toString());
0723: // remove stuff predicted by preceding alts and follow of block
0724: removeCompetingPredictionSets(b, atom);
0725: if (DEBUG_ANALYZER)
0726: System.out
0727: .println("charVocab after removal of prior alt lookahead "
0728: + b.toString());
0729: // now remove element that is stated not to be in the set
0730: b.clear(atom.getType());
0731: return new Lookahead(b);
0732: } else {
0733: return Lookahead.of(atom.getType());
0734: }
0735: } else {
0736: // Should have been avoided by MakeGrammar
0737: tool
0738: .fatalError("Character literal reference found in parser");
0739: // ... so we make the compiler happy
0740: return Lookahead.of(atom.getType());
0741: }
0742: }
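/* Example of the ~'a' handling above (hypothetical lexer decision): in
 * ( 'a' | ~'a' ), look(1) of the second alternative starts from the full
 * charVocabulary, subtracts the {'a'} predicted by the earlier alternative
 * via removeCompetingPredictionSets, and finally clears 'a' itself, so the
 * ~'a' alternative acts as the "else" clause without an ambiguity warning.
 */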
0743:
0744: public Lookahead look(int k, CharRangeElement r) {
0745: if (DEBUG_ANALYZER)
0746: System.out.println("lookCharRange(" + k + "," + r + ")");
0747: // Skip until analysis hits k==1
0748: if (k > 1) {
0749: return r.next.look(k - 1);
0750: }
0751: BitSet p = BitSet.of(r.begin);
0752: for (int i = r.begin + 1; i <= r.end; i++) {
0753: p.add(i);
0754: }
0755: return new Lookahead(p);
0756: }
0757:
0758: public Lookahead look(int k, GrammarAtom atom) {
0759: if (DEBUG_ANALYZER)
0760: System.out.println("look(" + k + "," + atom + "["
0761: + atom.getType() + "])");
0762:
0763: if (lexicalAnalysis) {
0764: // MakeGrammar should have created a rule reference instead
0765: tool.fatalError("token reference found in lexer");
0766: }
0767: // Skip until analysis hits k==1
0768: if (k > 1) {
0769: return atom.next.look(k - 1);
0770: }
0771: Lookahead l = Lookahead.of(atom.getType());
0772: if (atom.not) {
0773: // Invert the lookahead set against the token vocabulary
0774: int maxToken = grammar.tokenManager.maxTokenType();
0775: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0776: // remove stuff predicted by preceding alts and follow of block
0777: removeCompetingPredictionSets(l.fset, atom);
0778: }
0779: return l;
0780: }
0781:
0782: /**The lookahead of a (...)+ block is the combined lookahead of
0783: * all alternatives and, if an empty path is found, the lookahead
0784: * of what follows the block.
0785: */
0786: public Lookahead look(int k, OneOrMoreBlock blk) {
0787: if (DEBUG_ANALYZER)
0788: System.out.println("look+(" + k + "," + blk + ")");
0789: Lookahead p = look(k, (AlternativeBlock) blk);
0790: return p;
0791: }
0792:
0793: /**Combine the lookahead computed for each alternative.
0794: * Lock the node so that no other computation may come back
0795: * on itself--infinite loop. This also implies infinite left-recursion
0796: * in the grammar (or an error in this algorithm ;)).
0797: */
0798: public Lookahead look(int k, RuleBlock blk) {
0799: if (DEBUG_ANALYZER)
0800: System.out.println("lookRuleBlk(" + k + "," + blk + ")");
0801: Lookahead p = look(k, (AlternativeBlock) blk);
0802: return p;
0803: }
0804:
0805: /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
0806: * <p>
0807: * TJP says 8/12/99: not true anymore:
0808: * Lexical rules never compute follow. They set epsilon and
0809: * the code generator gens code to check for any character.
0810: * The code generator must remove the tokens used to predict
0811: * any previous alts in the same block.
0812: * <p>
0813: * When the last node of a rule is reached and noFOLLOW,
0814: * it implies that a "local" FOLLOW will be computed
0815: * after this call. I.e.,
0816: * <pre>
0817: * a : b A;
0818: * b : B | ;
0819: * c : b C;
0820: * </pre>
0821: * Here, when computing the look of rule b from rule a,
0822: * we want only {B,EPSILON_TYPE} so that look(b A) will
0823: * be {B,A} not {B,A,C}.
0824: * <p>
0825: * if the end block is not locked and the FOLLOW is
0826: * wanted, the algorithm must compute the lookahead
0827: * of what follows references to this rule. If
0828: * end block is locked, FOLLOW will return an empty set
0829: * with a cycle to the rule associated with this end block.
0830: */
0831: public Lookahead look(int k, RuleEndElement end) {
0832: if (DEBUG_ANALYZER)
0833: System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW="
0834: + end.noFOLLOW + "; lock is " + end.lock[k]);
0835: if (/*lexicalAnalysis ||*/end.noFOLLOW) {
0836: Lookahead p = new Lookahead();
0837: p.setEpsilon();
0838: p.epsilonDepth = BitSet.of(k);
0839: return p;
0840: }
0841: Lookahead p = FOLLOW(k, end);
0842: return p;
0843: }
0844:
0845: /**Compute the lookahead contributed by a rule reference.
0846: *
0847: * <p>
0848: * When computing ruleref lookahead, we don't want the FOLLOW
0849: * computation done if an empty path exists for the rule.
0850: * The FOLLOW is too loose a set...we want only to
0851: * include the "local" FOLLOW or what can follow this
0852: * particular ref to the node. In other words, we use
0853: * context information to reduce the complexity of the
0854: * analysis and strengthen the parser.
0855: *
0856: * The noFOLLOW flag is used as a means of restricting
0857: * the FOLLOW to a "local" FOLLOW. This variable is
0858: * orthogonal to the <tt>lock</tt> variable that prevents
0859: * infinite recursion. noFOLLOW does not care about what k is.
0860: */
0861: public Lookahead look(int k, RuleRefElement rr) {
0862: if (DEBUG_ANALYZER)
0863: System.out.println("lookRuleRef(" + k + "," + rr + ")");
0864: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
0865: if (rs == null || !rs.defined) {
0866: tool
0867: .error("no definition of rule " + rr.targetRule,
0868: grammar.getFilename(), rr.getLine(), rr
0869: .getColumn());
0870: return new Lookahead();
0871: }
0872: RuleBlock rb = rs.getBlock();
0873: RuleEndElement end = rb.endNode;
0874: boolean saveEnd = end.noFOLLOW;
0875: end.noFOLLOW = true;
0876: // go off to the rule and get the lookahead (w/o FOLLOW)
0877: Lookahead p = look(k, rr.targetRule);
0878: if (DEBUG_ANALYZER)
0879: System.out
0880: .println("back from rule ref to " + rr.targetRule);
0881: // restore state of end block
0882: end.noFOLLOW = saveEnd;
0883:
0884: // check for infinite recursion. If a cycle is returned: trouble!
0885: if (p.cycle != null) {
0886: tool.error("infinite recursion to rule " + p.cycle
0887: + " from rule " + rr.enclosingRuleName, grammar
0888: .getFilename(), rr.getLine(), rr.getColumn());
0889: }
0890:
0891: // is the local FOLLOW required?
0892: if (p.containsEpsilon()) {
0893: if (DEBUG_ANALYZER)
0894: System.out.println("rule ref to " + rr.targetRule
0895: + " has eps, depth: " + p.epsilonDepth);
0896:
0897: // remove epsilon
0898: p.resetEpsilon();
0899: // fset.clear(EPSILON_TYPE);
0900:
0901: // for each lookahead depth that saw epsilon
0902: int[] depths = p.epsilonDepth.toArray();
0903: p.epsilonDepth = null; // clear all epsilon stuff
0904: for (int i = 0; i < depths.length; i++) {
0905: int rk = k - (k - depths[i]);
0906: Lookahead q = rr.next.look(rk); // see comments in Lookahead
0907: p.combineWith(q);
0908: }
0909: // note: any of these look() computations for local follow can
0910: // set EPSILON in the set again if the end of this rule is found.
0911: }
0912:
0913: return p;
0914: }
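/* Tying this to the example in the look(int, RuleEndElement) comment above:
 * for a : b A; with b : B | ;, look(1) of the reference to b returns
 * {B, EPSILON}; the epsilon is stripped here and replaced by the local
 * FOLLOW of the reference, rr.next.look(1) = {A}, giving {B, A} rather than
 * the global FOLLOW of b.
 */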
0915:
0916: public Lookahead look(int k, StringLiteralElement atom) {
0917: if (DEBUG_ANALYZER)
0918: System.out.println("lookStringLiteral(" + k + "," + atom
0919: + ")");
0920: if (lexicalAnalysis) {
0921: // need more lookahead than string can provide?
0922: if (k > atom.processedAtomText.length()) {
0923: return atom.next.look(k
0924: - atom.processedAtomText.length());
0925: } else {
0926: // get char at lookahead depth k, from the processed literal text
0927: return Lookahead.of(atom.processedAtomText
0928: .charAt(k - 1));
0929: }
0930: } else {
0931: // Skip until analysis hits k==1
0932: if (k > 1) {
0933: return atom.next.look(k - 1);
0934: }
0935: Lookahead l = Lookahead.of(atom.getType());
0936: if (atom.not) {
0937: // Invert the lookahead set against the token vocabulary
0938: int maxToken = grammar.tokenManager.maxTokenType();
0939: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0940: }
0941: return l;
0942: }
0943: }
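/* Example for the lexer branch above: for the literal "begin" (hypothetical),
 * look(3) returns {'g'}, the character at depth 3 of the processed text,
 * while look(6) runs past the literal and becomes atom.next.look(6 - 5),
 * i.e. the lookahead of whatever follows the literal.
 */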
0944:
0945: /**The lookahead of a (...)=> block is the lookahead of
0946: * what follows the block. By definition, the syntactic
0947: * predicate block defies static analysis (you want to try it
0948: * out at run-time). The LOOK of (a)=>A B is A for LL(1)
0949: * ### is this even called?
0950: */
0951: public Lookahead look(int k, SynPredBlock blk) {
0952: if (DEBUG_ANALYZER)
0953: System.out.println("look=>(" + k + "," + blk + ")");
0954: return blk.next.look(k);
0955: }
0956:
0957: public Lookahead look(int k, TokenRangeElement r) {
0958: if (DEBUG_ANALYZER)
0959: System.out.println("lookTokenRange(" + k + "," + r + ")");
0960: // Skip until analysis hits k==1
0961: if (k > 1) {
0962: return r.next.look(k - 1);
0963: }
0964: BitSet p = BitSet.of(r.begin);
0965: for (int i = r.begin + 1; i <= r.end; i++) {
0966: p.add(i);
0967: }
0968: return new Lookahead(p);
0969: }
0970:
0971: public Lookahead look(int k, TreeElement t) {
0972: if (DEBUG_ANALYZER)
0973: System.out.println("look(" + k + "," + t.root + "["
0974: + t.root.getType() + "])");
0975: if (k > 1) {
0976: return t.next.look(k - 1);
0977: }
0978: Lookahead l = null;
0979: if (t.root instanceof WildcardElement) {
0980: l = t.root.look(1); // compute FIRST set minus previous rows
0981: } else {
0982: l = Lookahead.of(t.root.getType());
0983: if (t.root.not) {
0984: // Invert the lookahead set against the token vocabulary
0985: int maxToken = grammar.tokenManager.maxTokenType();
0986: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0987: }
0988: }
0989: return l;
0990: }
0991:
0992: public Lookahead look(int k, WildcardElement wc) {
0993: if (DEBUG_ANALYZER)
0994: System.out.println("look(" + k + "," + wc + ")");
0995:
0996: // Skip until analysis hits k==1
0997: if (k > 1) {
0998: return wc.next.look(k - 1);
0999: }
1000:
1001: BitSet b;
1002: if (lexicalAnalysis) {
1003: // Copy the character vocabulary
1004: b = (BitSet) ((LexerGrammar) grammar).charVocabulary
1005: .clone();
1006: } else {
1007: b = new BitSet(1);
1008: // Invert the lookahead set against the token vocabulary
1009: int maxToken = grammar.tokenManager.maxTokenType();
1010: b.notInPlace(Token.MIN_USER_TYPE, maxToken);
1011: if (DEBUG_ANALYZER)
1012: System.out.println("look(" + k + "," + wc
1013: + ") after not: " + b);
1014: }
1015:
1016: // Remove prediction sets from competing alternatives
1017: // removeCompetingPredictionSets(b, wc);
1018:
1019: return new Lookahead(b);
1020: }
1021:
1022: /** The (...)* element is the combined lookahead of the alternatives and what can
1023: * follow the loop.
1024: */
1025: public Lookahead look(int k, ZeroOrMoreBlock blk) {
1026: if (DEBUG_ANALYZER)
1027: System.out.println("look*(" + k + "," + blk + ")");
1028: Lookahead p = look(k, (AlternativeBlock) blk);
1029: Lookahead q = blk.next.look(k);
1030: p.combineWith(q);
1031: return p;
1032: }
1033:
1034: /**Compute the combined lookahead for all productions of a rule.
1035: * If the lookahead returns with epsilon, at least one epsilon
1036: * path exists (one that consumes no tokens). The noFOLLOW
1037: * flag being set for this end-of-rule block indicates that
1038: * a rule ref invoked this rule.
1039: *
1040: * Currently only look(RuleRef) calls this. There is no need
1041: * for the code generator to call this.
1042: */
1043: public Lookahead look(int k, String rule) {
1044: if (DEBUG_ANALYZER)
1045: System.out.println("lookRuleName(" + k + "," + rule + ")");
1046: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rule);
1047: RuleBlock rb = rs.getBlock();
1048:
1049: if (rb.lock[k]) {
1050: if (DEBUG_ANALYZER)
1051: System.out.println("infinite recursion to rule "
1052: + rb.getRuleName());
1053: return new Lookahead(rule);
1054: }
1055:
1056: // have we computed it before?
1057: if (rb.cache[k] != null) {
1058: if (DEBUG_ANALYZER) {
1059: System.out.println("found depth "
1060: + k
1061: + " result in FIRST "
1062: + rule
1063: + " cache: "
1064: + rb.cache[k].toString(",", charFormatter,
1065: grammar));
1066: }
1067: return (Lookahead) rb.cache[k].clone();
1068: }
1069:
1070: rb.lock[k] = true;
1071: Lookahead p = look(k, (RuleBlock) rb);
1072: rb.lock[k] = false;
1073:
1074: // cache results
1075: rb.cache[k] = (Lookahead) p.clone();
1076: if (DEBUG_ANALYZER) {
1077: System.out
1078: .println("saving depth "
1079: + k
1080: + " result in FIRST "
1081: + rule
1082: + " cache: "
1083: + rb.cache[k].toString(",", charFormatter,
1084: grammar));
1085: }
1086: return p;
1087: }
1088:
1089: /** If the first k-1 sets are singleton sets, the approximate
1090: * lookahead analysis is equivalent to full lookahead analysis.
1091: */
1092: public static boolean lookaheadEquivForApproxAndFullAnalysis(
1093: Lookahead[] bset, int k) {
1094: // first k-1 sets degree 1?
1095: for (int i = 1; i <= k - 1; i++) {
1096: BitSet look = bset[i].fset;
1097: if (look.degree() > 1) {
1098: return false;
1099: }
1100: }
1101: return true;
1102: }
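/* For instance, with the nongreedy example cited in deterministicImpliedPath,
 * ('a'..'d')+ ("ad"|"cb"), the exit cache at depth 1 is {'a','c'} (FIRST of
 * what follows the loop), whose degree exceeds 1; for maxk >= 2 this method
 * therefore returns false and the "nongreedy block may exit incorrectly"
 * warning is issued.
 */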
1103:
1104: /** Remove the prediction sets from preceding alternatives
1105: * and follow set, but *only* if this element is the first element
1106: * of the alternative. The class members currentBlock and
1107: * currentBlock.analysisAlt must be set correctly.
1108: * @param b The prediction bitset to be modified
1109: * @param el The element of interest
1110: */
1111: private void removeCompetingPredictionSets(BitSet b,
1112: AlternativeElement el) {
1113: // Only do this if the element is the first element of the alt,
1114: // because we are making an implicit assumption that k==1.
1115: GrammarElement head = currentBlock
1116: .getAlternativeAt(currentBlock.analysisAlt).head;
1117: // if element is #(. blah) then check to see if el is root
1118: if (head instanceof TreeElement) {
1119: if (((TreeElement) head).root != el) {
1120: return;
1121: }
1122: } else if (el != head) {
1123: return;
1124: }
1125: for (int i = 0; i < currentBlock.analysisAlt; i++) {
1126: AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1127: b.subtractInPlace(e.look(1).fset);
1128: }
1129: }
1130:
1131: /** Remove the prediction sets from preceding alternatives.
1132: * The class member currentBlock must be set correctly.
1133: * Remove prediction sets from 1..k.
1134: * @param look The prediction lookahead to be modified
1135: * @param el The element of interest
1136: * @param k How deep into lookahead to modify
1137: */
1138: private void removeCompetingPredictionSetsFromWildcard(
1139: Lookahead[] look, AlternativeElement el, int k) {
1140: for (int d = 1; d <= k; d++) {
1141: for (int i = 0; i < currentBlock.analysisAlt; i++) {
1142: AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1143: look[d].fset.subtractInPlace(e.look(d).fset);
1144: }
1145: }
1146: }
1147:
1148: /** reset the analyzer so it looks like a new one */
1149: private void reset() {
1150: grammar = null;
1151: DEBUG_ANALYZER = false;
1152: currentBlock = null;
1153: lexicalAnalysis = false;
1154: }
1155:
1156: /** Set the grammar for the analyzer */
1157: public void setGrammar(Grammar g) {
1158: if (grammar != null) {
1159: reset();
1160: }
1161: grammar = g;
1162:
1163: // Is this lexical?
1164: lexicalAnalysis = (grammar instanceof LexerGrammar);
1165: DEBUG_ANALYZER = grammar.analyzerDebug;
1166: }
1167:
1168: public boolean subruleCanBeInverted(AlternativeBlock blk,
1169: boolean forLexer) {
1170: if (blk instanceof ZeroOrMoreBlock
1171: || blk instanceof OneOrMoreBlock
1172: || blk instanceof SynPredBlock) {
1173: return false;
1174: }
1175: // Cannot invert an empty subrule
1176: if (blk.alternatives.size() == 0) {
1177: return false;
1178: }
1179: // The block must only contain alternatives with a single element,
1180: // where each element is a char, token, char range, or token range.
1181: for (int i = 0; i < blk.alternatives.size(); i++) {
1182: Alternative alt = blk.getAlternativeAt(i);
1183: // Cannot have anything interesting in the alternative ...
1184: if (alt.synPred != null || alt.semPred != null
1185: || alt.exceptionSpec != null) {
1186: return false;
1187: }
1188: // ... and there must be one simple element
1189: AlternativeElement elt = alt.head;
1190: if (!(elt instanceof CharLiteralElement
1191: || elt instanceof TokenRefElement
1192: || elt instanceof CharRangeElement
1193: || elt instanceof TokenRangeElement || (elt instanceof StringLiteralElement && !forLexer))
1194: || !(elt.next instanceof BlockEndElement)
1195: || elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE) {
1196: return false;
1197: }
1198: }
1199: return true;
1200: }
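/* Under these rules a subrule such as ( 'a' | 'b' | 'd'..'f' ) can be
 * inverted when prefixed with ~, whereas ( 'a' 'b' | 'c' ), any (...)+ or
 * (...)* loop, or an alternative carrying a predicate cannot (hypothetical
 * fragments).
 */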
1201: }
|