0001: package persistence.antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.jGuru.com
0005: * Software rights: http://www.antlr.org/license.html
0006: *
0007: */
0008:
0009: import persistence.antlr.collections.impl.BitSet;
0010: import persistence.antlr.collections.impl.Vector;
0011:
/**A linear-approximate LL(k) grammar analyzer.
0013: *
0014: * All lookahead elements are sets of token types.
0015: *
0016: * @author Terence Parr, John Lilley
0017: * @see persistence.antlr.Grammar
0018: * @see persistence.antlr.Lookahead
0019: */
0020: public class LLkAnalyzer implements LLkGrammarAnalyzer {
    // Enable debug output; set from the grammar's "analyzerDebug" option (see setGrammar)
0022: public boolean DEBUG_ANALYZER = false;
0023: private AlternativeBlock currentBlock;
0024: protected Tool tool = null;
0025: protected Grammar grammar = null;
0026: // True if analyzing a lexical grammar
0027: protected boolean lexicalAnalysis = false;
0028: // Used for formatting bit sets in default (Java) format
0029: CharFormatter charFormatter = new JavaCharFormatter();
0030:
0031: /** Create an LLk analyzer */
0032: public LLkAnalyzer(Tool tool_) {
0033: tool = tool_;
0034: }
0035:
0036: /** Return true if someone used the '.' wildcard default idiom.
0037: * Either #(. children) or '.' as an alt by itself.
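     * For example (illustrative), in
     * <pre>
     *     a : A | B | . ;
     * </pre>
     * the final '.' alternative acts as a default ("else") clause.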
0038: */
0039: protected boolean altUsesWildcardDefault(Alternative alt) {
0040: AlternativeElement head = alt.head;
0041: // if element is #(. blah) then check to see if el is root
0042: if (head instanceof TreeElement
0043: && ((TreeElement) head).root instanceof WildcardElement) {
0044: return true;
0045: }
0046: if (head instanceof WildcardElement
0047: && head.next instanceof BlockEndElement) {
0048: return true;
0049: }
0050: return false;
0051: }
0052:
0053: /**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
0054: * @return true if the block is deterministic
0055: */
0056: public boolean deterministic(AlternativeBlock blk) {
        // The lookahead depth for this decision
0058: int k = 1; // start at k=1
0059: if (DEBUG_ANALYZER)
0060: System.out.println("deterministic(" + blk + ")");
0061: boolean det = true;
0062: int nalts = blk.alternatives.size();
0063: AlternativeBlock saveCurrentBlock = currentBlock;
0064: Alternative wildcardAlt = null;
0065: currentBlock = blk;
0066:
0067: /* don't allow nongreedy (...) blocks */
0068: if (blk.greedy == false && !(blk instanceof OneOrMoreBlock)
0069: && !(blk instanceof ZeroOrMoreBlock)) {
0070: tool
0071: .warning(
0072: "Being nongreedy only makes sense for (...)+ and (...)*",
0073: grammar.getFilename(), blk.getLine(), blk
0074: .getColumn());
0075: }
0076:
0077: // SPECIAL CASE: only one alternative. We don't need to check the
0078: // determinism, but other code expects the lookahead cache to be
0079: // set for the single alt.
0080: if (nalts == 1) {
0081: AlternativeElement e = blk.getAlternativeAt(0).head;
0082: currentBlock.alti = 0;
0083: blk.getAlternativeAt(0).cache[1] = e.look(1);
0084: blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
0085: currentBlock = saveCurrentBlock;
0086: return true; // always deterministic for one alt
0087: }
0088:
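        // Pairwise comparison: for every pair of alternatives (i, j) with i < j,
        // find the smallest depth k (1..grammar.maxk) at which their lookahead
        // sets are disjoint.  If no such depth exists, the pair is ambiguous.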
0089: outer: for (int i = 0; i < nalts - 1; i++) {
0090: currentBlock.alti = i;
0091: currentBlock.analysisAlt = i; // which alt are we analyzing?
0092: currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
0093: // but we need the alt number.
0094: inner:
0095: // compare against other alternatives with lookahead depth k
0096: for (int j = i + 1; j < nalts; j++) {
0097: currentBlock.altj = j;
0098: if (DEBUG_ANALYZER)
0099: System.out.println("comparing " + i
0100: + " against alt " + j);
0101: currentBlock.analysisAlt = j; // which alt are we analyzing?
0102: k = 1; // always attempt minimum lookahead possible.
0103:
0104: // check to see if there is a lookahead depth that distinguishes
0105: // between alternatives i and j.
0106: Lookahead[] r = new Lookahead[grammar.maxk + 1];
0107: boolean haveAmbiguity;
0108: do {
0109: haveAmbiguity = false;
0110: if (DEBUG_ANALYZER)
0111: System.out.println("checking depth " + k + "<="
0112: + grammar.maxk);
0113: Lookahead p, q;
0114: p = getAltLookahead(blk, i, k);
0115: q = getAltLookahead(blk, j, k);
0116:
0117: // compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
0118: // Lookahead must be disjoint.
0119: if (DEBUG_ANALYZER)
0120: System.out.println("p is "
0121: + p.toString(",", charFormatter,
0122: grammar));
0123: if (DEBUG_ANALYZER)
0124: System.out.println("q is "
0125: + q.toString(",", charFormatter,
0126: grammar));
0127: // r[i] = p.fset.and(q.fset);
0128: r[k] = p.intersection(q);
0129: if (DEBUG_ANALYZER)
0130: System.out.println("intersection at depth " + k
0131: + " is " + r[k].toString());
0132: if (!r[k].nil()) {
0133: haveAmbiguity = true;
0134: k++;
0135: }
0136: // go until no more lookahead to use or no intersection
0137: } while (haveAmbiguity && k <= grammar.maxk);
0138:
0139: Alternative ai = blk.getAlternativeAt(i);
0140: Alternative aj = blk.getAlternativeAt(j);
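                // haveAmbiguity still true here means the lookahead sets
                // intersect at every depth up to grammar.maxk: this pair of
                // alternatives is nondeterministic for this decision.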
0141: if (haveAmbiguity) {
0142: det = false;
0143: ai.lookaheadDepth = NONDETERMINISTIC;
0144: aj.lookaheadDepth = NONDETERMINISTIC;
0145:
0146: /* if ith alt starts with a syntactic predicate, computing the
0147: * lookahead is still done for code generation, but messages
0148: * should not be generated when comparing against alt j.
0149: * Alternatives with syn preds that are unnecessary do
0150: * not result in syn pred try-blocks.
0151: */
0152: if (ai.synPred != null) {
0153: if (DEBUG_ANALYZER) {
0154: System.out.println("alt " + i
0155: + " has a syn pred");
0156: }
0157: // The alt with the (...)=> block is nondeterministic for sure.
0158: // If the (...)=> conflicts with alt j, j is nondeterministic.
0159: // This prevents alt j from being in any switch statements.
0160: // move on to next alternative=>no possible ambiguity!
0161: // continue inner;
0162: }
0163:
0164: /* if ith alt starts with a semantic predicate, computing the
0165: * lookahead is still done for code generation, but messages
0166: * should not be generated when comparing against alt j.
0167: */
0168: else if (ai.semPred != null) {
0169: if (DEBUG_ANALYZER) {
0170: System.out.println("alt " + i
0171: + " has a sem pred");
0172: }
0173: }
0174:
0175: /* if jth alt is exactly the wildcard or wildcard root of tree,
0176: * then remove elements from alt i lookahead from alt j's lookahead.
0177: * Don't do an ambiguity warning.
0178: */
0179: else if (altUsesWildcardDefault(aj)) {
0180: // System.out.println("removing pred sets");
0181: // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
0182: wildcardAlt = aj;
0183: }
0184:
0185: /* If the user specified warnWhenFollowAmbig=false, then we
0186: * can turn off this warning IFF one of the alts is empty;
0187: * that is, it points immediately at the end block.
0188: */
0189: else if (!blk.warnWhenFollowAmbig
0190: && (ai.head instanceof BlockEndElement || aj.head instanceof BlockEndElement)) {
0191: // System.out.println("ai.head pts to "+ai.head.getClass());
0192: // System.out.println("aj.head pts to "+aj.head.getClass());
0193: }
0194:
0195: /* If they have the generateAmbigWarnings option off for the block
0196: * then don't generate a warning.
0197: */
0198: else if (!blk.generateAmbigWarnings) {
0199: }
0200:
0201: /* If greedy=true and *one* empty alt shut off warning. */
0202: else if (blk.greedySet
0203: && blk.greedy
0204: && ((ai.head instanceof BlockEndElement && !(aj.head instanceof BlockEndElement)) || (aj.head instanceof BlockEndElement && !(ai.head instanceof BlockEndElement)))) {
0205: // System.out.println("greedy set to true; one alt empty");
0206: }
0207:
                    /* We have no choice but to report a nondeterminism */
0209: else {
0210: tool.errorHandler.warnAltAmbiguity(grammar,
0211: blk, // the block
0212: lexicalAnalysis, // true if lexical
0213: grammar.maxk, // depth of ambiguity
0214: r, // set of linear ambiguities
0215: i, // first ambiguous alternative
0216: j // second ambiguous alternative
0217: );
0218: }
0219: } else {
0220: // a lookahead depth, k, was found where i and j do not conflict
0221: ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
0222: aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
0223: }
0224: }
0225: }
0226:
0227: // finished with block.
0228:
        // If the wildcard default clause idiom was used, remove competing lookahead
0230: /*
0231: if ( wildcardAlt!=null ) {
0232: removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
0233: }
0234: */
0235:
0236: currentBlock = saveCurrentBlock;
0237: return det;
0238: }
0239:
0240: /**Is (...)+ block LL(1)? Fill in alternative cache for this block.
0241: * @return true if the block is deterministic
0242: */
0243: public boolean deterministic(OneOrMoreBlock blk) {
0244: if (DEBUG_ANALYZER)
0245: System.out.println("deterministic(...)+(" + blk + ")");
0246: AlternativeBlock saveCurrentBlock = currentBlock;
0247: currentBlock = blk;
0248: boolean blkOk = deterministic((AlternativeBlock) blk);
0249: // block has been checked, now check that what follows does not conflict
0250: // with the lookahead of the (...)+ block.
0251: boolean det = deterministicImpliedPath(blk);
0252: currentBlock = saveCurrentBlock;
0253: return det && blkOk;
0254: }
0255:
0256: /**Is (...)* block LL(1)? Fill in alternative cache for this block.
0257: * @return true if the block is deterministic
0258: */
0259: public boolean deterministic(ZeroOrMoreBlock blk) {
0260: if (DEBUG_ANALYZER)
0261: System.out.println("deterministic(...)*(" + blk + ")");
0262: AlternativeBlock saveCurrentBlock = currentBlock;
0263: currentBlock = blk;
0264: boolean blkOk = deterministic((AlternativeBlock) blk);
0265: // block has been checked, now check that what follows does not conflict
0266: // with the lookahead of the (...)* block.
0267: boolean det = deterministicImpliedPath(blk);
0268: currentBlock = saveCurrentBlock;
0269: return det && blkOk;
0270: }
0271:
0272: /**Is this (...)* or (...)+ block LL(k)?
0273: * @return true if the block is deterministic
0274: */
0275: public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
        // The lookahead depth for this decision, considering the implied exit path
0277: int k;
0278: boolean det = true;
0279: Vector alts = blk.getAlternatives();
0280: int nalts = alts.size();
0281: currentBlock.altj = -1; // comparing against implicit optional/exit alt
0282:
0283: if (DEBUG_ANALYZER)
0284: System.out.println("deterministicImpliedPath");
0285: for (int i = 0; i < nalts; i++) { // check follow against all alts
0286: Alternative alt = blk.getAlternativeAt(i);
0287:
0288: if (alt.head instanceof BlockEndElement) {
0289: tool
0290: .warning(
0291: "empty alternative makes no sense in (...)* or (...)+",
0292: grammar.getFilename(), blk.getLine(),
0293: blk.getColumn());
0294: }
0295:
            k = 1; // assume each alt is LL(1) with exit branch
0297: // check to see if there is a lookahead depth that distinguishes
0298: // between alternative i and the exit branch.
0299: Lookahead[] r = new Lookahead[grammar.maxk + 1];
0300: boolean haveAmbiguity;
0301: do {
0302: haveAmbiguity = false;
0303: if (DEBUG_ANALYZER)
0304: System.out.println("checking depth " + k + "<="
0305: + grammar.maxk);
0306: Lookahead p;
0307: Lookahead follow = blk.next.look(k);
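                // remember the exit-branch lookahead at this depth; the
                // nongreedy check below reads it back out of exitCache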
0308: blk.exitCache[k] = follow;
0309: currentBlock.alti = i;
0310: p = getAltLookahead(blk, i, k);
0311:
0312: if (DEBUG_ANALYZER)
0313: System.out.println("follow is "
0314: + follow.toString(",", charFormatter,
0315: grammar));
0316: if (DEBUG_ANALYZER)
0317: System.out.println("p is "
0318: + p.toString(",", charFormatter, grammar));
0319: //r[k] = follow.fset.and(p.fset);
0320: r[k] = follow.intersection(p);
0321: if (DEBUG_ANALYZER)
0322: System.out.println("intersection at depth " + k
0323: + " is " + r[k]);
0324: if (!r[k].nil()) {
0325: haveAmbiguity = true;
0326: k++;
0327: }
0328: // go until no more lookahead to use or no intersection
0329: } while (haveAmbiguity && k <= grammar.maxk);
0330:
0331: if (haveAmbiguity) {
0332: det = false;
0333: alt.lookaheadDepth = NONDETERMINISTIC;
0334: blk.exitLookaheadDepth = NONDETERMINISTIC;
0335: Alternative ambigAlt = blk
0336: .getAlternativeAt(currentBlock.alti);
0337:
0338: /* If the user specified warnWhenFollowAmbig=false, then we
0339: * can turn off this warning.
0340: */
0341: if (!blk.warnWhenFollowAmbig) {
0342: }
0343:
0344: /* If they have the generateAmbigWarnings option off for the block
0345: * then don't generate a warning.
0346: */
0347: else if (!blk.generateAmbigWarnings) {
0348: }
0349:
0350: /* If greedy=true and alt not empty, shut off warning */
0351: else if (blk.greedy == true && blk.greedySet
0352: && !(ambigAlt.head instanceof BlockEndElement)) {
0353: if (DEBUG_ANALYZER)
0354: System.out.println("greedy loop");
0355: }
0356:
0357: /* If greedy=false then shut off warning...will have
0358: * to add "if FOLLOW break"
0359: * block during code gen to compensate for removal of warning.
0360: */
0361: else if (blk.greedy == false
0362: && !(ambigAlt.head instanceof BlockEndElement)) {
0363: if (DEBUG_ANALYZER)
0364: System.out.println("nongreedy loop");
0365: // if FOLLOW not single k-string (|set[k]| can
0366: // be > 1 actually) then must warn them that
0367: // loop may terminate incorrectly.
0368: // For example, ('a'..'d')+ ("ad"|"cb")
0369: if (!lookaheadEquivForApproxAndFullAnalysis(
0370: blk.exitCache, grammar.maxk)) {
0371: tool
0372: .warning(
0373: new String[] {
0374: "nongreedy block may exit incorrectly due",
0375: "\tto limitations of linear approximate lookahead (first k-1 sets",
0376: "\tin lookahead not singleton)." },
0377: grammar.getFilename(), blk
0378: .getLine(), blk
0379: .getColumn());
0380: }
0381: }
0382:
0383: // no choice but to generate a warning
0384: else {
0385: tool.errorHandler.warnAltExitAmbiguity(grammar,
0386: blk, // the block
0387: lexicalAnalysis, // true if lexical
0388: grammar.maxk, // depth of ambiguity
0389: r, // set of linear ambiguities
0390: i // ambiguous alternative
0391: );
0392: }
0393: } else {
0394: alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
0395: blk.exitLookaheadDepth = Math.max(
0396: blk.exitLookaheadDepth, k);
0397: }
0398: }
0399: return det;
0400: }
0401:
0402: /**Compute the lookahead set of whatever follows references to
     * the rule associated with the FOLLOW block.
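     * For example (illustrative grammar fragment), given
     * <pre>
     *     a : b X ;
     *     c : b Y ;
     * </pre>
     * FOLLOW(1, end-of-b) is {X, Y}: the union of what follows each
     * reference to rule b.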
0404: */
0405: public Lookahead FOLLOW(int k, RuleEndElement end) {
0406: // what rule are we trying to compute FOLLOW of?
0407: RuleBlock rb = (RuleBlock) end.block;
0408: // rule name is different in lexer
0409: String rule;
0410: if (lexicalAnalysis) {
0411: rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
0412: } else {
0413: rule = rb.getRuleName();
0414: }
0415:
0416: if (DEBUG_ANALYZER)
0417: System.out.println("FOLLOW(" + k + "," + rule + ")");
0418:
0419: // are we in the midst of computing this FOLLOW already?
0420: if (end.lock[k]) {
0421: if (DEBUG_ANALYZER)
0422: System.out.println("FOLLOW cycle to " + rule);
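            // return an empty lookahead tagged with a cycle reference to this
            // rule; callers record the cycle and resolve it once this FOLLOW
            // computation completes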
0423: return new Lookahead(rule);
0424: }
0425:
        // Check to see if there is a cached value
0427: if (end.cache[k] != null) {
0428: if (DEBUG_ANALYZER) {
0429: System.out.println("cache entry FOLLOW("
0430: + k
0431: + ") for "
0432: + rule
0433: + ": "
0434: + end.cache[k].toString(",", charFormatter,
0435: grammar));
0436: }
0437: // if the cache is a complete computation then simply return entry
0438: if (end.cache[k].cycle == null) {
0439: return (Lookahead) end.cache[k].clone();
0440: }
0441: // A cache entry exists, but it is a reference to a cyclic computation.
0442: RuleSymbol rs = (RuleSymbol) grammar
0443: .getSymbol(end.cache[k].cycle);
0444: RuleEndElement re = rs.getBlock().endNode;
            // The other entry may not exist because it was still being
            // computed when this cycle cache entry was created.
0447: if (re.cache[k] == null) {
0448: // return the cycle...that's all we can do at the moment.
0449: return (Lookahead) end.cache[k].clone();
0450: } else {
0451: if (DEBUG_ANALYZER) {
0452: System.out.println("combining FOLLOW("
0453: + k
0454: + ") for "
0455: + rule
0456: + ": from "
0457: + end.cache[k].toString(",", charFormatter,
0458: grammar)
0459: + " with FOLLOW for "
0460: + ((RuleBlock) re.block).getRuleName()
0461: + ": "
0462: + re.cache[k].toString(",", charFormatter,
0463: grammar));
0464: }
0465: // combine results from other rule's FOLLOW
0466: if (re.cache[k].cycle == null) {
0467: // current rule depends on another rule's FOLLOW and
0468: // it is complete with no cycle; just kill our cycle and
0469: // combine full result from other rule's FOLLOW
0470: end.cache[k].combineWith(re.cache[k]);
0471: end.cache[k].cycle = null; // kill cycle as we're complete
0472: } else {
0473: // the FOLLOW cache for other rule has a cycle also.
0474: // Here is where we bubble up a cycle. We better recursively
0475: // wipe out cycles (partial computations). I'm a little nervous
0476: // that we might leave a cycle here, however.
0477: Lookahead refFOLLOW = FOLLOW(k, re);
0478: end.cache[k].combineWith(refFOLLOW);
0479: // all cycles should be gone, but if not, record ref to cycle
0480: end.cache[k].cycle = refFOLLOW.cycle;
0481: }
0482: if (DEBUG_ANALYZER) {
0483: System.out.println("saving FOLLOW("
0484: + k
0485: + ") for "
0486: + rule
0487: + ": from "
0488: + end.cache[k].toString(",", charFormatter,
0489: grammar));
0490: }
0491: // Return the updated cache entry associated
0492: // with the cycle reference.
0493: return (Lookahead) end.cache[k].clone();
0494: }
0495: }
0496:
0497: end.lock[k] = true; // prevent FOLLOW computation cycles
0498:
0499: Lookahead p = new Lookahead();
0500:
0501: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rule);
0502:
0503: // Walk list of references to this rule to compute FOLLOW
0504: for (int i = 0; i < rs.numReferences(); i++) {
0505: RuleRefElement rr = rs.getReference(i);
0506: if (DEBUG_ANALYZER)
0507: System.out.println("next[" + rule + "] is "
0508: + rr.next.toString());
0509: Lookahead q = rr.next.look(k);
0510: if (DEBUG_ANALYZER)
0511: System.out.println("FIRST of next[" + rule
0512: + "] ptr is " + q.toString());
0513: /* If there is a cycle then if the cycle is to the rule for
0514: * this end block, you have a cycle to yourself. Remove the
0515: * cycle indication--the lookahead is complete.
0516: */
0517: if (q.cycle != null && q.cycle.equals(rule)) {
0518: q.cycle = null; // don't want cycle to yourself!
0519: }
0520: // add the lookahead into the current FOLLOW computation set
0521: p.combineWith(q);
0522: if (DEBUG_ANALYZER)
0523: System.out.println("combined FOLLOW[" + rule + "] is "
0524: + p.toString());
0525: }
0526:
0527: end.lock[k] = false; // we're not doing FOLLOW anymore
0528:
        // if no rules follow this, it can be a start symbol or called by a start symbol.
0530: // set the follow to be end of file.
0531: if (p.fset.nil() && p.cycle == null) {
0532: if (grammar instanceof TreeWalkerGrammar) {
0533: // Tree grammars don't see EOF, they see end of sibling list or
0534: // "NULL TREE LOOKAHEAD".
0535: p.fset.add(Token.NULL_TREE_LOOKAHEAD);
0536: } else if (grammar instanceof LexerGrammar) {
                // Lexical grammars use Epsilon to indicate that the end of the rule has been hit.
                // EOF would be misleading; any character can follow a token rule, not just EOF
                // as in a parser grammar (where a start symbol is followed by EOF).  There is no
                // sequencing info in a lexer between tokens to indicate which token is the last
                // to be seen.
0542: // p.fset.add(EPSILON_TYPE);
0543: p.setEpsilon();
0544: } else {
0545: p.fset.add(Token.EOF_TYPE);
0546: }
0547: }
0548:
0549: // Cache the result of the FOLLOW computation
0550: if (DEBUG_ANALYZER) {
0551: System.out.println("saving FOLLOW(" + k + ") for " + rule
0552: + ": " + p.toString(",", charFormatter, grammar));
0553: }
0554: end.cache[k] = (Lookahead) p.clone();
0555:
0556: return p;
0557: }
0558:
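    /** Return the depth-k lookahead for alternative alt of blk, computing
     *  it and storing it in the alternative's cache on first use.
     */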
0559: private Lookahead getAltLookahead(AlternativeBlock blk, int alt,
0560: int k) {
0561: Lookahead p;
0562: Alternative a = blk.getAlternativeAt(alt);
0563: AlternativeElement e = a.head;
0564: //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
0565: if (a.cache[k] == null) {
0566: p = e.look(k);
0567: a.cache[k] = p;
0568: } else {
0569: p = a.cache[k];
0570: }
0571: return p;
0572: }
0573:
0574: /**Actions are ignored */
0575: public Lookahead look(int k, ActionElement action) {
0576: if (DEBUG_ANALYZER)
0577: System.out.println("lookAction(" + k + "," + action + ")");
0578: return action.next.look(k);
0579: }
0580:
0581: /**Combine the lookahead computed for each alternative */
0582: public Lookahead look(int k, AlternativeBlock blk) {
0583: if (DEBUG_ANALYZER)
0584: System.out.println("lookAltBlk(" + k + "," + blk + ")");
0585: AlternativeBlock saveCurrentBlock = currentBlock;
0586: currentBlock = blk;
0587: Lookahead p = new Lookahead();
0588: for (int i = 0; i < blk.alternatives.size(); i++) {
0589: if (DEBUG_ANALYZER)
0590: System.out.println("alt " + i + " of " + blk);
0591: // must set analysis alt
0592: currentBlock.analysisAlt = i;
0593: Alternative alt = blk.getAlternativeAt(i);
0594: AlternativeElement elem = alt.head;
0595: if (DEBUG_ANALYZER) {
0596: if (alt.head == alt.tail) {
0597: System.out.println("alt " + i + " is empty");
0598: }
0599: }
0600: Lookahead q = elem.look(k);
0601: p.combineWith(q);
0602: }
0603: if (k == 1 && blk.not
0604: && subruleCanBeInverted(blk, lexicalAnalysis)) {
0605: // Invert the lookahead set
0606: if (lexicalAnalysis) {
0607: BitSet b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0608: .clone();
0609: int[] elems = p.fset.toArray();
0610: for (int j = 0; j < elems.length; j++) {
0611: b.remove(elems[j]);
0612: }
0613: p.fset = b;
0614: } else {
0615: p.fset.notInPlace(Token.MIN_USER_TYPE,
0616: grammar.tokenManager.maxTokenType());
0617: }
0618: }
0619: currentBlock = saveCurrentBlock;
0620: return p;
0621: }
0622:
0623: /**Compute what follows this place-holder node and possibly
0624: * what begins the associated loop unless the
0625: * node is locked.
0626: * <p>
0627: * if we hit the end of a loop, we have to include
0628: * what tokens can begin the loop as well. If the start
0629: * node is locked, then we simply found an empty path
0630: * through this subrule while analyzing it. If the
0631: * start node is not locked, then this node was hit
0632: * during a FOLLOW operation and the FIRST of this
0633: * block must be included in that lookahead computation.
0634: */
0635: public Lookahead look(int k, BlockEndElement end) {
0636: if (DEBUG_ANALYZER)
0637: System.out.println("lookBlockEnd(" + k + ", " + end.block
0638: + "); lock is " + end.lock[k]);
0639: if (end.lock[k]) {
0640: // computation in progress => the tokens we would have
0641: // computed (had we not been locked) will be included
0642: // in the set by that computation with the lock on this
0643: // node.
0644: return new Lookahead();
0645: }
0646:
0647: Lookahead p;
0648:
0649: /* Hitting the end of a loop means you can see what begins the loop */
0650: if (end.block instanceof ZeroOrMoreBlock
0651: || end.block instanceof OneOrMoreBlock) {
0652: // compute what can start the block,
0653: // but lock end node so we don't do it twice in same
0654: // computation.
0655: end.lock[k] = true;
0656: p = look(k, end.block);
0657: end.lock[k] = false;
0658: } else {
0659: p = new Lookahead();
0660: }
0661:
0662: /* Tree blocks do not have any follow because they are children
0663: * of what surrounds them. For example, A #(B C) D results in
         * a look() at the end of the TreeElement that returns NULL_TREE_LOOKAHEAD,
         * which indicates that nothing can follow the last node of tree #(B C).
0666: */
0667: if (end.block instanceof TreeElement) {
0668: p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
0669: }
0670:
0671: /* Syntactic predicates such as ( (A)? )=> have no follow per se.
0672: * We cannot accurately say what would be matched following a
0673: * syntactic predicate (you MIGHT be ok if you said it was whatever
0674: * followed the alternative predicted by the predicate). Hence,
0675: * (like end-of-token) we return Epsilon to indicate "unknown
0676: * lookahead."
0677: */
0678: else if (end.block instanceof SynPredBlock) {
0679: p.setEpsilon();
0680: }
0681:
0682: // compute what can follow the block
0683: else {
0684: Lookahead q = end.block.next.look(k);
0685: p.combineWith(q);
0686: }
0687:
0688: return p;
0689: }
0690:
0691: /**Return this char as the lookahead if k=1.
0692: * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
0693: * <p>
0694: * If the atom has the <tt>not</tt> flag on, then
0695: * create the set complement of the tokenType
0696: * which is the set of all characters referenced
0697: * in the grammar with this char turned off.
0698: * Also remove characters from the set that
0699: * are currently allocated for predicting
0700: * previous alternatives. This avoids ambiguity
0701: * messages and is more properly what is meant.
0702: * ( 'a' | ~'a' ) implies that the ~'a' is the
0703: * "else" clause.
0704: * <p>
0705: * NOTE: we do <b>NOT</b> include exit path in
0706: * the exclusion set. E.g.,
0707: * ( 'a' | ~'a' )* 'b'
0708: * should exit upon seeing a 'b' during the loop.
0709: */
0710: public Lookahead look(int k, CharLiteralElement atom) {
0711: if (DEBUG_ANALYZER)
0712: System.out.println("lookCharLiteral(" + k + "," + atom
0713: + ")");
0714: // Skip until analysis hits k==1
0715: if (k > 1) {
0716: return atom.next.look(k - 1);
0717: }
0718: if (lexicalAnalysis) {
0719: if (atom.not) {
0720: BitSet b = (BitSet) ((LexerGrammar) grammar).charVocabulary
0721: .clone();
0722: if (DEBUG_ANALYZER)
0723: System.out.println("charVocab is " + b.toString());
0724: // remove stuff predicted by preceding alts and follow of block
0725: removeCompetingPredictionSets(b, atom);
0726: if (DEBUG_ANALYZER)
0727: System.out
0728: .println("charVocab after removal of prior alt lookahead "
0729: + b.toString());
0730: // now remove element that is stated not to be in the set
0731: b.clear(atom.getType());
0732: return new Lookahead(b);
0733: } else {
0734: return Lookahead.of(atom.getType());
0735: }
0736: } else {
0737: // Should have been avoided by MakeGrammar
0738: tool.panic("Character literal reference found in parser");
0739: // ... so we make the compiler happy
0740: return Lookahead.of(atom.getType());
0741: }
0742: }
0743:
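    /** The lookahead of a character range at depth 1 is the set of
     *  characters begin..end; deeper lookahead comes from what follows.
     */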
0744: public Lookahead look(int k, CharRangeElement r) {
0745: if (DEBUG_ANALYZER)
0746: System.out.println("lookCharRange(" + k + "," + r + ")");
0747: // Skip until analysis hits k==1
0748: if (k > 1) {
0749: return r.next.look(k - 1);
0750: }
0751: BitSet p = BitSet.of(r.begin);
0752: for (int i = r.begin + 1; i <= r.end; i++) {
0753: p.add(i);
0754: }
0755: return new Lookahead(p);
0756: }
0757:
0758: public Lookahead look(int k, GrammarAtom atom) {
0759: if (DEBUG_ANALYZER)
0760: System.out.println("look(" + k + "," + atom + "["
0761: + atom.getType() + "])");
0762:
0763: if (lexicalAnalysis) {
0764: // MakeGrammar should have created a rule reference instead
0765: tool.panic("token reference found in lexer");
0766: }
0767: // Skip until analysis hits k==1
0768: if (k > 1) {
0769: return atom.next.look(k - 1);
0770: }
0771: Lookahead l = Lookahead.of(atom.getType());
0772: if (atom.not) {
0773: // Invert the lookahead set against the token vocabulary
0774: int maxToken = grammar.tokenManager.maxTokenType();
0775: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0776: // remove stuff predicted by preceding alts and follow of block
0777: removeCompetingPredictionSets(l.fset, atom);
0778: }
0779: return l;
0780: }
0781:
0782: /**The lookahead of a (...)+ block is the combined lookahead of
0783: * all alternatives and, if an empty path is found, the lookahead
0784: * of what follows the block.
0785: */
0786: public Lookahead look(int k, OneOrMoreBlock blk) {
0787: if (DEBUG_ANALYZER)
            System.out.println("look+(" + k + "," + blk + ")");
0789: Lookahead p = look(k, (AlternativeBlock) blk);
0790: return p;
0791: }
0792:
0793: /**Combine the lookahead computed for each alternative.
0794: * Lock the node so that no other computation may come back
0795: * on itself--infinite loop. This also implies infinite left-recursion
0796: * in the grammar (or an error in this algorithm ;)).
0797: */
0798: public Lookahead look(int k, RuleBlock blk) {
0799: if (DEBUG_ANALYZER)
0800: System.out.println("lookRuleBlk(" + k + "," + blk + ")");
0801: Lookahead p = look(k, (AlternativeBlock) blk);
0802: return p;
0803: }
0804:
0805: /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
0806: * <p>
0807: * TJP says 8/12/99: not true anymore:
0808: * Lexical rules never compute follow. They set epsilon and
0809: * the code generator gens code to check for any character.
0810: * The code generator must remove the tokens used to predict
0811: * any previous alts in the same block.
0812: * <p>
0813: * When the last node of a rule is reached and noFOLLOW,
0814: * it implies that a "local" FOLLOW will be computed
0815: * after this call. I.e.,
0816: * <pre>
0817: * a : b A;
0818: * b : B | ;
0819: * c : b C;
0820: * </pre>
0821: * Here, when computing the look of rule b from rule a,
0822: * we want only {B,EPSILON_TYPE} so that look(b A) will
0823: * be {B,A} not {B,A,C}.
0824: * <p>
0825: * if the end block is not locked and the FOLLOW is
0826: * wanted, the algorithm must compute the lookahead
0827: * of what follows references to this rule. If
0828: * end block is locked, FOLLOW will return an empty set
0829: * with a cycle to the rule associated with this end block.
0830: */
0831: public Lookahead look(int k, RuleEndElement end) {
0832: if (DEBUG_ANALYZER)
0833: System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW="
0834: + end.noFOLLOW + "; lock is " + end.lock[k]);
0835: if (/*lexicalAnalysis ||*/end.noFOLLOW) {
0836: Lookahead p = new Lookahead();
0837: p.setEpsilon();
0838: p.epsilonDepth = BitSet.of(k);
0839: return p;
0840: }
0841: Lookahead p = FOLLOW(k, end);
0842: return p;
0843: }
0844:
0845: /**Compute the lookahead contributed by a rule reference.
0846: *
0847: * <p>
0848: * When computing ruleref lookahead, we don't want the FOLLOW
0849: * computation done if an empty path exists for the rule.
0850: * The FOLLOW is too loose of a set...we want only to
0851: * include the "local" FOLLOW or what can follow this
0852: * particular ref to the node. In other words, we use
0853: * context information to reduce the complexity of the
0854: * analysis and strengthen the parser.
0855: *
0856: * The noFOLLOW flag is used as a means of restricting
0857: * the FOLLOW to a "local" FOLLOW. This variable is
0858: * orthogonal to the <tt>lock</tt> variable that prevents
0859: * infinite recursion. noFOLLOW does not care about what k is.
0860: */
0861: public Lookahead look(int k, RuleRefElement rr) {
0862: if (DEBUG_ANALYZER)
0863: System.out.println("lookRuleRef(" + k + "," + rr + ")");
0864: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
0865: if (rs == null || !rs.defined) {
0866: tool
0867: .error("no definition of rule " + rr.targetRule,
0868: grammar.getFilename(), rr.getLine(), rr
0869: .getColumn());
0870: return new Lookahead();
0871: }
0872: RuleBlock rb = rs.getBlock();
0873: RuleEndElement end = rb.endNode;
0874: boolean saveEnd = end.noFOLLOW;
0875: end.noFOLLOW = true;
0876: // go off to the rule and get the lookahead (w/o FOLLOW)
0877: Lookahead p = look(k, rr.targetRule);
0878: if (DEBUG_ANALYZER)
0879: System.out
0880: .println("back from rule ref to " + rr.targetRule);
0881: // restore state of end block
0882: end.noFOLLOW = saveEnd;
0883:
0884: // check for infinite recursion. If a cycle is returned: trouble!
0885: if (p.cycle != null) {
0886: tool.error("infinite recursion to rule " + p.cycle
0887: + " from rule " + rr.enclosingRuleName, grammar
0888: .getFilename(), rr.getLine(), rr.getColumn());
0889: }
0890:
0891: // is the local FOLLOW required?
0892: if (p.containsEpsilon()) {
0893: if (DEBUG_ANALYZER)
0894: System.out.println("rule ref to " + rr.targetRule
0895: + " has eps, depth: " + p.epsilonDepth);
0896:
0897: // remove epsilon
0898: p.resetEpsilon();
0899: // fset.clear(EPSILON_TYPE);
0900:
0901: // for each lookahead depth that saw epsilon
0902: int[] depths = p.epsilonDepth.toArray();
0903: p.epsilonDepth = null; // clear all epsilon stuff
0904: for (int i = 0; i < depths.length; i++) {
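                // note: k - (k - depths[i]) simplifies to depths[i]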
0905: int rk = k - (k - depths[i]);
0906: Lookahead q = rr.next.look(rk); // see comments in Lookahead
0907: p.combineWith(q);
0908: }
0909: // note: any of these look() computations for local follow can
0910: // set EPSILON in the set again if the end of this rule is found.
0911: }
0912:
0913: return p;
0914: }
0915:
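    /** In a lexer, the lookahead at depth k is the k-th character of the
     *  literal's processed text, or whatever follows the literal once k
     *  exceeds its length.  In a parser, the literal behaves like a single
     *  token reference (optionally complemented).
     */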
0916: public Lookahead look(int k, StringLiteralElement atom) {
0917: if (DEBUG_ANALYZER)
0918: System.out.println("lookStringLiteral(" + k + "," + atom
0919: + ")");
0920: if (lexicalAnalysis) {
0921: // need more lookahead than string can provide?
0922: if (k > atom.processedAtomText.length()) {
0923: return atom.next.look(k
0924: - atom.processedAtomText.length());
0925: } else {
0926: // get char at lookahead depth k, from the processed literal text
0927: return Lookahead.of(atom.processedAtomText
0928: .charAt(k - 1));
0929: }
0930: } else {
0931: // Skip until analysis hits k==1
0932: if (k > 1) {
0933: return atom.next.look(k - 1);
0934: }
0935: Lookahead l = Lookahead.of(atom.getType());
0936: if (atom.not) {
0937: // Invert the lookahead set against the token vocabulary
0938: int maxToken = grammar.tokenManager.maxTokenType();
0939: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0940: }
0941: return l;
0942: }
0943: }
0944:
0945: /**The lookahead of a (...)=> block is the lookahead of
0946: * what follows the block. By definition, the syntactic
0947: * predicate block defies static analysis (you want to try it
0948: * out at run-time). The LOOK of (a)=>A B is A for LL(1)
0949: * ### is this even called?
0950: */
0951: public Lookahead look(int k, SynPredBlock blk) {
0952: if (DEBUG_ANALYZER)
0953: System.out.println("look=>(" + k + "," + blk + ")");
0954: return blk.next.look(k);
0955: }
0956:
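    /** The lookahead of a token range at depth 1 is the set of token types
     *  begin..end; deeper lookahead comes from what follows.
     */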
0957: public Lookahead look(int k, TokenRangeElement r) {
0958: if (DEBUG_ANALYZER)
0959: System.out.println("lookTokenRange(" + k + "," + r + ")");
0960: // Skip until analysis hits k==1
0961: if (k > 1) {
0962: return r.next.look(k - 1);
0963: }
0964: BitSet p = BitSet.of(r.begin);
0965: for (int i = r.begin + 1; i <= r.end; i++) {
0966: p.add(i);
0967: }
0968: return new Lookahead(p);
0969: }
0970:
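    /** The lookahead of a tree pattern #(root children) at depth 1 is
     *  determined by its root: the root's token type, its complement for
     *  ~root, or the wildcard set for a '.' root.
     */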
0971: public Lookahead look(int k, TreeElement t) {
0972: if (DEBUG_ANALYZER)
0973: System.out.println("look(" + k + "," + t.root + "["
0974: + t.root.getType() + "])");
0975: if (k > 1) {
0976: return t.next.look(k - 1);
0977: }
0978: Lookahead l = null;
0979: if (t.root instanceof WildcardElement) {
0980: l = t.root.look(1); // compute FIRST set minus previous rows
0981: } else {
0982: l = Lookahead.of(t.root.getType());
0983: if (t.root.not) {
0984: // Invert the lookahead set against the token vocabulary
0985: int maxToken = grammar.tokenManager.maxTokenType();
0986: l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
0987: }
0988: }
0989: return l;
0990: }
0991:
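    /** The wildcard '.' matches any character in the lexer's vocabulary,
     *  or any user-defined token type in a parser or tree grammar.
     */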
0992: public Lookahead look(int k, WildcardElement wc) {
0993: if (DEBUG_ANALYZER)
0994: System.out.println("look(" + k + "," + wc + ")");
0995:
0996: // Skip until analysis hits k==1
0997: if (k > 1) {
0998: return wc.next.look(k - 1);
0999: }
1000:
1001: BitSet b;
1002: if (lexicalAnalysis) {
1003: // Copy the character vocabulary
1004: b = (BitSet) ((LexerGrammar) grammar).charVocabulary
1005: .clone();
1006: } else {
1007: b = new BitSet(1);
            // Complement the empty set over the token vocabulary: every user-defined token type
1009: int maxToken = grammar.tokenManager.maxTokenType();
1010: b.notInPlace(Token.MIN_USER_TYPE, maxToken);
1011: if (DEBUG_ANALYZER)
1012: System.out.println("look(" + k + "," + wc
1013: + ") after not: " + b);
1014: }
1015:
1016: // Remove prediction sets from competing alternatives
1017: // removeCompetingPredictionSets(b, wc);
1018:
1019: return new Lookahead(b);
1020: }
1021:
1022: /** The (...)* element is the combined lookahead of the alternatives and what can
1023: * follow the loop.
1024: */
1025: public Lookahead look(int k, ZeroOrMoreBlock blk) {
1026: if (DEBUG_ANALYZER)
1027: System.out.println("look*(" + k + "," + blk + ")");
1028: Lookahead p = look(k, (AlternativeBlock) blk);
1029: Lookahead q = blk.next.look(k);
1030: p.combineWith(q);
1031: return p;
1032: }
1033:
1034: /**Compute the combined lookahead for all productions of a rule.
1035: * If the lookahead returns with epsilon, at least one epsilon
1036: * path exists (one that consumes no tokens). The noFOLLOW
     * flag being set for this endruleblk indicates that a
     * rule ref invoked this rule.
1039: *
1040: * Currently only look(RuleRef) calls this. There is no need
1041: * for the code generator to call this.
1042: */
1043: public Lookahead look(int k, String rule) {
1044: if (DEBUG_ANALYZER)
1045: System.out.println("lookRuleName(" + k + "," + rule + ")");
1046: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rule);
1047: RuleBlock rb = rs.getBlock();
1048:
1049: if (rb.lock[k]) {
1050: if (DEBUG_ANALYZER)
1051: System.out.println("infinite recursion to rule "
1052: + rb.getRuleName());
1053: return new Lookahead(rule);
1054: }
1055:
1056: // have we computed it before?
1057: if (rb.cache[k] != null) {
1058: if (DEBUG_ANALYZER) {
1059: System.out.println("found depth "
1060: + k
1061: + " result in FIRST "
1062: + rule
1063: + " cache: "
1064: + rb.cache[k].toString(",", charFormatter,
1065: grammar));
1066: }
1067: return (Lookahead) rb.cache[k].clone();
1068: }
1069:
1070: rb.lock[k] = true;
1071: Lookahead p = look(k, (RuleBlock) rb);
1072: rb.lock[k] = false;
1073:
1074: // cache results
1075: rb.cache[k] = (Lookahead) p.clone();
1076: if (DEBUG_ANALYZER) {
1077: System.out
1078: .println("saving depth "
1079: + k
1080: + " result in FIRST "
1081: + rule
1082: + " cache: "
1083: + rb.cache[k].toString(",", charFormatter,
1084: grammar));
1085: }
1086: return p;
1087: }
1088:
    /** If the first k-1 sets are singleton sets, the approximate
1090: * lookahead analysis is equivalent to full lookahead analysis.
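     * For example, after ('a'..'d')+ the follow strings "ad" and "cb"
     * collapse to the sets {a,c} and {d,b}; the depth-1 set is not a
     * singleton, so the approximation can differ from full analysis.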
1091: */
1092: public static boolean lookaheadEquivForApproxAndFullAnalysis(
1093: Lookahead[] bset, int k) {
1094: // first k-1 sets degree 1?
1095: for (int i = 1; i <= k - 1; i++) {
1096: BitSet look = bset[i].fset;
1097: if (look.degree() > 1) {
1098: return false;
1099: }
1100: }
1101: return true;
1102: }
1103:
1104: /** Remove the prediction sets from preceding alternatives
1105: * and follow set, but *only* if this element is the first element
     * of the alternative. The class members currentBlock and
1107: * currentBlock.analysisAlt must be set correctly.
1108: * @param b The prediction bitset to be modified
     * @param el The element of interest
1110: */
1111: private void removeCompetingPredictionSets(BitSet b,
1112: AlternativeElement el) {
1113: // Only do this if the element is the first element of the alt,
1114: // because we are making an implicit assumption that k==1.
1115: GrammarElement head = currentBlock
1116: .getAlternativeAt(currentBlock.analysisAlt).head;
1117: // if element is #(. blah) then check to see if el is root
1118: if (head instanceof TreeElement) {
1119: if (((TreeElement) head).root != el) {
1120: return;
1121: }
1122: } else if (el != head) {
1123: return;
1124: }
1125: for (int i = 0; i < currentBlock.analysisAlt; i++) {
1126: AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1127: b.subtractInPlace(e.look(1).fset);
1128: }
1129: }
1130:
1131: /** Remove the prediction sets from preceding alternatives
     * The class member currentBlock must be set correctly.
1133: * Remove prediction sets from 1..k.
1134: * @param look The prediction lookahead to be modified
     * @param el The element of interest
     * @param k How deep into the lookahead to modify
1137: */
1138: private void removeCompetingPredictionSetsFromWildcard(
1139: Lookahead[] look, AlternativeElement el, int k) {
1140: for (int d = 1; d <= k; d++) {
1141: for (int i = 0; i < currentBlock.analysisAlt; i++) {
1142: AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1143: look[d].fset.subtractInPlace(e.look(d).fset);
1144: }
1145: }
1146: }
1147:
1148: /** reset the analyzer so it looks like a new one */
1149: private void reset() {
1150: grammar = null;
1151: DEBUG_ANALYZER = false;
1152: currentBlock = null;
1153: lexicalAnalysis = false;
1154: }
1155:
1156: /** Set the grammar for the analyzer */
1157: public void setGrammar(Grammar g) {
1158: if (grammar != null) {
1159: reset();
1160: }
1161: grammar = g;
1162:
1163: // Is this lexical?
1164: lexicalAnalysis = (grammar instanceof LexerGrammar);
1165: DEBUG_ANALYZER = grammar.analyzerDebug;
1166: }
1167:
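    /** Can the lookahead of this subrule be computed by simple set
     *  complement (i.e., is a ~(...) inversion legal)?  Only plain,
     *  non-looping, non-predicated blocks whose alternatives each consist
     *  of exactly one char/token/range element qualify.
     */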
1168: public boolean subruleCanBeInverted(AlternativeBlock blk,
1169: boolean forLexer) {
1170: if (blk instanceof ZeroOrMoreBlock
1171: || blk instanceof OneOrMoreBlock
1172: || blk instanceof SynPredBlock) {
1173: return false;
1174: }
1175: // Cannot invert an empty subrule
1176: if (blk.alternatives.size() == 0) {
1177: return false;
1178: }
1179: // The block must only contain alternatives with a single element,
1180: // where each element is a char, token, char range, or token range.
1181: for (int i = 0; i < blk.alternatives.size(); i++) {
1182: Alternative alt = blk.getAlternativeAt(i);
1183: // Cannot have anything interesting in the alternative ...
1184: if (alt.synPred != null || alt.semPred != null
1185: || alt.exceptionSpec != null) {
1186: return false;
1187: }
1188: // ... and there must be one simple element
1189: AlternativeElement elt = alt.head;
1190: if (!(elt instanceof CharLiteralElement
1191: || elt instanceof TokenRefElement
1192: || elt instanceof CharRangeElement
1193: || elt instanceof TokenRangeElement || (elt instanceof StringLiteralElement && !forLexer))
1194: || !(elt.next instanceof BlockEndElement)
1195: || elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE) {
1196: return false;
1197: }
1198: }
1199: return true;
1200: }
1201: }
|