001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.tool;
029:
030: import org.antlr.analysis.*;
031: import org.antlr.misc.IntSet;
032: import org.antlr.misc.IntervalSet;
033:
034: import java.util.Iterator;
035: import java.util.List;
036:
037: /** Routines to construct StateClusters from EBNF grammar constructs.
038: * No optimization is done to remove unnecessary epsilon edges.
039: *
040: * TODO: add an optimization that reduces number of states and transitions
041: * will help with speed of conversion and make it easier to view NFA. For
042: * example, o-A->o-->o-B->o should be o-A->o-B->o
043: */
044: public class NFAFactory {
045: /** This factory is attached to a specifc NFA that it is building.
046: * The NFA will be filled up with states and transitions.
047: */
048: NFA nfa = null;
049:
050: String currentRuleName = null;
051:
052: /** Used to assign state numbers */
053: protected int stateCounter = 0;
054:
055: public NFAFactory(NFA nfa) {
056: nfa.setFactory(this );
057: this .nfa = nfa;
058: }
059:
060: public NFAState newState() {
061: NFAState n = new NFAState(nfa);
062: int state = stateCounter;
063: n.stateNumber = state;
064: stateCounter++;
065: nfa.addState(n);
066: n.setEnclosingRuleName(currentRuleName);
067: return n;
068: }
069:
070: public int getNumberOfStates() {
071: return stateCounter;
072: }
073:
074: /** Optimize an alternative (list of grammar elements).
075: *
076: * Walk the chain of elements (which can be complicated loop blocks...)
077: * and throw away any epsilon transitions used to link up simple elements.
078: *
079: * This only removes 195 states from the java.g's NFA, but every little
080: * bit helps. Perhaps I can improve in the future.
081: */
082: public void optimizeAlternative(StateCluster alt) {
083: NFAState s = alt.left;
084: while (s != alt.right) {
085: // if it's a block element, jump over it and continue
086: if (s.endOfBlockStateNumber != State.INVALID_STATE_NUMBER) {
087: s = nfa.getState(s.endOfBlockStateNumber);
088: continue;
089: }
090: Transition t = s.transition(0);
091: if (t instanceof RuleClosureTransition) {
092: s = ((RuleClosureTransition) t).getFollowState();
093: continue;
094: }
095: if (t.label.isEpsilon() && s.getNumberOfTransitions() == 1) {
096: // bypass epsilon transition and point to what the epsilon's
097: // target points to unless that epsilon transition points to
098: // a block or loop etc.. Also don't collapse epsilons that
099: // point at the last node of the alt
100: NFAState epsilonTarget = (NFAState) t.target;
101: if (epsilonTarget.endOfBlockStateNumber == State.INVALID_STATE_NUMBER
102: && epsilonTarget.transition(0) != null) {
103: s.setTransition0(epsilonTarget.transition(0));
104: /*
105: System.out.println("### opt "+s.stateNumber+"->"+
106: epsilonTarget.transition(0).target.stateNumber);
107: */
108: }
109: }
110: s = (NFAState) t.target;
111: }
112: }
113:
114: /** From label A build Graph o-A->o */
115: public StateCluster build_Atom(int label) {
116: NFAState left = newState();
117: NFAState right = newState();
118: transitionBetweenStates(left, right, label);
119: StateCluster g = new StateCluster(left, right);
120: return g;
121: }
122:
123: /** From set build single edge graph o->o-set->o. To conform to
124: * what an alt block looks like, must have extra state on left.
125: */
126: public StateCluster build_Set(IntSet set) {
127: //NFAState start = newState();
128: NFAState left = newState();
129: //transitionBetweenStates(start, left, Label.EPSILON);
130: NFAState right = newState();
131: Transition e = new Transition(new Label(set), right);
132: left.addTransition(e);
133: StateCluster g = new StateCluster(left, right);
134: return g;
135: }
136:
137: /** Can only complement block of simple alts; can complement build_Set()
138: * result, that is. Get set and complement, replace old with complement.
139: public StateCluster build_AlternativeBlockComplement(StateCluster blk) {
140: State s0 = blk.left;
141: IntSet set = getCollapsedBlockAsSet(s0);
142: if ( set!=null ) {
143: // if set is available, then structure known and blk is a set
144: set = nfa.grammar.complement(set);
145: Label label = s0.transition(0).target.transition(0).label;
146: label.setSet(set);
147: }
148: return blk;
149: }
150: */
151:
152: public StateCluster build_Range(int a, int b) {
153: NFAState left = newState();
154: NFAState right = newState();
155: Transition e = new Transition(new Label(IntervalSet.of(a, b)),
156: right);
157: left.addTransition(e);
158: StateCluster g = new StateCluster(left, right);
159: return g;
160: }
161:
162: /** From char 'c' build StateCluster o-intValue(c)->o
163: */
164: public StateCluster build_CharLiteralAtom(String charLiteral) {
165: int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteral);
166: return build_Atom(c);
167: }
168:
169: /** From char 'c' build StateCluster o-intValue(c)->o
170: * can include unicode spec likes '\u0024' later. Accepts
171: * actual unicode 16-bit now, of course, by default.
172: * TODO not supplemental char clean!
173: */
174: public StateCluster build_CharRange(String a, String b) {
175: int from = Grammar.getCharValueFromGrammarCharLiteral(a);
176: int to = Grammar.getCharValueFromGrammarCharLiteral(b);
177: return build_Range(from, to);
178: }
179:
180: /** For a non-lexer, just build a simple token reference atom.
181: * For a lexer, a string is a sequence of char to match. That is,
182: * "fog" is treated as 'f' 'o' 'g' not as a single transition in
183: * the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
184: * for n characters.
185: */
186: public StateCluster build_StringLiteralAtom(String stringLiteral) {
187: if (nfa.grammar.type == Grammar.LEXER) {
188: StringBuffer chars = Grammar
189: .getUnescapedStringFromGrammarStringLiteral(stringLiteral);
190: NFAState first = newState();
191: NFAState last = null;
192: NFAState prev = first;
193: for (int i = 0; i < chars.length(); i++) {
194: int c = chars.charAt(i);
195: NFAState next = newState();
196: transitionBetweenStates(prev, next, c);
197: prev = last = next;
198: }
199: return new StateCluster(first, last);
200: }
201:
202: // a simple token reference in non-Lexers
203: int tokenType = nfa.grammar.getTokenType(stringLiteral);
204: return build_Atom(tokenType);
205: }
206:
207: /** For reference to rule r, build
208: *
209: * o-e->(r) o
210: *
211: * where (r) is the start of rule r and the trailing o is not linked
212: * to from rule ref state directly (it's done thru the transition(0)
213: * RuleClosureTransition.
214: *
215: * If the rule r is just a list of tokens, it's block will be just
216: * a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
217: * the rule reference, but i'm not doing this yet as I'm not sure
218: * it would help much in the NFA->DFA construction.
219: *
220: * TODO add to codegen: collapse alt blks that are sets into single matchSet
221: */
222: public StateCluster build_RuleRef(int ruleIndex, NFAState ruleStart) {
223: /*
224: System.out.println("building ref to rule "+ruleIndex+": "+
225: nfa.getGrammar().getRuleName(ruleIndex));
226: */
227: NFAState left = newState();
228: // left.setDescription("ref to "+ruleStart.getDescription());
229: NFAState right = newState();
230: // right.setDescription("NFAState following ref to "+ruleStart.getDescription());
231: Transition e = new RuleClosureTransition(ruleIndex, ruleStart,
232: right);
233: left.addTransition(e);
234: StateCluster g = new StateCluster(left, right);
235: return g;
236: }
237:
238: /** From an empty alternative build StateCluster o-e->o */
239: public StateCluster build_Epsilon() {
240: NFAState left = newState();
241: NFAState right = newState();
242: transitionBetweenStates(left, right, Label.EPSILON);
243: StateCluster g = new StateCluster(left, right);
244: return g;
245: }
246:
247: /** Build what amounts to an epsilon transition with a semantic
248: * predicate action. The pred is a pointer into the AST of
249: * the SEMPRED token.
250: */
251: public StateCluster build_SemanticPredicate(GrammarAST pred) {
252: // don't count syn preds
253: if (!pred.getText().toUpperCase().startsWith(
254: Grammar.SYNPRED_RULE_PREFIX.toUpperCase())) {
255: nfa.grammar.numberOfSemanticPredicates++;
256: }
257: NFAState left = newState();
258: NFAState right = newState();
259: Transition e = new Transition(new Label(pred), right);
260: left.addTransition(e);
261: StateCluster g = new StateCluster(left, right);
262: return g;
263: }
264:
265: /** add an EOF transition to any rule end NFAState that points to nothing
266: * (i.e., for all those rules not invoked by another rule). These
267: * are start symbols then.
268: *
269: * Return the number of grammar entry points; i.e., how many rules are
270: * not invoked by another rule (they can only be invoked from outside).
271: * These are the start rules.
272: */
273: public int build_EOFStates(List rules) {
274: int numberUnInvokedRules = 0;
275: for (Iterator iterator = rules.iterator(); iterator.hasNext();) {
276: Rule r = (Rule) iterator.next();
277: String ruleName = r.name;
278: NFAState endNFAState = nfa.grammar
279: .getRuleStopState(ruleName);
280: // Is this rule a start symbol? (no follow links)
281: if (endNFAState.transition(0) == null) {
282: // if so, then don't let algorithm fall off the end of
283: // the rule, make it hit EOF/EOT.
284: /*
285: if ( nfa.grammar.type==Grammar.LEXER ) {
286: return; // 11/28/2005: try having only Tokens with EOT transition
287: }
288: if ( nfa.grammar.type!=Grammar.LEXER ||
289: ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) )
290: {
291: build_EOFState(endNFAState);
292: }
293: */
294: build_EOFState(endNFAState);
295: // track how many rules have been invoked by another rule
296: numberUnInvokedRules++;
297: }
298: }
299: return numberUnInvokedRules;
300: }
301:
302: /** set up an NFA NFAState that will yield eof tokens or,
303: * in the case of a lexer grammar, an EOT token when the conversion
304: * hits the end of a rule.
305: */
306: private void build_EOFState(NFAState endNFAState) {
307: NFAState end = newState();
308: int label = Label.EOF;
309: if (nfa.grammar.type == Grammar.LEXER) {
310: label = Label.EOT;
311: end.setEOTTargetState(true);
312: }
313: /*
314: System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+
315: " loop on end of state "+endNFAState.getDescription()+
316: " to state "+end.stateNumber);
317: */
318: Transition toEnd = new Transition(label, end);
319: endNFAState.addTransition(toEnd);
320: }
321:
322: /** From A B build A-e->B (that is, build an epsilon arc from right
323: * of A to left of B).
324: *
325: * As a convenience, return B if A is null or return A if B is null.
326: */
327: public StateCluster build_AB(StateCluster A, StateCluster B) {
328: if (A == null) {
329: return B;
330: }
331: if (B == null) {
332: return A;
333: }
334: transitionBetweenStates(A.right, B.left, Label.EPSILON);
335: StateCluster g = new StateCluster(A.left, B.right);
336: return g;
337: }
338:
339: /** From a set ('a'|'b') build
340: *
341: * o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
342: */
343: public StateCluster build_AlternativeBlockFromSet(StateCluster set) {
344: if (set == null) {
345: return null;
346: }
347:
348: // single alt, no decision, just return only alt state cluster
349: NFAState startOfAlt = newState(); // must have this no matter what
350: transitionBetweenStates(startOfAlt, set.left, Label.EPSILON);
351:
352: return new StateCluster(startOfAlt, set.right);
353: }
354:
355: /** From A|B|..|Z alternative block build
356: *
357: * o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts)
358: * | ^
359: * o->o-B->o--|
360: * | |
361: * ... |
362: * | |
363: * o->o-Z->o--|
364: *
365: * So every alternative gets begin NFAState connected by epsilon
366: * and every alt right side points at a block end NFAState. There is a
367: * new NFAState in the NFAState in the StateCluster for each alt plus one for the
368: * end NFAState.
369: *
370: * Special case: only one alternative: don't make a block with alt
371: * begin/end.
372: *
373: * Special case: if just a list of tokens/chars/sets, then collapse
374: * to a single edge'd o-set->o graph.
375: *
376: * Set alt number (1..n) in the left-Transition NFAState.
377: */
378: public StateCluster build_AlternativeBlock(
379: List alternativeStateClusters) {
380: StateCluster result = null;
381: if (alternativeStateClusters == null
382: || alternativeStateClusters.size() == 0) {
383: return null;
384: }
385:
386: // single alt case
387: if (alternativeStateClusters.size() == 1) {
388: // single alt, no decision, just return only alt state cluster
389: StateCluster g = (StateCluster) alternativeStateClusters
390: .get(0);
391: NFAState startOfAlt = newState(); // must have this no matter what
392: transitionBetweenStates(startOfAlt, g.left, Label.EPSILON);
393:
394: //System.out.println("### opt saved start/stop end in (...)");
395: return new StateCluster(startOfAlt, g.right);
396: }
397:
398: // even if we can collapse for lookahead purposes, we will still
399: // need to predict the alts of this subrule in case there are actions
400: // etc... This is the decision that is pointed to from the AST node
401: // (always)
402: NFAState prevAlternative = null; // tracks prev so we can link to next alt
403: NFAState firstAlt = null;
404: NFAState blockEndNFAState = newState();
405: blockEndNFAState.setDescription("end block");
406: int altNum = 1;
407: for (Iterator iter = alternativeStateClusters.iterator(); iter
408: .hasNext();) {
409: StateCluster g = (StateCluster) iter.next();
410: // add begin NFAState for this alt connected by epsilon
411: NFAState left = newState();
412: left.setDescription("alt " + altNum + " of ()");
413: transitionBetweenStates(left, g.left, Label.EPSILON);
414: transitionBetweenStates(g.right, blockEndNFAState,
415: Label.EPSILON);
416: // Are we the first alternative?
417: if (firstAlt == null) {
418: firstAlt = left; // track extreme left node of StateCluster
419: } else {
420: // if not first alternative, must link to this alt from previous
421: transitionBetweenStates(prevAlternative, left,
422: Label.EPSILON);
423: }
424: prevAlternative = left;
425: altNum++;
426: }
427:
428: // return StateCluster pointing representing entire block
429: // Points to first alt NFAState on left, block end on right
430: result = new StateCluster(firstAlt, blockEndNFAState);
431:
432: firstAlt.decisionStateType = NFAState.BLOCK_START;
433:
434: // set EOB markers for Jean
435: firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber;
436:
437: return result;
438: }
439:
440: /** From (A)? build either:
441: *
442: * o--A->o
443: * | ^
444: * o---->|
445: *
446: * or, if A is a block, just add an empty alt to the end of the block
447: */
448: public StateCluster build_Aoptional(StateCluster A) {
449: StateCluster g = null;
450: int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left);
451: if (n == 1) {
452: // no decision, just wrap in an optional path
453: //NFAState decisionState = newState();
454: NFAState decisionState = A.left; // resuse left edge
455: decisionState.setDescription("only alt of ()? block");
456: NFAState emptyAlt = newState();
457: emptyAlt.setDescription("epsilon path of ()? block");
458: NFAState blockEndNFAState = null;
459: blockEndNFAState = newState();
460: transitionBetweenStates(A.right, blockEndNFAState,
461: Label.EPSILON);
462: blockEndNFAState.setDescription("end ()? block");
463: //transitionBetweenStates(decisionState, A.left, Label.EPSILON);
464: transitionBetweenStates(decisionState, emptyAlt,
465: Label.EPSILON);
466: transitionBetweenStates(emptyAlt, blockEndNFAState,
467: Label.EPSILON);
468:
469: // set EOB markers for Jean
470: decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
471: blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
472:
473: g = new StateCluster(decisionState, blockEndNFAState);
474: } else {
475: // a decision block, add an empty alt
476: NFAState lastRealAlt = nfa.grammar
477: .getNFAStateForAltOfDecision(A.left, n);
478: NFAState emptyAlt = newState();
479: emptyAlt.setDescription("epsilon path of ()? block");
480: transitionBetweenStates(lastRealAlt, emptyAlt,
481: Label.EPSILON);
482: transitionBetweenStates(emptyAlt, A.right, Label.EPSILON);
483:
484: // set EOB markers for Jean (I think this is redundant here)
485: A.left.endOfBlockStateNumber = A.right.stateNumber;
486: A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
487:
488: g = A; // return same block, but now with optional last path
489: }
490: g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
491:
492: return g;
493: }
494:
495: /** From (A)+ build
496: *
497: * |---| (Transition 2 from A.right points at alt 1)
498: * v | (follow of loop is Transition 1)
499: * o->o-A-o->o
500: *
501: * Meaning that the last NFAState in A points back to A's left Transition NFAState
502: * and we add a new begin/end NFAState. A can be single alternative or
503: * multiple.
504: *
505: * During analysis we'll call the follow link (transition 1) alt n+1 for
506: * an n-alt A block.
507: */
508: public StateCluster build_Aplus(StateCluster A) {
509: NFAState left = newState();
510: NFAState blockEndNFAState = newState();
511: blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
512:
513: // don't reuse A.right as loopback if it's right edge of another block
514: if (A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK) {
515: // nested A* so make another tail node to be the loop back
516: // instead of the usual A.right which is the EOB for inner loop
517: NFAState extraRightEdge = newState();
518: transitionBetweenStates(A.right, extraRightEdge,
519: Label.EPSILON);
520: A.right = extraRightEdge;
521: }
522:
523: transitionBetweenStates(A.right, blockEndNFAState,
524: Label.EPSILON); // follow is Transition 1
525: // turn A's block end into a loopback (acts like alt 2)
526: transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2
527: transitionBetweenStates(left, A.left, Label.EPSILON);
528:
529: A.right.decisionStateType = NFAState.LOOPBACK;
530: A.left.decisionStateType = NFAState.BLOCK_START;
531:
532: // set EOB markers for Jean
533: A.left.endOfBlockStateNumber = A.right.stateNumber;
534:
535: StateCluster g = new StateCluster(left, blockEndNFAState);
536: return g;
537: }
538:
539: /** From (A)* build
540: *
541: * |---|
542: * v |
543: * o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
544: * | ^
545: * o---------| (optional branch is 2nd alt of optional block containing A+)
546: *
547: * Meaning that the last (end) NFAState in A points back to A's
548: * left side NFAState and we add 3 new NFAStates (the
549: * optional branch is built just like an optional subrule).
550: * See the Aplus() method for more on the loop back Transition.
551: * The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
552: * can detect nested (A*)* loops and insert an extra node. Previously,
553: * two blocks shared same EOB node.
554: *
555: * There are 2 or 3 decision points in a A*. If A is not a block (i.e.,
556: * it only has one alt), then there are two decisions: the optional bypass
557: * and then loopback. If A is a block of alts, then there are three
558: * decisions: bypass, loopback, and A's decision point.
559: *
560: * Note that the optional bypass must be outside the loop as (A|B)* is
561: * not the same thing as (A|B|)+.
562: *
563: * This is an accurate NFA representation of the meaning of (A)*, but
564: * for generating code, I don't need a DFA for the optional branch by
565: * virtue of how I generate code. The exit-loopback-branch decision
566: * is sufficient to let me make an appropriate enter, exit, loop
567: * determination. See codegen.g
568: */
569: public StateCluster build_Astar(StateCluster A) {
570: NFAState bypassDecisionState = newState();
571: bypassDecisionState
572: .setDescription("enter loop path of ()* block");
573: NFAState optionalAlt = newState();
574: optionalAlt.setDescription("epsilon path of ()* block");
575: NFAState blockEndNFAState = newState();
576: blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
577:
578: // don't reuse A.right as loopback if it's right edge of another block
579: if (A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK) {
580: // nested A* so make another tail node to be the loop back
581: // instead of the usual A.right which is the EOB for inner loop
582: NFAState extraRightEdge = newState();
583: transitionBetweenStates(A.right, extraRightEdge,
584: Label.EPSILON);
585: A.right = extraRightEdge;
586: }
587:
588: // convert A's end block to loopback
589: A.right.setDescription("()* loopback");
590: // Transition 1 to actual block of stuff
591: transitionBetweenStates(bypassDecisionState, A.left,
592: Label.EPSILON);
593: // Transition 2 optional to bypass
594: transitionBetweenStates(bypassDecisionState, optionalAlt,
595: Label.EPSILON);
596: transitionBetweenStates(optionalAlt, blockEndNFAState,
597: Label.EPSILON);
598: // Transition 1 of end block exits
599: transitionBetweenStates(A.right, blockEndNFAState,
600: Label.EPSILON);
601: // Transition 2 of end block loops
602: transitionBetweenStates(A.right, A.left, Label.EPSILON);
603:
604: bypassDecisionState.decisionStateType = NFAState.BYPASS;
605: A.left.decisionStateType = NFAState.BLOCK_START;
606: A.right.decisionStateType = NFAState.LOOPBACK;
607:
608: // set EOB markers for Jean
609: A.left.endOfBlockStateNumber = A.right.stateNumber;
610: bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
611:
612: StateCluster g = new StateCluster(bypassDecisionState,
613: blockEndNFAState);
614: return g;
615: }
616:
617: /** Build an NFA predictor for special rule called Tokens manually that
618: * predicts which token will succeed. The refs to the rules are not
619: * RuleRefTransitions as I want DFA conversion to stop at the EOT
620: * transition on the end of each token, rather than return to Tokens rule.
621: * If I used normal build_alternativeBlock for this, the RuleRefTransitions
622: * would save return address when jumping away from Tokens rule.
623: *
624: * All I do here is build n new states for n rules with an epsilon
625: * edge to the rule start states and then to the next state in the
626: * list:
627: *
628: * o->(A) (a state links to start of A and to next in list)
629: * |
630: * o->(B)
631: * |
632: * ...
633: * |
634: * o->(Z)
635: *
636: * This is the NFA created for the artificial rule created in
637: * Grammar.addArtificialMatchTokensRule().
638: *
639: * 11/28/2005: removed so we can use normal rule construction for Tokens.
640: public NFAState build_ArtificialMatchTokensRuleNFA() {
641: int altNum = 1;
642: NFAState firstAlt = null; // the start state for the "rule"
643: NFAState prevAlternative = null;
644: Iterator iter = nfa.grammar.getRules().iterator();
645: // TODO: add a single decision node/state for good description
646: while (iter.hasNext()) {
647: Rule r = (Rule) iter.next();
648: String ruleName = r.name;
649: String modifier = nfa.grammar.getRuleModifier(ruleName);
650: if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ||
651: (modifier!=null &&
652: modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) )
653: {
654: continue; // don't loop to yourself or do nontoken rules
655: }
656: NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName);
657: NFAState left = newState();
658: left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME);
659: transitionBetweenStates(left, ruleStartState, Label.EPSILON);
660: // Are we the first alternative?
661: if ( firstAlt==null ) {
662: firstAlt = left; // track extreme top left node as rule start
663: }
664: else {
665: // if not first alternative, must link to this alt from previous
666: transitionBetweenStates(prevAlternative, left, Label.EPSILON);
667: }
668: prevAlternative = left;
669: altNum++;
670: }
671: firstAlt.decisionStateType = NFAState.BLOCK_START;
672:
673: return firstAlt;
674: }
675: */
676:
677: /** Build an atom with all possible values in its label */
678: public StateCluster build_Wildcard() {
679: NFAState left = newState();
680: NFAState right = newState();
681: Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens
682: Transition e = new Transition(label, right);
683: left.addTransition(e);
684: StateCluster g = new StateCluster(left, right);
685: return g;
686: }
687:
688: /** Given a collapsed block of alts (a set of atoms), pull out
689: * the set and return it.
690: */
691: protected IntSet getCollapsedBlockAsSet(State blk) {
692: State s0 = blk;
693: if (s0 != null && s0.transition(0) != null) {
694: State s1 = s0.transition(0).target;
695: if (s1 != null && s1.transition(0) != null) {
696: Label label = s1.transition(0).label;
697: if (label.isSet()) {
698: return label.getSet();
699: }
700: }
701: }
702: return null;
703: }
704:
705: private void transitionBetweenStates(NFAState a, NFAState b,
706: int label) {
707: Transition e = new Transition(label, b);
708: a.addTransition(e);
709: }
710: }
|