/*
 [The "BSD licence"]
 Copyright (c) 2005-2006 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;

import org.antlr.Tool;
import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.IntSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.IntStream;
import org.antlr.stringtemplate.StringTemplate;
import org.antlr.tool.*;

import java.util.*;
/** A DFA (converted from a grammar's NFA).
 *  DFAs are used as prediction machines for alternative blocks in all kinds
 *  of recognizers (lexers, parsers, tree walkers).
 */
public class DFA {
	public static final int REACHABLE_UNKNOWN = -2;
	public static final int REACHABLE_BUSY = -1; // in process of computing
	public static final int REACHABLE_NO = 0;
	public static final int REACHABLE_YES = 1;

	/** Prevent explosion of DFA states during conversion. The max number
	 *  of states per alt in a single decision's DFA.
	 */
	public static final int MAX_STATES_PER_ALT_IN_DFA = 450;

	/** Max time (in milliseconds) to spend converting a single decision's
	 *  DFA; set to 0 to not terminate early.
	 */
	public static int MAX_TIME_PER_DFA_CREATION = 1 * 1000;

	/** How many edges can each DFA state have before a "special" state
	 *  is created that uses IF expressions instead of a table?
	 */
	public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;

	/** What's the start state for this DFA? */
	public DFAState startState;

	/** This DFA is being built for which decision? */
	public int decisionNumber = 0;

	/** From what NFAState did we create the DFA? */
	public NFAState decisionNFAStartState;

	/** The printable grammar fragment associated with this DFA */
	public String description;

	/** A set of all uniquely-numbered DFA states. Maps a DFAState
	 *  to the canonical DFAState object. We use this to detect
	 *  existing DFA states. Map<DFAState,DFAState>. Use a Map so
	 *  we can get the old state back (a Set only tells you whether it's there).
	 *  Not used during fixed k lookahead as it's a waste to fill it with
	 *  a dup of the states array.
	 */
	protected Map uniqueStates = new HashMap();

	/** Maps the state number to the actual DFAState. Use a Vector as it
	 *  grows automatically when I set the ith element. This contains all
	 *  states, but the states are not unique. s3 might be the same as s1, so
	 *  s3 -> s1 in this table. This is how cycles occur. If fixed k,
	 *  then these states will all be unique as states[i] always points
	 *  at state i when no cycles exist.
	 *
	 *  This is managed in parallel with uniqueStates and simply provides
	 *  a way to go from state number to DFAState rather than via a
	 *  hash lookup.
	 */
	protected Vector states = new Vector();

	/** Unique state numbers */
	protected int stateCounter = 0;

	/** Count only new states, not states that were rejected as already present. */
	protected int numberOfStates = 0;

	/** User-specified max fixed lookahead. If 0, no fixed k was specified.
	 *  -1 implies we have not yet looked at the options table to set k.
	 */
	protected int user_k = -1;

	/** While building the DFA, track max lookahead depth if not cyclic */
	protected int max_k = -1;

	/** Is this DFA reduced? I.e., can all states lead to an accept state? */
	protected boolean reduced = true;

	/** Are there any loops in this DFA?
	 *  Computed by doesStateReachAcceptState()
	 */
	protected boolean cyclic = false;

	/** Each alt in an NFA derived from a grammar must have a DFA state that
	 *  predicts it, lest the parser not know what to do. Nondeterminisms can
	 *  lead to this situation (assuming no semantic predicates can resolve
	 *  the problem), as can cases where, for some reason, I cannot compute
	 *  the lookahead (which might arise from an error in the algorithm or
	 *  from left-recursion, etc.). This list starts out with all alts
	 *  contained and then in method doesStateReachAcceptState() I remove
	 *  the alts I know to be uniquely predicted.
	 */
	protected List unreachableAlts;

	protected int nAlts = 0;

	/** We only want one accept state per predicted alt; track here */
	protected DFAState[] altToAcceptState;

	/** Track, for each alt, whether it discovers recursion during
	 *  NFA-to-DFA conversion; >1 alt with recursion implies the decision
	 *  is nonregular.
	 */
	protected IntSet recursiveAltSet = new IntervalSet();

	/** Which NFA are we converting (well, which piece of the NFA)? */
	public NFA nfa;

	protected NFAToDFAConverter nfaConverter;

	/** This probe tells you a lot about a decision and is useful even
	 *  when there is no error such as when a syntactic nondeterminism
	 *  is solved via semantic predicates. Perhaps a GUI would want
	 *  the ability to show that.
	 */
	public DecisionProbe probe = new DecisionProbe(this);

	/** Track absolute time of the conversion so we can have a failsafe:
	 *  if it takes too long, then terminate. Assume bugs are in the
	 *  analysis engine.
	 */
	protected long conversionStartTime;

	/** Map an edge transition table to a unique set number; ordered so
	 *  we can push into the output template as an ordered list of sets
	 *  and then ref them from within the transition[][] table. Like this
	 *  for the C# target:
	 *      public static readonly short[] DFA30_transition0 =
	 *          new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
	 *      public static readonly short[] DFA30_transition1 =
	 *          new short[] { 21 };
	 *      public static readonly short[][] DFA30_transition = {
	 *          DFA30_transition0,
	 *          DFA30_transition0,
	 *          DFA30_transition1,
	 *          ...
	 *      };
	 */
	public Map edgeTransitionClassMap = new LinkedHashMap();

	/** The unique edge transition class number; every time we see a new
	 *  set of edges emanating from a state, we number it so we can reuse
	 *  it if the same set is ever seen again for another state. For the Java
	 *  grammar, some of the big edge transition tables are seen about 57 times.
	 */
	protected int edgeTransitionClass = 0;

	/* This DFA can be converted to a transition[state][char] table and
	 * the following tables are filled by createStateTables upon request.
	 * These are injected into the templates for code generation.
	 * See the March 25, 2006 entry for a description:
	 * http://www.antlr.org/blog/antlr3/codegen.tml
	 * We often use Vector since you can't set the ith position in a List
	 * and have it extend the list size; bizarre.
	 */

	/** List of special DFAState objects */
	public List specialStates;
	/** List of StringTemplates for special states. */
	public List specialStateSTs;
	public Vector accept;
	public Vector eot;
	public Vector eof;
	public Vector min;
	public Vector max;
	public Vector special;
	public Vector transition;
	/** Just the Vector<Integer> indicating which unique edge table is at
	 *  position i.
	 */
	public Vector transitionEdgeTables; // not used by Java yet
	protected int uniqueCompressedSpecialStateNum = 0;

	public DFA(int decisionNumber, NFAState decisionStartState) {
		this.decisionNumber = decisionNumber;
		this.decisionNFAStartState = decisionStartState;
		nfa = decisionStartState.nfa;
		nAlts = nfa.grammar
				.getNumberOfAltsForDecisionNFA(decisionStartState);
		//setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
		initAltRelatedInfo();

		//long start = System.currentTimeMillis();
		nfaConverter = new NFAToDFAConverter(this);
		nfaConverter.convert();

		// figure out if there are problems with decision
		verify();

		if (!probe.isDeterministic() || probe.analysisAborted()
				|| probe.analysisOverflowed()) {
			probe.issueWarnings();
		}

		// must be after verify as it computes cyclic, needed by this routine
		// should be after warnings because early termination or something
		// will not allow the reset to operate properly in some cases.
		resetStateNumbersToBeContiguous();

		//long stop = System.currentTimeMillis();
		//System.out.println("verify cost: "+(int)(stop-start)+" ms");

		if (Tool.internalOption_PrintDFA) {
			System.out.println("DFA d=" + decisionNumber);
			FASerializer serializer = new FASerializer(nfa.grammar);
			String result = serializer.serialize(startState);
			System.out.println(result);
		}
	}
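
	// A hedged usage sketch (not part of the original source; the real driver
	// lives elsewhere in the tool): the analysis phase builds one DFA per
	// decision roughly like
	//
	//     DFA dfa = new DFA(decision, decisionStartState);
	//     if ( dfa.analysisAborted() ) { ... } // the time failsafe kicked in
	//
	// and the code generator may later call dfa.createStateTables(generator)
	// to fill the accept/min/max/special/transition vectors declared above.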

	/** Walk all states and reset their numbers to be a contiguous sequence
	 *  of integers starting from 0. Only cyclic DFAs can have unused positions
	 *  in the states list. State i might be identical to a previous state j and
	 *  will result in states[i] == states[j]. We don't want to waste a state
	 *  number on this. Useful mostly for code generation in tables.
	 *
	 *  At the start of this routine, states[i].stateNumber <= i by definition.
	 *  If states[50].stateNumber is 50 then a cycle during conversion may
	 *  try to add state 103, but we find that an identical DFA state, named
	 *  50, already exists, hence, states[103]==states[50] and both have
	 *  stateNumber 50 as they point at the same object. Afterwards, the set
	 *  of state numbers from all states should represent a contiguous range
	 *  from 0..n-1 where n is the number of unique states.
	 */
	public void resetStateNumbersToBeContiguous() {
		if (getUserMaxLookahead() > 0) {
			// all numbers are unique already; no states are thrown out.
			return;
		}
		/*
		if ( decisionNumber==14 ) {
			System.out.println("DFA :"+decisionNumber+" "+this);
			//System.out.println("DFA start state :"+startState);
			System.out.println("unique state numbers: ");
			Set s = getUniqueStates().keySet();
			for (Iterator it = s.iterator(); it.hasNext();) {
				DFAState d = (DFAState) it.next();
				System.out.print(d.stateNumber+" ");
			}
			System.out.println();

			System.out.println("size="+s.size());
			System.out.println("contiguous states: ");
			for (Iterator it = states.iterator(); it.hasNext();) {
				DFAState d = (DFAState) it.next();
				if ( d!=null ) {
					System.out.print(d.stateNumber+" ");
				}
			}
			System.out.println();

			//Set a = new HashSet();
			List a = new ArrayList();
			System.out.println("unique set from states table: ");
			for (int i = 0; i <= getMaxStateNumber(); i++) {
				DFAState d = getState(i);
				if ( d==null ) {
					continue;
				}
				boolean found=false;
				for (int j=0; j<a.size(); j++) {
					DFAState old = (DFAState)a.get(j);
					if ( old.equals(d) ) {
						if ( old.stateNumber!=d.stateNumber ) {
							System.out.println("WHAT! state["+i+"]="+d+" prev in list as "+old);
						}
						found=true;
					}
				}
				if ( !found ) {
					a.add(d);
				}
			}
			for (Iterator it = a.iterator(); it.hasNext();) {
				DFAState d = (DFAState) it.next();
				if ( d!=null ) {
					System.out.print(d.stateNumber+" ");
				}
			}
			System.out.println();
			System.out.println("size="+a.size());

			if ( a.equals(s) ) {
				System.out.println("both sets same");
			}
			else {
				System.out.println("sets NOT same");
			}
			System.out.println("stateCounter="+stateCounter);
		}
		*/

		// walk list of DFAState objects by state number,
		// setting state numbers to 0..n-1
		int snum = 0;
		for (int i = 0; i <= getMaxStateNumber(); i++) {
			DFAState s = getState(i);
			// some states are unused after creation most commonly due to cycles
			// or conflict resolution.
			if (s == null) {
				continue;
			}
			// state i is mapped to DFAState with state number set to i originally
			// so if it's less than i, then we renumbered it already; that
			// happens when states have been merged or cycles occurred I think.
			// states[50] will point to DFAState with s50 in it but
			// states[103] might also point at this same DFAState. Since
			// 50 < 103 then it's already been renumbered as it points downwards.
			boolean alreadyRenumbered = s.stateNumber < i;
			if (!alreadyRenumbered) {
				// state i is a valid state; reset its state number
				s.stateNumber = snum; // rewrite state numbers to be 0..n-1
				snum++;
			}
		}
		/*
		if ( decisionNumber==14 ) {
			//System.out.println("max state num: "+maxStateNumber);
			System.out.println("after renum, DFA :"+decisionNumber+" "+this);
			System.out.println("uniq states.size="+uniqueStates.size());

			Set a = new HashSet();
			System.out.println("after renumber; unique set from states table: ");
			for (int i = 0; i <= getMaxStateNumber(); i++) {
				DFAState d = getState(i);
				a.add(d);
			}
			for (Iterator it = a.iterator(); it.hasNext();) {
				DFAState d = (DFAState) it.next();
				if ( d!=null ) {
					System.out.print(d.stateNumber+" ");
				}
			}
			System.out.println();
			System.out.println("size="+a.size());
		}
		*/
		if (snum != getNumberOfStates()) {
			ErrorManager.internalError("DFA " + decisionNumber + ": "
					+ decisionNFAStartState.getDescription()
					+ " num unique states " + getNumberOfStates()
					+ " != num renumbered states " + snum);
		}
	}
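
	// A hedged illustration (not from the original source): suppose conversion
	// created state numbers 0..4 but states[3] aliases the same DFAState object
	// as states[1] (a cycle led back to an existing state). Only numbers
	// {0,1,2,4} are then in use, and the loop above renumbers the live states
	// to the contiguous range 0..3 so the code-generation tables have no holes.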

	// JAVA-SPECIFIC Accessors!!!!! It is so impossible to get arrays
	// or even consistently formatted strings acceptable to Java that
	// I am forced to build the individual char elements here.

	public List getJavaCompressedAccept() {
		return getRunLengthEncoding(accept);
	}

	public List getJavaCompressedEOT() {
		return getRunLengthEncoding(eot);
	}

	public List getJavaCompressedEOF() {
		return getRunLengthEncoding(eof);
	}

	public List getJavaCompressedMin() {
		return getRunLengthEncoding(min);
	}

	public List getJavaCompressedMax() {
		return getRunLengthEncoding(max);
	}

	public List getJavaCompressedSpecial() {
		return getRunLengthEncoding(special);
	}

	public List getJavaCompressedTransition() {
		if (transition == null || transition.size() == 0) {
			return null;
		}
		List encoded = new ArrayList(transition.size());
		// walk Vector<Vector<FormattedInteger>> which is the transition[][] table
		for (int i = 0; i < transition.size(); i++) {
			Vector transitionsForState = (Vector) transition
					.elementAt(i);
			encoded.add(getRunLengthEncoding(transitionsForState));
		}
		return encoded;
	}

	/** Compress the incoming data list so that runs of the same value are
	 *  encoded as (length,value) pair sequences. 3 -1 -1 -1 28 is encoded
	 *  as 1 3 3 -1 1 28. I am pretty sure this is the lossless compression
	 *  that GIF files use. Transition tables are heavily compressed by
	 *  this technique. I got the idea from JFlex http://jflex.de/
	 *
	 *  Returns a List<String> where each string is either \xyz for an 8-bit
	 *  char or \uFFFF for a 16-bit char. Hideous and specific to Java, but
	 *  it is the only target bad enough to need it.
	 */
	public List getRunLengthEncoding(List data) {
		if (data == null || data.size() == 0) {
			// for states with no transitions we want an empty string ""
			// to hold its place in the transitions array.
			List empty = new ArrayList();
			empty.add("");
			return empty;
		}
		int size = Math.max(2, data.size() / 2);
		List encoded = new ArrayList(size); // guess at size
		// scan values looking for runs
		int i = 0;
		Integer emptyValue = Utils.integer(-1);
		while (i < data.size()) {
			Integer I = (Integer) data.get(i);
			if (I == null) {
				I = emptyValue;
			}
			// count how many occurrences of this value are in the current run
			int n = 0;
			for (int j = i; j < data.size(); j++) {
				Integer v = (Integer) data.get(j);
				if (v == null) {
					v = emptyValue;
				}
				if (I.equals(v)) {
					n++;
				} else {
					break;
				}
			}
			encoded.add(encodeIntAsCharEscape((char) n));
			encoded.add(encodeIntAsCharEscape((char) I.intValue()));
			i += n;
		}
		return encoded;
	}
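
	// A worked illustration derived from the method above (not part of the
	// original source): the list [3, -1, -1, -1, 28] becomes the
	// (length,value) pairs (1,3) (3,-1) (1,28), which encodeIntAsCharEscape()
	// renders as ["\1", "\3", "\3", "\uffff", "\1", "\34"]; the -1 wraps to
	// char 0xFFFF and 28 prints as octal 34.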

	public void createStateTables(CodeGenerator generator) {
		//System.out.println("createTables:\n"+this);

		description = getNFADecisionStartState().getDescription();
		description = generator.target
				.getTargetStringLiteralFromString(description);

		// create all the tables
		special = new Vector(this.getNumberOfStates()); // Vector<short>
		special.setSize(this.getNumberOfStates());
		specialStates = new ArrayList(); // List<DFAState>
		specialStateSTs = new ArrayList(); // List<ST>
		accept = new Vector(this.getNumberOfStates()); // Vector<int>
		accept.setSize(this.getNumberOfStates());
		eot = new Vector(this.getNumberOfStates()); // Vector<int>
		eot.setSize(this.getNumberOfStates());
		eof = new Vector(this.getNumberOfStates()); // Vector<int>
		eof.setSize(this.getNumberOfStates());
		min = new Vector(this.getNumberOfStates()); // Vector<int>
		min.setSize(this.getNumberOfStates());
		max = new Vector(this.getNumberOfStates()); // Vector<int>
		max.setSize(this.getNumberOfStates());
		transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
		transition.setSize(this.getNumberOfStates());
		transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
		transitionEdgeTables.setSize(this.getNumberOfStates());

		// for each state in the DFA, fill relevant tables.
		Iterator it = null;
		if (getUserMaxLookahead() > 0) {
			it = states.iterator();
		} else {
			it = getUniqueStates().values().iterator();
		}
		while (it.hasNext()) {
			DFAState s = (DFAState) it.next();
			if (s == null) {
				// ignore null states; some acyclic DFAs see this condition
				// when inlining DFAs (due to lack of exit branch pruning?)
				continue;
			}
			if (s.isAcceptState()) {
				// can't compute min,max,special,transition on accepts
				accept.set(s.stateNumber, Utils.integer(s
						.getUniquelyPredictedAlt()));
			} else {
				createMinMaxTables(s);
				createTransitionTableEntryForState(s);
				createSpecialTable(s);
				createEOTAndEOFTables(s);
			}
		}

		// now that we have computed list of specialStates, gen code for 'em
		for (int i = 0; i < specialStates.size(); i++) {
			DFAState ss = (DFAState) specialStates.get(i);
			StringTemplate stateST = generator.generateSpecialState(ss);
			specialStateSTs.add(stateST);
		}

		// check that the tables are not messed up by encode/decode
		/*
		testEncodeDecode(min);
		testEncodeDecode(max);
		testEncodeDecode(accept);
		testEncodeDecode(special);
		System.out.println("min="+min);
		System.out.println("max="+max);
		System.out.println("eot="+eot);
		System.out.println("eof="+eof);
		System.out.println("accept="+accept);
		System.out.println("special="+special);
		System.out.println("transition="+transition);
		*/
	}

	/*
	private void testEncodeDecode(List data) {
		System.out.println("data="+data);
		List encoded = getRunLengthEncoding(data);
		StringBuffer buf = new StringBuffer();
		for (int i = 0; i < encoded.size(); i++) {
			String I = (String)encoded.get(i);
			int v = 0;
			if ( I.startsWith("\\u") ) {
				v = Integer.parseInt(I.substring(2,I.length()), 16);
			}
			else {
				v = Integer.parseInt(I.substring(1,I.length()), 8);
			}
			buf.append((char)v);
		}
		String encodedS = buf.toString();
		short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
		//System.out.println("decoded:");
		for (int i = 0; i < decoded.length; i++) {
			short x = decoded[i];
			if ( x!=((Integer)data.get(i)).intValue() ) {
				System.err.println("problem with encoding");
			}
			//System.out.print(", "+x);
		}
		//System.out.println();
	}
	*/

	protected void createMinMaxTables(DFAState s) {
		int smin = Label.MAX_CHAR_VALUE + 1;
		int smax = Label.MIN_ATOM_VALUE - 1;
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if (label.isAtom()) {
				if (label.getAtom() >= Label.MIN_CHAR_VALUE) {
					if (label.getAtom() < smin) {
						smin = label.getAtom();
					}
					if (label.getAtom() > smax) {
						smax = label.getAtom();
					}
				}
			} else if (label.isSet()) {
				IntervalSet labels = (IntervalSet) label.getSet();
				int lmin = labels.getMinElement();
				// if valid char (don't do EOF) and less than current min
				if (lmin < smin && lmin >= Label.MIN_CHAR_VALUE) {
					smin = labels.getMinElement();
				}
				if (labels.getMaxElement() > smax) {
					smax = labels.getMaxElement();
				}
			}
		}

		if (smax < 0) {
			// must be predicates or pure EOT transition; just zero out min, max
			smin = Label.MIN_CHAR_VALUE;
			smax = Label.MIN_CHAR_VALUE;
		}

		min.set(s.stateNumber, Utils.integer((char) smin));
		max.set(s.stateNumber, Utils.integer((char) smax));

		if (smax < 0 || smin > Label.MAX_CHAR_VALUE || smin < 0) {
			ErrorManager.internalError("messed up: min=" + min
					+ ", max=" + max);
		}
	}
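
	// A hedged illustration (not from the original source): for a state whose
	// edges are atoms 'a' (97) and 'z' (122) plus a set covering '0'..'9'
	// (48..57), all above Label.MIN_CHAR_VALUE, the loop above leaves
	// smin=48 and smax=122, and those are the chars stored in the min/max
	// vectors for this state.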

	protected void createTransitionTableEntryForState(DFAState s) {
		/*
		System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
			" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
		*/
		int smax = ((Integer) max.get(s.stateNumber)).intValue();
		int smin = ((Integer) min.get(s.stateNumber)).intValue();

		Vector stateTransitions = new Vector(smax - smin + 1);
		stateTransitions.setSize(smax - smin + 1);
		transition.set(s.stateNumber, stateTransitions);
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if (label.isAtom()
					&& label.getAtom() >= Label.MIN_CHAR_VALUE) {
				int labelIndex = label.getAtom() - smin; // offset from 0
				stateTransitions.set(labelIndex, Utils
						.integer(edge.target.stateNumber));
			} else if (label.isSet()) {
				IntervalSet labels = (IntervalSet) label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					// set the transition if the label is valid (don't do EOF)
					if (atoms[a] >= Label.MIN_CHAR_VALUE) {
						int labelIndex = atoms[a] - smin; // offset from 0
						stateTransitions.set(labelIndex, Utils
								.integer(edge.target.stateNumber));
					}
				}
			}
		}
		// track unique state transition tables so we can reuse
		Integer edgeClass = (Integer) edgeTransitionClassMap
				.get(stateTransitions);
		if (edgeClass != null) {
			//System.out.println("we've seen this array before; size="+stateTransitions.size());
			transitionEdgeTables.set(s.stateNumber, edgeClass);
		} else {
			/*
			if ( stateTransitions.size()>255 ) {
				System.out.println("edge edgeTable "+stateTransitions.size()+" s"+s.stateNumber+": "+Utils.integer(edgeTransitionClass));
			}
			else {
				System.out.println("stateTransitions="+stateTransitions);
			}
			*/
			edgeClass = Utils.integer(edgeTransitionClass);
			transitionEdgeTables.set(s.stateNumber, edgeClass);
			edgeTransitionClassMap.put(stateTransitions, edgeClass);
			edgeTransitionClass++;
		}
	}

	/** Set up the EOT and EOF tables; we cannot put -1 in the min/max tables,
	 *  so we need another way to test for EOT/EOF in the DFA transition
	 *  function.
	 */
	protected void createEOTAndEOFTables(DFAState s) {
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			if (label.isAtom()) {
				if (label.getAtom() == Label.EOT) {
					// eot[s] points to accept state
					eot.set(s.stateNumber, Utils
							.integer(edge.target.stateNumber));
				} else if (label.getAtom() == Label.EOF) {
					// eof[s] points to accept state
					eof.set(s.stateNumber, Utils
							.integer(edge.target.stateNumber));
				}
			} else if (label.isSet()) {
				IntervalSet labels = (IntervalSet) label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					if (atoms[a] == Label.EOT) {
						// eot[s] points to accept state
						eot.set(s.stateNumber, Utils
								.integer(edge.target.stateNumber));
					} else if (atoms[a] == Label.EOF) {
						eof.set(s.stateNumber, Utils
								.integer(edge.target.stateNumber));
					}
				}
			}
		}
	}

	protected void createSpecialTable(DFAState s) {
		// number all special states from 0...n-1 instead of their usual numbers
		boolean hasSemPred = false;

		// TODO this code is very similar to canGenerateSwitch. Refactor to share
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = (Transition) s.transition(j);
			Label label = edge.label;
			// can't do a switch if the edges have preds or are going to
			// require gated predicates
			if (label.isSemanticPredicate()
					|| ((DFAState) edge.target)
							.getGatedPredicatesInNFAConfigurations() != null) {
				hasSemPred = true;
				break;
			}
		}
		// if has pred or too big for table, make it special
		int smax = ((Integer) max.get(s.stateNumber)).intValue();
		int smin = ((Integer) min.get(s.stateNumber)).intValue();
		if (hasSemPred || smax - smin > MAX_STATE_TRANSITIONS_FOR_TABLE) {
			special.set(s.stateNumber, Utils
					.integer(uniqueCompressedSpecialStateNum));
			uniqueCompressedSpecialStateNum++;
			specialStates.add(s);
		} else {
			special.set(s.stateNumber, Utils.integer(-1)); // not special
		}
	}

	public static String encodeIntAsCharEscape(int v) {
		if (v <= 127) {
			return "\\" + Integer.toOctalString(v);
		}
		String hex = Integer.toHexString(v | 0x10000).substring(1, 5);
		return "\\u" + hex;
	}
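
	// Examples derived from the method above (not part of the original
	// source): encodeIntAsCharEscape(10) returns "\12" (unpadded octal for
	// values <= 127) and encodeIntAsCharEscape(0xABCD) returns "\uabcd"
	// (four lowercase hex digits otherwise).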

	public int predict(IntStream input) {
		Interpreter interp = new Interpreter(nfa.grammar, input);
		return interp.predict(this);
	}

	/** Add a new DFA state to this DFA if not already present.
	 *  To force an acyclic, fixed maximum depth DFA, just always
	 *  return the incoming state. By not reusing old states,
	 *  no cycles can be created. If we're doing fixed k lookahead,
	 *  don't update uniqueStates, just return the incoming state, which
	 *  indicates it's a new state.
	 */
	protected DFAState addState(DFAState d) {
		/*
		if ( decisionNumber==14 ) {
			System.out.println("addState: "+d.stateNumber);
		}
		*/
		if (getUserMaxLookahead() > 0) {
			return d;
		}
		// does a DFA state exist already with everything the same
		// except its state number?
		DFAState existing = (DFAState) uniqueStates.get(d);
		if (existing != null) {
			/*
			System.out.println("state "+d.stateNumber+" exists as state "+
				existing.stateNumber);
			*/
			// already there...get the existing DFA state
			return existing;
		}

		// if not there, then add new state.
		uniqueStates.put(d, d);
		numberOfStates++;
		return d;
	}

	public void removeState(DFAState d) {
		DFAState it = (DFAState) uniqueStates.remove(d);
		if (it != null) {
			numberOfStates--;
		}
	}

	public Map getUniqueStates() {
		return uniqueStates;
	}

	/** What is the max state number ever created? This may be beyond
	 *  getNumberOfStates().
	 */
	public int getMaxStateNumber() {
		return states.size() - 1;
	}

	public DFAState getState(int stateNumber) {
		return (DFAState) states.get(stateNumber);
	}

	public void setState(int stateNumber, DFAState d) {
		states.set(stateNumber, d);
	}

	/** Is the DFA reduced? I.e., does every state have a path to an accept
	 *  state? If not, don't delete as we need to generate an error indicating
	 *  which paths are "dead ends". Also tracks list of alts with no accept
	 *  state in the DFA. Must call verify() first before this makes sense.
	 */
	public boolean isReduced() {
		return reduced;
	}

	/** Is this DFA cyclic? That is, are there any loops? If not, then
	 *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
	 *  We can build a series of nested IF statements to match this. In the
	 *  presence of cycles, we need to build a general DFA and interpret it
	 *  to distinguish between alternatives.
	 */
	public boolean isCyclic() {
		return cyclic && getUserMaxLookahead() == 0;
	}

	public boolean canInlineDecision() {
		// TODO: and ! too big
		return CodeGenerator.GEN_ACYCLIC_DFA_INLINE && !isCyclic()
				&& !probe.isNonLLStarDecision();
	}

	/** Is this DFA derived from the NFA for the Tokens rule? */
	public boolean isTokensRuleDecision() {
		if (nfa.grammar.type != Grammar.LEXER) {
			return false;
		}
		NFAState nfaStart = getNFADecisionStartState();
		NFAState TokensRuleStart = nfa.grammar
				.getRuleStartState(Grammar.ARTIFICIAL_TOKENS_RULENAME);
		NFAState TokensDecisionStart = (NFAState) TokensRuleStart
				.transition(0).target;
		return nfaStart == TokensDecisionStart;
	}

	/** The user may specify a max, acyclic lookahead for any decision. No
	 *  DFA cycles are created when this value, k, is greater than 0.
	 *  If this decision has no k lookahead specified, then try the grammar.
	 */
	public int getUserMaxLookahead() {
		if (user_k >= 0) { // cache for speed
			return user_k;
		}
		GrammarAST blockAST = nfa.grammar
				.getDecisionBlockAST(decisionNumber);
		Object k = blockAST.getOption("k");
		if (k == null) {
			user_k = nfa.grammar.getGrammarMaxLookahead();
			return user_k;
		}
		if (k instanceof Integer) {
			Integer kI = (Integer) k;
			user_k = kI.intValue();
		} else {
			// must be String "*"
			if (k.equals("*")) {
				user_k = 0;
			}
		}
		return user_k;
	}

	public boolean getAutoBacktrackMode() {
		String autoBacktrack = (String) decisionNFAStartState
				.getAssociatedASTNode().getOption("backtrack");
		if (autoBacktrack == null) {
			autoBacktrack = (String) nfa.grammar.getOption("backtrack");
		}
		return autoBacktrack != null && autoBacktrack.equals("true");
	}

	public void setUserMaxLookahead(int k) {
		this.user_k = k;
	}

	/** Return k if decision is LL(k) for some k else return max int */
	public int getMaxLookaheadDepth() {
		if (isCyclic()) {
			return Integer.MAX_VALUE;
		}
		return max_k;
	}

	/** Return a list of Integer alt numbers for which no lookahead could
	 *  be computed or for which no single DFA accept state predicts those
	 *  alts. Must call verify() first before this makes sense.
	 */
	public List getUnreachableAlts() {
		return unreachableAlts;
	}

	/** Once this DFA has been built, need to verify that:
	 *
	 *  1. it's reduced
	 *  2. all alts have an accept state
	 *
	 *  Elsewhere, in the NFA converter, we need to verify that:
	 *
	 *  3. alts i and j have disjoint lookahead if no sem preds
	 *  4. if sem preds, nondeterministic alts must be sufficiently covered
	 */
	public void verify() {
		if (!probe.nonLLStarDecision) { // avoid if non-LL(*)
			doesStateReachAcceptState(startState);
		}
	}

	/** Figure out if this state eventually reaches an accept state and
	 *  modify the instance variable 'reduced' to indicate if we find
	 *  at least one state that cannot reach an accept state. This implies
	 *  that the overall DFA is not reduced. This algorithm should be
	 *  linear in the number of DFA states.
	 *
	 *  The algorithm also tracks which alternatives have no accept state,
	 *  indicating a nondeterminism.
	 *
	 *  Also computes whether the DFA is cyclic.
	 *
	 *  TODO: I call getUniquelyPredictedAlt too much; cache predicted alt
	 */
	protected boolean doesStateReachAcceptState(DFAState d) {
		if (d.isAcceptState()) {
			// accept states have no edges emanating from them so we can return
			d.setAcceptStateReachable(REACHABLE_YES);
			// this alt is uniquely predicted, remove from nondeterministic list
			int predicts = d.getUniquelyPredictedAlt();
			unreachableAlts.remove(Utils.integer(predicts));
			return true;
		}

		// avoid infinite loops
		d.setAcceptStateReachable(REACHABLE_BUSY);

		boolean anEdgeReachesAcceptState = false;
		// Visit every transition, track if at least one edge reaches stop state
		// Cannot terminate when we know this state reaches stop state since
		// all transitions must be traversed to set status of each DFA state.
		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
			Transition t = d.transition(i);
			DFAState edgeTarget = (DFAState) t.target;
			int targetStatus = edgeTarget.getAcceptStateReachable();
			if (targetStatus == REACHABLE_BUSY) { // avoid cycles; they say nothing
				cyclic = true;
				continue;
			}
			if (targetStatus == REACHABLE_YES) { // avoid unnecessary work
				anEdgeReachesAcceptState = true;
				continue;
			}
			if (targetStatus == REACHABLE_NO) { // avoid unnecessary work
				continue;
			}
			// target must be REACHABLE_UNKNOWN (i.e., unvisited)
			if (doesStateReachAcceptState(edgeTarget)) {
				anEdgeReachesAcceptState = true;
				// have to keep looking so don't break loop
				// must cover all states even if we find a path for this state
			}
		}
		if (anEdgeReachesAcceptState) {
			d.setAcceptStateReachable(REACHABLE_YES);
		} else {
			/*
			if ( d.getNumberOfTransitions()==0 ) {
				probe.reportDanglingState(d);
			}
			*/
			d.setAcceptStateReachable(REACHABLE_NO);
			reduced = false;
		}
		return anEdgeReachesAcceptState;
	}

	public NFAState getNFADecisionStartState() {
		return decisionNFAStartState;
	}

	public DFAState getAcceptState(int alt) {
		return altToAcceptState[alt];
	}

	public void setAcceptState(int alt, DFAState acceptState) {
		altToAcceptState[alt] = acceptState;
	}

	public String getDescription() {
		return description;
	}

	public int getDecisionNumber() {
		return decisionNFAStartState.getDecisionNumber();
	}

	/** What GrammarAST node (derived from the grammar) is this DFA
	 *  associated with? It will point to the start of a block or
	 *  the loop back of a (...)+ block etc...
	 */
	public GrammarAST getDecisionASTNode() {
		return decisionNFAStartState.getAssociatedASTNode();
	}

	public boolean isGreedy() {
		GrammarAST blockAST = nfa.grammar
				.getDecisionBlockAST(decisionNumber);
		String v = (String) blockAST.getOption("greedy");
		if (v != null && v.equals("false")) {
			return false;
		}
		return true;
	}

	public DFAState newState() {
		DFAState n = new DFAState(this);
		n.stateNumber = stateCounter;
		stateCounter++;
		states.setSize(n.stateNumber + 1);
		states.set(n.stateNumber, n); // track state num to state
		return n;
	}

	public int getNumberOfStates() {
		if (getUserMaxLookahead() > 0) {
			// if using fixed lookahead then uniqueStates is not filled in
			return states.size();
		}
		return numberOfStates;
	}

	public int getNumberOfAlts() {
		return nAlts;
	}

	public boolean analysisAborted() {
		return probe.analysisAborted();
	}

	protected void initAltRelatedInfo() {
		unreachableAlts = new LinkedList();
		for (int i = 1; i <= nAlts; i++) {
			unreachableAlts.add(Utils.integer(i));
		}
		altToAcceptState = new DFAState[nAlts + 1];
	}

	public String toString() {
		FASerializer serializer = new FASerializer(nfa.grammar);
		if (startState == null) {
			return "";
		}
		return serializer.serialize(startState, false);
	}

	/** EOT (end of token) is a label that indicates when the DFA conversion
	 *  algorithm would "fall off the end of a lexer rule". It normally
	 *  means the default clause. So for ('a'..'z')+ you would see a DFA
	 *  with a state that has a..z and EOT emanating from it. a..z would
	 *  jump to a state predicting alt 1 and EOT would jump to a state
	 *  predicting alt 2 (the exit loop branch). EOT implies anything other
	 *  than a..z. If for some reason the set is "all char", such as with
	 *  the wildcard '.', then EOT cannot match anything. For example,
	 *
	 *     BLOCK : '{' (.)* '}'
	 *
	 *  consumes all chars until EOF when greedy=true. When all edges are
	 *  combined for the DFA state after matching '}', you will find that
	 *  it is all char. The EOT transition has nothing to match and is
	 *  unreachable. The findNewDFAStatesAndAddDFATransitions() method
	 *  must know to ignore the EOT, so we simply remove it from the
	 *  reachable labels. Later analysis will find that the exit branch
	 *  is not predicted by anything. For greedy=false, we leave only
	 *  the EOT label indicating that the DFA should stop immediately
	 *  and predict the exit branch. The reachable labels are often a
	 *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
	 *  due to DFA conversion, so we must construct a pure set to see if
	 *  it is the same as Label.ALLCHAR.
	 *
	 *  Only do this for lexers.
	 *
	 *  If EOT coexists with ALLCHAR:
	 *  1. If not greedy, modify the labels parameter to be EOT
	 *  2. If greedy, remove EOT from the labels set
	protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
	{
		Label eot = new Label(Label.EOT);
		if ( !labels.containsKey(eot) ) {
			return false;
		}
		System.out.println("### contains EOT");
		boolean containsAllChar = false;
		IntervalSet completeVocab = new IntervalSet();
		int n = labels.size();
		for (int i=0; i<n; i++) {
			Label rl = (Label)labels.get(i);
			if ( !rl.equals(eot) ) {
				completeVocab.addAll(rl.getSet());
			}
		}
		System.out.println("completeVocab="+completeVocab);
		if ( completeVocab.equals(Label.ALLCHAR) ) {
			System.out.println("all char");
			containsAllChar = true;
		}
		return containsAllChar;
	}
	*/
}