001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.analysis;
029:
030: import org.antlr.tool.Grammar;
031: import org.antlr.misc.Utils;
032:
033: import java.util.HashSet;
034: import java.util.Set;
035:
036: /** A module to perform optimizations on DFAs.
037: *
038: * I could more easily (and more quickly) do some optimizations (such as
039: * PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it
040: * messes up the determinism checking. For example, it looks like
041: * loop exit branches are unreachable if you prune exit branches
042: * during DFA construction and before determinism checks.
043: *
044: * In general, ANTLR's NFA->DFA->codegen pipeline seems very robust
045: * to me which I attribute to a uniform and consistent set of data
046: * structures. Regardless of what I want to "say"/implement, I do so
047: * within the confines of, for example, a DFA. The code generator
048: * can then just generate code--it doesn't have to do much thinking.
049: * Putting optimizations in the code gen code really starts to make
050: * it a spagetti factory (uh oh, now I'm hungry!). The pipeline is
051: * very testable; each stage has well defined input/output pairs.
052: *
053: * ### Optimization: PRUNE_EBNF_EXIT_BRANCHES
054: *
055: * There is no need to test EBNF block exit branches. Not only is it
056: * an unneeded computation, but counter-intuitively, you actually get
057: * better errors. You can report an error at the missing or extra
058: * token rather than as soon as you've figured out you will fail.
059: *
060: * Imagine optional block "( DOT CLASS )? SEMI". ANTLR generates:
061: *
062: * int alt=0;
063: * if ( input.LA(1)==DOT ) {
064: * alt=1;
065: * }
066: * else if ( input.LA(1)==SEMI ) {
067: * alt=2;
068: * }
069: *
070: * Clearly, since Parser.match() will ultimately find the error, we
071: * do not want to report an error nor do we want to bother testing
072: * lookahead against what follows the (...)? We want to generate
073: * simply "should I enter the subrule?":
074: *
075: * int alt=2;
076: * if ( input.LA(1)==DOT ) {
077: * alt=1;
078: * }
079: *
080: * NOTE 1. Greedy loops cannot be optimized in this way. For example,
081: * "(greedy=false:'x'|.)* '\n'". You specifically need the exit branch
082: * to tell you when to terminate the loop as the same input actually
083: * predicts one of the alts (i.e., staying in the loop).
084: *
085: * NOTE 2. I do not optimize cyclic DFAs at the moment as it doesn't
086: * seem to work. ;) I'll have to investigate later to see what work I
087: * can do on cyclic DFAs to make them have fewer edges. Might have
088: * something to do with the EOT token.
089: *
090: * ### PRUNE_SUPERFLUOUS_EOT_EDGES
091: *
092: * When a token is a subset of another such as the following rules, ANTLR
093: * quietly assumes the first token to resolve the ambiguity.
094: *
095: * EQ : '=' ;
096: * ASSIGNOP : '=' | '+=' ;
097: *
098: * It can yield states that have only a single edge on EOT to an accept
099: * state. This is a waste and messes up my code generation. ;) If
100: * Tokens rule DFA goes
101: *
102: * s0 -'='-> s3 -EOT-> s5 (accept)
103: *
104: * then s5 should be pruned and s3 should be made an accept. Do NOT do this
105: * for keyword versus ID as the state with EOT edge emanating from it will
106: * also have another edge.
107: *
108: * ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES
109: *
110: * Done during DFA construction. See method addTransition() in
111: * NFAToDFAConverter.
112: *
113: * ### Optimization: MERGE_STOP_STATES
114: *
115: * Done during DFA construction. See addDFAState() in NFAToDFAConverter.
116: */
117: public class DFAOptimizer {
118: public static boolean PRUNE_EBNF_EXIT_BRANCHES = true;
119: public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true;
120: public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true;
121: public static boolean MERGE_STOP_STATES = true;
122:
123: /** Used by DFA state machine generator to avoid infinite recursion
124: * resulting from cycles int the DFA. This is a set of int state #s.
125: */
126: protected Set visited = new HashSet();
127:
128: protected Grammar grammar;
129:
130: public DFAOptimizer(Grammar grammar) {
131: this .grammar = grammar;
132: }
133:
134: public void optimize() {
135: // optimize each DFA in this grammar
136: for (int decisionNumber = 1; decisionNumber <= grammar
137: .getNumberOfDecisions(); decisionNumber++) {
138: DFA dfa = grammar.getLookaheadDFA(decisionNumber);
139: optimize(dfa);
140: }
141: }
142:
143: protected void optimize(DFA dfa) {
144: if (dfa == null) {
145: return; // nothing to do
146: }
147: /*
148: System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+
149: " num states="+dfa.getNumberOfStates());
150: */
151: //long start = System.currentTimeMillis();
152: if (PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision()) {
153: visited.clear();
154: int decisionType = dfa.getNFADecisionStartState().decisionStateType;
155: if (dfa.isGreedy()
156: && (decisionType == NFAState.OPTIONAL_BLOCK_START || decisionType == NFAState.LOOPBACK)) {
157: optimizeExitBranches(dfa.startState);
158: }
159: }
160: // If the Tokens rule has syntactically ambiguous rules, try to prune
161: if (PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES
162: && dfa.isTokensRuleDecision()
163: && dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap
164: .size() > 0) {
165: visited.clear();
166: optimizeEOTBranches(dfa.startState);
167: }
168:
169: /* ack...code gen needs this, cannot optimize
170: visited.clear();
171: unlinkUnneededStateData(dfa.startState);
172: */
173: //long stop = System.currentTimeMillis();
174: //System.out.println("minimized in "+(int)(stop-start)+" ms");
175: }
176:
177: protected void optimizeExitBranches(DFAState d) {
178: Integer sI = Utils.integer(d.stateNumber);
179: if (visited.contains(sI)) {
180: return; // already visited
181: }
182: visited.add(sI);
183: int nAlts = d.dfa.getNumberOfAlts();
184: for (int i = 0; i < d.getNumberOfTransitions(); i++) {
185: Transition edge = (Transition) d.transition(i);
186: DFAState edgeTarget = ((DFAState) edge.target);
187: /*
188: System.out.println(d.stateNumber+"-"+
189: edge.label.toString(d.dfa.nfa.grammar)+"->"+
190: edgeTarget.stateNumber);
191: */
192: // if target is an accept state and that alt is the exit alt
193: if (edgeTarget.isAcceptState()
194: && edgeTarget.getUniquelyPredictedAlt() == nAlts) {
195: /*
196: System.out.println("ignoring transition "+i+" to max alt "+
197: d.dfa.getNumberOfAlts());
198: */
199: d.removeTransition(i);
200: i--; // back up one so that i++ of loop iteration stays within bounds
201: }
202: optimizeExitBranches(edgeTarget);
203: }
204: }
205:
206: protected void optimizeEOTBranches(DFAState d) {
207: Integer sI = Utils.integer(d.stateNumber);
208: if (visited.contains(sI)) {
209: return; // already visited
210: }
211: visited.add(sI);
212: for (int i = 0; i < d.getNumberOfTransitions(); i++) {
213: Transition edge = (Transition) d.transition(i);
214: DFAState edgeTarget = ((DFAState) edge.target);
215: /*
216: System.out.println(d.stateNumber+"-"+
217: edge.label.toString(d.dfa.nfa.grammar)+"->"+
218: edgeTarget.stateNumber);
219: */
220: // if only one edge coming out, it is EOT, and target is accept prune
221: if (PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES
222: && edgeTarget.isAcceptState()
223: && d.getNumberOfTransitions() == 1
224: && edge.label.isAtom()
225: && edge.label.getAtom() == Label.EOT) {
226: //System.out.println("state "+d+" can be pruned");
227: // remove the superfluous EOT edge
228: d.removeTransition(i);
229: d.setAcceptState(true); // make it an accept state
230: // force it to uniquely predict the originally predicted state
231: d.cachedUniquelyPredicatedAlt = edgeTarget
232: .getUniquelyPredictedAlt();
233: i--; // back up one so that i++ of loop iteration stays within bounds
234: }
235: optimizeEOTBranches(edgeTarget);
236: }
237: }
238:
239: /** Walk DFA states, unlinking the nfa configs and whatever else I
240: * can to reduce memory footprint.
241: protected void unlinkUnneededStateData(DFAState d) {
242: Integer sI = Utils.integer(d.stateNumber);
243: if ( visited.contains(sI) ) {
244: return; // already visited
245: }
246: visited.add(sI);
247: d.nfaConfigurations = null;
248: for (int i = 0; i < d.getNumberOfTransitions(); i++) {
249: Transition edge = (Transition) d.transition(i);
250: DFAState edgeTarget = ((DFAState)edge.target);
251: unlinkUnneededStateData(edgeTarget);
252: }
253: }
254: */
255:
256: }
|