001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.analysis;
029:
030: /** A tree node for tracking the call chains for NFAs that invoke
031: * other NFAs. These trees only have to point upwards to their parents
032: * so we can walk back up the tree (i.e., pop stuff off the stack). We
033: * never walk from stack down down through the children.
034: *
035: * Each alt predicted in a decision has its own context tree,
036: * representing all possible return nodes. The initial stack has
037: * EOF ("$") in it. So, for m alternative productions, the lookahead
038: * DFA will have m NFAContext trees.
039: *
040: * To "push" a new context, just do "new NFAContext(context-parent, state)"
041: * which will add itself to the parent. The root is NFAContext(null, null).
042: *
043: * The complete context for an NFA configuration is the set of invoking states
044: * on the path from this node thru the parent pointers to the root.
045: */
046: public class NFAContext {
047: /** This is similar to Bermudez's m constant in his LAR(m) where
048: * you bound the stack so your states don't explode. The main difference
049: * is that I bound only recursion on the stack, not the simple stack size.
050: * This looser constraint will let the conversion roam further to find
051: * lookahead to resolve a decision.
052: *
053: * Bermudez's m operates differently as it is his LR stack depth
054: * I'm pretty sure it therefore includes all stack symbols. Here I
055: * restrict the size of an NFA configuration to be finite because a
056: * stack component may mention the same NFA invocation state at
057: * most m times. Hence, the number of DFA states will not grow forever.
058: * With recursive rules like
059: *
060: * e : '(' e ')' | INT ;
061: *
062: * you could chase your tail forever if somebody said "s : e '.' | e ';' ;"
063: * This constant prevents new states from being created after a stack gets
064: * "too big".
065: *
066: * Imagine doing a depth-first search on the DFA...as you chase an input
067: * sequence you can recurse to same rule such as e above. You'd have a
068: * chain of ((((. When you get do some point, you have to give up. The
069: * states in the chain will have longer and longer NFA config stacks.
070: * Must limit size.
071: *
072: * TODO: i wonder if we can recognize recursive loops and use a simple cycle?
073: *
074: * max=0 implies you cannot ever jump to another rule during closure.
075: * max=1 implies you can make as many calls as you want--you just
076: * can't ever visit a state that is on your rule invocation stack.
077: * I.e., you cannot ever recurse.
078: * max=2 implies you are able to recurse once (i.e., call a rule twice
079: * from the same place).
080: *
081: * This tracks recursion to a rule specific to an invocation site!
082: * It does not detect multiple calls to a rule from different rule
083: * invocation states. We are guaranteed to terminate because the
084: * stack can only grow as big as the number of NFA states * max.
085: *
086: * I noticed that the Java grammar didn't work with max=1, but did with
087: * max=4. Let's set to 4. Recursion is sometimes needed to resolve some
088: * fixed lookahead decisions.
089: */
090: public static int MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK = 4;
091:
092: public NFAContext parent;
093:
094: /** The NFA state that invoked another rule's start state is recorded
095: * on the rule invocation context stack.
096: */
097: public NFAState invokingState;
098:
099: /** Computing the hashCode is very expensive and closureBusy()
100: * uses it to track when it's seen a state|ctx before to avoid
101: * infinite loops. As we add new contexts, record the hash code
102: * as this.invokingState + parent.cachedHashCode. Avoids walking
103: * up the tree for every hashCode(). Note that this caching works
104: * because a context is a monotonically growing tree of context nodes
105: * and nothing on the stack is ever modified...ctx just grows
106: * or shrinks.
107: */
108: protected int cachedHashCode;
109:
110: public NFAContext(NFAContext parent, NFAState invokingState) {
111: this .parent = parent;
112: this .invokingState = invokingState;
113: if (invokingState != null) {
114: this .cachedHashCode = invokingState.stateNumber;
115: }
116: if (parent != null) {
117: this .cachedHashCode += parent.cachedHashCode;
118: }
119: }
120:
121: /** Two contexts are equals() if both have
122: * same call stack; walk upwards to the root.
123: * Recall that the root sentinel node has no invokingStates and no parent.
124: * Note that you may be comparing contexts in different alt trees.
125: *
126: * The hashCode is now cheap as it's computed once upon each context
127: * push on the stack. Use it to make equals() more efficient.
128: */
129: public boolean equals(Object o) {
130: NFAContext other = ((NFAContext) o);
131: if (this .cachedHashCode != other.cachedHashCode) {
132: return false; // can't be same if hash is different
133: }
134: if (this == other) {
135: return true;
136: }
137: // System.out.println("comparing "+this+" with "+other);
138: NFAContext sp = this ;
139: while (sp.parent != null && other.parent != null) {
140: if (sp.invokingState != other.invokingState) {
141: return false;
142: }
143: sp = sp.parent;
144: other = other.parent;
145: }
146: if (!(sp.parent == null && other.parent == null)) {
147: return false; // both pointers must be at their roots after walk
148: }
149: return true;
150: }
151:
152: /** Two contexts conflict() if they are equals() or one is a stack suffix
153: * of the other. For example, contexts [21 12 $] and [21 9 $] do not
154: * conflict, but [21 $] and [21 12 $] do conflict. Note that I should
155: * probably not show the $ in this case. There is a dummy node for each
156: * stack that just means empty; $ is a marker that's all.
157: *
158: * This is used in relation to checking conflicts associated with a
159: * single NFA state's configurations within a single DFA state.
160: * If there are configurations s and t within a DFA state such that
161: * s.state=t.state && s.alt != t.alt && s.ctx conflicts t.ctx then
162: * the DFA state predicts more than a single alt--it's nondeterministic.
163: * Two contexts conflict if they are the same or if one is a suffix
164: * of the other.
165: *
166: * When comparing contexts, if one context has a stack and the other
167: * does not then they should be considered the same context. The only
168: * way for an NFA state p to have an empty context and a nonempty context
169: * is the case when closure falls off end of rule without a call stack
170: * and re-enters the rule with a context. This resolves the issue I
171: * discussed with Sriram Srinivasan Feb 28, 2005 about not terminating
172: * fast enough upon nondeterminism.
173: */
174: public boolean conflictsWith(NFAContext other) {
175: return this .suffix(other); // || this.equals(other);
176: }
177:
178: /** [$] suffix any context
179: * [21 $] suffix [21 12 $]
180: * [21 12 $] suffix [21 $]
181: * [21 18 $] suffix [21 18 12 9 $]
182: * [21 18 12 9 $] suffix [21 18 $]
183: * [21 12 $] not suffix [21 9 $]
184: *
185: * Example "[21 $] suffix [21 12 $]" means: rule r invoked current rule
186: * from state 21. Rule s invoked rule r from state 12 which then invoked
187: * current rule also via state 21. While the context prior to state 21
188: * is different, the fact that both contexts emanate from state 21 implies
189: * that they are now going to track perfectly together. Once they
190: * converged on state 21, there is no way they can separate. In other
191: * words, the prior stack state is not consulted when computing where to
192: * go in the closure operation. ?$ and ??$ are considered the same stack.
193: * If ? is popped off then $ and ?$ remain; they are now an empty and
194: * nonempty context comparison. So, if one stack is a suffix of
195: * another, then it will still degenerate to the simple empty stack
196: * comparison case.
197: */
198: protected boolean suffix(NFAContext other) {
199: NFAContext sp = this ;
200: // if one of the contexts is empty, it never enters loop and returns true
201: while (sp.parent != null && other.parent != null) {
202: if (sp.invokingState != other.invokingState) {
203: return false;
204: }
205: sp = sp.parent;
206: other = other.parent;
207: }
208: //System.out.println("suffix");
209: return true;
210: }
211:
212: /** Walk upwards to the root of the call stack context looking
213: * for a particular invoking state.
214: public boolean contains(int state) {
215: NFAContext sp = this;
216: int n = 0; // track recursive invocations of state
217: System.out.println("this.context is "+sp);
218: while ( sp.parent!=null ) {
219: if ( sp.invokingState.stateNumber == state ) {
220: return true;
221: }
222: sp = sp.parent;
223: }
224: return false;
225: }
226: */
227:
228: /** Given an NFA state number, how many times has the NFA-to-DFA
229: * conversion pushed that state on the stack? In other words,
230: * the NFA state must be a rule invocation state and this method
231: * tells you how many times you've been to this state. If none,
232: * then you have not called the target rule from this state before
233: * (though another NFA state could have called that target rule).
234: * If n=1, then you've been to this state before during this
235: * DFA construction and are going to invoke that rule again.
236: *
237: * Note that many NFA states can invoke rule r, but we ignore recursion
238: * unless you hit the same rule invocation state again.
239: */
240: public int recursionDepthEmanatingFromState(int state) {
241: NFAContext sp = this ;
242: int n = 0; // track recursive invocations of target from this state
243: //System.out.println("this.context is "+sp);
244: while (sp.parent != null) {
245: if (sp.invokingState.stateNumber == state) {
246: n++;
247: }
248: sp = sp.parent;
249: }
250: return n;
251: }
252:
253: public int hashCode() {
254: return cachedHashCode;
255: /*
256: int h = 0;
257: NFAContext sp = this;
258: while ( sp.parent!=null ) {
259: h += sp.invokingState.getStateNumber();
260: sp = sp.parent;
261: }
262: return h;
263: */
264: }
265:
266: /** A context is empty if there is no parent; meaning nobody pushed
267: * anything on the call stack.
268: */
269: public boolean isEmpty() {
270: return parent == null;
271: }
272:
273: public String toString() {
274: StringBuffer buf = new StringBuffer();
275: NFAContext sp = this ;
276: buf.append("[");
277: while (sp.parent != null) {
278: buf.append(sp.invokingState.stateNumber);
279: buf.append(" ");
280: sp = sp.parent;
281: }
282: buf.append("$]");
283: return buf.toString();
284: }
285: }
|