001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.tool;
029:
030: import org.antlr.analysis.DFA;
031: import org.antlr.analysis.*;
032: import org.antlr.runtime.*;
033: import org.antlr.runtime.debug.DebugEventListener;
034: import org.antlr.runtime.debug.BlankDebugEventListener;
035: import org.antlr.runtime.tree.ParseTree;
036: import org.antlr.runtime.debug.ParseTreeBuilder;
037: import org.antlr.misc.IntervalSet;
038:
039: import java.util.List;
040: import java.util.Stack;
041:
042: /** The recognition interpreter/engine for grammars. Separated
043: * out of Grammar as it's related, but technically not a Grammar function.
044: * You create an interpreter for a grammar and an input stream. This object
045: * can act as a TokenSource so that you can hook up two grammars (via
046: * a CommonTokenStream) to lex/parse. Being a token source only makes sense
047: * for a lexer grammar of course.
048: */
public class Interpreter implements TokenSource {
    /** The grammar to interpret; supplies the NFA states and lookahead DFAs. */
    protected Grammar grammar;

    /** The input to recognize.  Cast to CharStream for lexer grammars
     *  (nextToken/scan) and to TokenStream when parseEngine consumes
     *  tokens for parser/combined grammars.
     */
    protected IntStream input;
052:
053: /** A lexer listener that just creates token objects as they
054: * are matched. scan() use this listener to get a single object.
055: * To get a stream of tokens, you must call scan() multiple times,
056: * recording the token object result after each call.
057: */
058: class LexerActionGetTokenType extends BlankDebugEventListener {
059: public CommonToken token;
060: Grammar g;
061:
062: public LexerActionGetTokenType(Grammar g) {
063: this .g = g;
064: }
065:
066: public void exitRule(String ruleName) {
067: if (!ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME)) {
068: int type = g.getTokenType(ruleName);
069: int channel = Token.DEFAULT_CHANNEL;
070: token = new CommonToken((CharStream) input, type,
071: channel, 0, 0);
072: }
073: }
074: }
075:
/** Create an interpreter that recognizes the given input stream using
 *  the given grammar's NFA/DFA machinery.
 */
public Interpreter(Grammar grammar, IntStream input) {
    this.grammar = grammar;
    this.input = input;
}
080:
081: public Token nextToken() {
082: if (grammar.type != Grammar.LEXER) {
083: return null;
084: }
085: if (input.LA(1) == CharStream.EOF) {
086: return Token.EOF_TOKEN;
087: }
088: int start = input.index();
089: int charPos = ((CharStream) input).getCharPositionInLine();
090: CommonToken token = null;
091: loop: while (input.LA(1) != CharStream.EOF) {
092: try {
093: token = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
094: break;
095: } catch (RecognitionException re) {
096: // report a problem and try for another
097: reportScanError(re);
098: continue loop;
099: }
100: }
101: // the scan can only set type
102: // we must set the line, and other junk here to make it a complete token
103: int stop = input.index() - 1;
104: if (token == null) {
105: return Token.EOF_TOKEN;
106: }
107: token.setLine(((CharStream) input).getLine());
108: token.setStartIndex(start);
109: token.setStopIndex(stop);
110: token.setCharPositionInLine(charPos);
111: return token;
112: }
113:
/** For a given input char stream, try to match against the NFA
 *  starting at startRule.  This is a deterministic parse even though
 *  it is using an NFA because it uses DFAs at each decision point to
 *  predict which alternative will succeed.  This is exactly what the
 *  generated parser will do.
 *
 *  This only does lexer grammars; it is a no-op for any other grammar type.
 *
 *  The match is reported as events (rule enter/exit, errors) to the
 *  actions listener; this method itself returns nothing.  Use
 *  scan(String, List) to get the matched token back.
 *
 *  @param startRule     lexer rule name to start matching at
 *  @param actions       listener receiving recognition events; may be null
 *  @param visitedStates if non-null, filled with every NFA state visited
 *  @throws RecognitionException if the input does not match startRule
 */
public void scan(String startRule, DebugEventListener actions,
        List visitedStates) throws RecognitionException {
    if (grammar.type != Grammar.LEXER) {
        return;
    }
    CharStream in = (CharStream) this.input;
    // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
    if (grammar.getRuleStartState(startRule) == null) {
        grammar.createNFAs();
    }

    if (!grammar.allDecisionDFAHaveBeenCreated()) {
        // Create the DFA predictors for each decision
        grammar.createLookaheadDFAs();
    }

    // do the parse; the stack tracks rule invocations so end-of-rule
    // states know where to return
    Stack ruleInvocationStack = new Stack();
    NFAState start = grammar.getRuleStartState(startRule);
    NFAState stop = grammar.getRuleStopState(startRule);
    parseEngine(startRule, start, stop, in, ruleInvocationStack,
            actions, visitedStates);
}
148:
149: public CommonToken scan(String startRule)
150: throws RecognitionException {
151: return scan(startRule, null);
152: }
153:
154: public CommonToken scan(String startRule, List visitedStates)
155: throws RecognitionException {
156: LexerActionGetTokenType actions = new LexerActionGetTokenType(
157: grammar);
158: scan(startRule, actions, visitedStates);
159: return actions.token;
160: }
161:
162: public void parse(String startRule, DebugEventListener actions,
163: List visitedStates) throws RecognitionException {
164: //System.out.println("parse("+startRule+")");
165: // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
166: if (grammar.getRuleStartState(startRule) == null) {
167: grammar.createNFAs();
168: }
169: if (!grammar.allDecisionDFAHaveBeenCreated()) {
170: // Create the DFA predictors for each decision
171: grammar.createLookaheadDFAs();
172: }
173: // do the parse
174: Stack ruleInvocationStack = new Stack();
175: NFAState start = grammar.getRuleStartState(startRule);
176: NFAState stop = grammar.getRuleStopState(startRule);
177: parseEngine(startRule, start, stop, input, ruleInvocationStack,
178: actions, visitedStates);
179: }
180:
181: public ParseTree parse(String startRule)
182: throws RecognitionException {
183: return parse(startRule, null);
184: }
185:
186: public ParseTree parse(String startRule, List visitedStates)
187: throws RecognitionException {
188: ParseTreeBuilder actions = new ParseTreeBuilder(grammar.name);
189: try {
190: parse(startRule, actions, visitedStates);
191: } catch (RecognitionException re) {
192: // Errors are tracked via the ANTLRDebugInterface
193: // Exceptions are used just to blast out of the parse engine
194: // The error will be in the parse tree.
195: }
196: return actions.getTree();
197: }
198:
/** Walk the NFA from start to stop, consuming input symbols and using
 *  the grammar's lookahead DFAs to pick an alternative at each decision;
 *  this is the engine behind both scan() and parse().
 *
 *  Fill a list of all NFA states visited during the parse.
 *
 *  @param startRule           name of the rule whose states bound the walk
 *  @param start               NFA start state of startRule
 *  @param stop                NFA stop state of startRule; loop ends there
 *  @param input               chars (lexer) or tokens (parser/combined)
 *  @param ruleInvocationStack states that invoked rules, so end-of-rule
 *                             states know where to return
 *  @param actions             listener for rule enter/exit, token consume
 *                             and error events; may be null
 *  @param visitedStates       if non-null, every visited state is appended
 *  @throws RecognitionException on mismatch or no-viable-alt; the error is
 *          reported to actions (if any) before being thrown
 */
protected void parseEngine(String startRule, NFAState start,
        NFAState stop, IntStream input, Stack ruleInvocationStack,
        DebugEventListener actions, List visitedStates)
        throws RecognitionException {
    if (actions != null) {
        actions.enterRule(start.getEnclosingRule());
    }
    NFAState s = start;
    int t = input.LA(1);
    while (s != stop) {
        if (visitedStates != null) {
            visitedStates.add(s);
        }

        // CASE 1: decision state.  Predict via the decision's lookahead
        // DFA, then jump to the start of the predicted alternative.
        if (s.getDecisionNumber() > 0
                && grammar.getNumberOfAltsForDecisionNFA(s) > 1) {
            // decision point, must predict and jump to alt
            DFA dfa = grammar
                    .getLookaheadDFA(s.getDecisionNumber());
            // predict() consumes input while walking the DFA; mark so
            // we can rewind to the decision point afterwards
            int m = input.mark();
            int predictedAlt = predict(dfa);
            if (predictedAlt == NFA.INVALID_ALT_NUMBER) {
                String description = dfa.getNFADecisionStartState()
                        .getDescription();
                NoViableAltException nvae = new NoViableAltException(
                        description, dfa.getDecisionNumber(),
                        s.stateNumber, input);
                if (actions != null) {
                    actions.recognitionException(nvae);
                }
                input.consume(); // recover
                throw nvae;
            }
            input.rewind(m);
            int parseAlt = s.translateDisplayAltToWalkAlt(dfa,
                    predictedAlt);
            NFAState alt = grammar.getNFAStateForAltOfDecision(s,
                    parseAlt);
            // transition 0 of the alt state is epsilon into the alt body
            s = (NFAState) alt.transition(0).target;
            continue;
        }

        // CASE 2: finished matching a rule (end of rule node)
        if (s.isAcceptState()) {
            if (actions != null) {
                actions.exitRule(s.getEnclosingRule());
            }
            if (ruleInvocationStack.empty()) {
                // done parsing: hit the accept state of the start rule
                break;
            }
            // pop invoking state off the stack to know where to return to
            NFAState invokingState = (NFAState) ruleInvocationStack
                    .pop();
            RuleClosureTransition invokingTransition = (RuleClosureTransition) invokingState
                    .transition(0);
            // move to node after state that invoked this rule
            s = invokingTransition.getFollowState();
            continue;
        }

        Transition trans = s.transition(0);
        Label label = trans.label;
        // CASE 3: epsilon transition
        if (label.isEpsilon()) {
            // CASE 3a: rule invocation state; remember where to return
            if (trans instanceof RuleClosureTransition) {
                ruleInvocationStack.push(s);
                s = (NFAState) trans.target;
                if (actions != null) {
                    actions.enterRule(s.getEnclosingRule());
                }
            }
            // CASE 3b: plain old epsilon transition, just move
            else {
                s = (NFAState) trans.target;
            }
        }

        // CASE 4: match label on transition; consume one input symbol
        else if (label.matches(t)) {
            if (actions != null) {
                if (grammar.type == Grammar.PARSER
                        || grammar.type == Grammar.COMBINED) {
                    // only token streams have LT(); char streams do not
                    actions.consumeToken(((TokenStream) input)
                            .LT(1));
                }
            }
            s = (NFAState) s.transition(0).target;
            input.consume();
            t = input.LA(1);
        }

        // CASE 5: error condition; label is inconsistent with input.
        // Report to the listener, consume one symbol to recover, then
        // throw the exception type matching the label kind.
        else {
            if (label.isAtom()) {
                MismatchedTokenException mte = new MismatchedTokenException(
                        label.getAtom(), input);
                if (actions != null) {
                    actions.recognitionException(mte);
                }
                input.consume(); // recover
                throw mte;
            } else if (label.isSet()) {
                MismatchedSetException mse = new MismatchedSetException(
                        ((IntervalSet) label.getSet())
                                .toRuntimeBitSet(), input);
                if (actions != null) {
                    actions.recognitionException(mse);
                }
                input.consume(); // recover
                throw mse;
            } else if (label.isSemanticPredicate()) {
                FailedPredicateException fpe = new FailedPredicateException(
                        input, s.getEnclosingRule(), label
                                .getSemanticContext().toString());
                if (actions != null) {
                    actions.recognitionException(fpe);
                }
                input.consume(); // recover
                throw fpe;
            } else {
                throw new RecognitionException(input); // unknown error
            }
        }
    }
    // hit the stop state for the start rule
    if (actions != null) {
        actions.exitRule(stop.getEnclosingRule());
    }
}
349:
350: /** Given an input stream, return the unique alternative predicted by
351: * matching the input. Upon error, return NFA.INVALID_ALT_NUMBER
352: * The first symbol of lookahead is presumed to be primed; that is,
353: * input.lookahead(1) must point at the input symbol you want to start
354: * predicting with.
355: */
356: public int predict(DFA dfa) {
357: DFAState s = dfa.startState;
358: int c = input.LA(1);
359: Transition eotTransition = null;
360: dfaLoop: while (!s.isAcceptState()) {
361: /*
362: System.out.println("DFA.predict("+s.getStateNumber()+", "+
363: dfa.getNFA().getGrammar().getTokenName(c)+")");
364: */
365: // for each edge of s, look for intersection with current char
366: for (int i = 0; i < s.getNumberOfTransitions(); i++) {
367: Transition t = s.transition(i);
368: // special case: EOT matches any char
369: if (t.label.matches(c)) {
370: // take transition i
371: s = (DFAState) t.target;
372: input.consume();
373: c = input.LA(1);
374: continue dfaLoop;
375: }
376: if (t.label.getAtom() == Label.EOT) {
377: eotTransition = t;
378: }
379: }
380: if (eotTransition != null) {
381: s = (DFAState) eotTransition.target;
382: continue dfaLoop;
383: }
384: /*
385: ErrorManager.error(ErrorManager.MSG_NO_VIABLE_DFA_ALT,
386: s,
387: dfa.nfa.grammar.getTokenName(c));
388: */
389: return NFA.INVALID_ALT_NUMBER;
390: }
391: // woohoo! We know which alt to predict
392: // nothing emanates from a stop state; must terminate anyway
393: /*
394: System.out.println("DFA stop state "+s.getStateNumber()+" predicts "+
395: s.getUniquelyPredictedAlt());
396: */
397: return s.getUniquelyPredictedAlt();
398: }
399:
400: public void reportScanError(RecognitionException re) {
401: CharStream cs = (CharStream) input;
402: // print as good of a message is we can't, given that we do not have
403: // a Lexer object and, hence, cannot call the routine to get a
404: // decent error message.
405: System.err.println("problem matching token at " + cs.getLine()
406: + ":" + cs.getCharPositionInLine() + " "
407: + re.getClass().getName());
408: }
409: }
|