package fri.patterns.interpreter.parsergenerator;

import java.util.*;
import java.io.*;
import fri.patterns.interpreter.parsergenerator.syntax.Rule;

/**
	The universal bottom-up parser algorithm. Runs with a Lexer (containing the input),
	ParserTables (containing the syntax), and a Semantic (optional).
	<pre>
	private static final String [][] syntax = {
		{ "Start", "\"Hello\"", "\"World\"" },
		{ Token.IGNORED, "`whitespaces`" },
	};

	SyntaxSeparation separation = new SyntaxSeparation(new Syntax(syntax));
	LexerBuilder builder = new LexerBuilder(separation.getLexerSyntax(), separation.getIgnoredSymbols());
	Lexer lexer = builder.getLexer();
	lexer.setInput("\tHello \r\n\tWorld\n");
	ParserTables parserTables = new SLRParserTables(separation.getParserSyntax());
	Parser parser = new Parser(parserTables);
	parser.parse(lexer, new PrintSemantic());
	</pre>
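
	After parsing, the result built up by the Semantic (if one was set) can be retrieved:
	<pre>
	Object result = parser.getResult();
	</pre>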

	TODO: implement error recovery: method recover()

	@author (c) 2000, Fritz Ritzberger
*/

public class Parser implements Serializable {
	private Lexer lexer;
	private ParserTables tables;
	private transient Semantic semantic;
	protected Stack stateStack = new Stack();
	protected Stack valueStack = new Stack();
	protected Stack rangeStack = new Stack();
	private transient Object result;
	private transient List inputTokens;
	private transient List rangeList;
	private transient Token.Range range = new Token.Range(null, null);
	private transient PrintStream out;
	private boolean passExpectedToLexer = true;
	// private boolean showConflicts;
	private boolean DEBUG;

	/**
		Create a generic bottom-up Parser with passed ParserTables (representing the current syntax to apply).
		@param tables ParserTables representing the syntax.
	*/
	public Parser(ParserTables tables) {
		this.tables = tables;
	}

	/** Returns the parsing result built from Semantic call return values. Retrievable after parsing. */
	public Object getResult() {
		return result;
	}

	// /** Sets if SHIFT/REDUCE or REDUCE/REDUCE conflicts will be shown on System.err. */
	// public void setShowConflicts(boolean showConflicts) {
	// 	this.showConflicts = showConflicts;
	// }

	/**
		Sets the lexer to be used for parsing. The Lexer contains (or will contain) the input to parse.
		The Parser passes the terminals of its syntax to the Lexer (via <i>setTerminals()</i>) when a new Lexer is set.
	*/
	public void setLexer(Lexer lexer) {
		boolean initLexer = (this.lexer != lexer);	// check if the passed lexer still needs terminals
		this.lexer = lexer;
		clear();	// clear if reused
		if (initLexer)
			lexer.setTerminals(getParserTables().getTerminals());	// pass terminals to lexer
	}
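
	/* Usage sketch (assuming 'parserTables' and 'lexer' were built as in the class comment):
		Parser parser = new Parser(parserTables);
		parser.setLexer(lexer);	// terminals of the syntax are passed to the lexer here
		boolean ok = parser.parse();	// syntax check only, no semantic
	*/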

	/** Returns the lexer that was set on this parser, e.g. to call <i>setInput()</i> on it. */
	public Lexer getLexer() {
		return lexer;
	}

	/** Sets the input on the contained lexer, or throws IllegalStateException if no lexer was set. */
	public void setInput(Object input) throws IOException {
		if (lexer == null)
			throw new IllegalStateException("Can not set input when no lexer was defined!");
		clear();	// clear if reused
		lexer.setInput(input);
	}

	/** Sets the semantic to be applied to parsing results. */
	public void setSemantic(Semantic semantic) {
		this.semantic = semantic;
	}

	/** Returns the semantic that was set on this parser. */
	public Semantic getSemantic() {
		return semantic;
	}

	/** Returns the current ParserTables. */
	public ParserTables getParserTables() {
		return tables;
	}

	/** Default is true. When true, the Parser will pass a Map of expected symbols to the Lexer at every token request. */
	public void setPassExpectedToLexer(boolean passExpectedToLexer) {
		this.passExpectedToLexer = passExpectedToLexer;
	}
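
	/* Sketch (hypothetical call on a configured Parser): switch the hints off when the
	   Lexer should scan without knowledge of the current parser state:
		parser.setPassExpectedToLexer(false);
	*/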

	// bottom-up state machine methods

	private Integer top() {
		return (Integer) stateStack.peek();
	}

	private void push(Integer state, Object result, Token.Range range) {
		stateStack.push(state);
		semanticPush(result, range);
	}

	private void pop(int pops) {
		inputTokens = new ArrayList();
		rangeList = new ArrayList();

		for (int i = 0; i < pops; i++) {
			stateStack.pop();
			semanticPop(i, pops);
		}
	}

	private void semanticPush(Object result, Token.Range range) {
		if (semantic != null) {	// when a semantic is present
			valueStack.push(result);	// we need to know parse result
			rangeStack.push(range);	// and its start-end positions within input text
		}
	}

	private void semanticPop(int popIndex, int countOfPops) {
		if (semantic != null) {
			// the value pop
			inputTokens.add(0, valueStack.pop());

			// the range pop
			Token.Range currentRange = (Token.Range) rangeStack.pop();
			rangeList.add(0, currentRange);

			if (popIndex == 0)	// first pop of right side holds last token value
				this.range = new Token.Range(null, currentRange.end);	// helper to remember end address

			if (popIndex == countOfPops - 1)	// if it is the last pop, make a valid range for next push()
				this.range = new Token.Range(currentRange.start, this.range.end);
		}
	}
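
	/* Illustration (using the grammar from the class comment): reducing by
	   Start ::= "Hello" "World" performs two semantic pops. The first pop (popIndex 0)
	   delivers the range end of "World", the last pop delivers the range start of "Hello";
	   both are merged into the range that gets pushed for "Start". */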

	/**
		Reduces by a rule after the input satisfied it. Pops the stack n times, where n is the
		number of symbols on the right side of the rule.
		The Semantic, if not null, gets called with all input tokens corresponding to the rule.
		A new state gets pushed, determined (via the GOTO table) by the state on top after popping
		and the nonterminal on the left side of the rule.
	*/
	protected void reduce(Integer ruleIndex) {
		if (DEBUG)
			dump("reduce " + ruleIndex);

		Rule rule = getParserTables().getSyntax().getRule(ruleIndex.intValue());
		pop(rule.rightSize());	// pop count of elements on right side

		semanticReduce(rule);

		String nonterminal = rule.getNonterminal();
		push(getParserTables().getGotoState(top(), nonterminal), result, range);

		dumpStack();
	}
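
	/* Illustration (using the grammar from the class comment): after "Hello" and "World"
	   have been shifted, the rule Start ::= "Hello" "World" is reduced: two states are popped,
	   the Semantic receives the two token values, and the GOTO state for (top, "Start") is
	   pushed together with the Semantic result. */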

	private void semanticReduce(Rule rule) {
		if (semantic != null) {
			result = semantic.doSemantic(rule, inputTokens, rangeList);
		}
	}

	/**
		Pushes a new state onto the state stack, determined by the GOTO table from the current state
		and the received token's symbol. Then reads the next token from the Lexer and returns it.
	*/
	protected Token shift(Token token) throws IOException {
		if (DEBUG)
			dump("shift from token symbol >" + token.symbol + "<");

		push(getParserTables().getGotoState(top(), token.symbol), token.text, token.range);
		dumpStack();

		Token newToken = getNextToken();
		if (DEBUG)
			dump("next token " + newToken.symbol + " >" + newToken.text + "<");

		return newToken;
	}

	/** Delivers the next token from the lexer to the parser. Override to convert the Token value. */
	protected Token getNextToken() throws IOException {
		// optionally pass the symbols expected in the current state as hints to the lexer
		Map expected = passExpectedToLexer && top().intValue() >= 0
				? getParserTables().getExpected(top())
				: null;
		Token token = lexer.getNextToken(expected);
		return token;
	}
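
	/* Override sketch (hypothetical subclass) for converting the Token value before the parser sees it:
		protected Token getNextToken() throws IOException {
			Token token = super.getNextToken();
			// ... inspect or convert the received token here ...
			return token;
		}
	*/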

	// public parsing methods

	/**
		Parses the tokens returned from the passed lexer. This call is for checking correctness without semantics.
		@param lexer the Lexer, loaded with the input to scan.
		@return true when the input was syntactically correct.
	*/
	public boolean parse(Lexer lexer) throws IOException {
		setLexer(lexer);
		return parse();
	}

	/**
		Parses the tokens returned from the lexer that was set before. This call is for processing input with a semantic.
		At least <i>setLexer()</i> must have been called before.
		@param semantic the semantic to apply to parser results.
		@return true when the input was syntactically correct.
	*/
	public boolean parse(Semantic semantic) throws IOException {
		if (lexer == null)
			throw new IllegalStateException("No lexer was defined to scan input!");
		setSemantic(semantic);
		return parse();
	}

	/**
		Parses the tokens scanned from the passed input.
		At least <i>setLexer()</i> must have been called before.
		@param input the input to parse, as File, InputStream, String, ....
		@return true when the input was syntactically correct.
	*/
	public boolean parse(Object input) throws IOException {
		setInput(input);
		return parse();
	}

	/**
		Parses the tokens returned from the passed lexer. This call is for integrating a semantic.
		@param lexer Lexer containing the input to parse.
		@param semantic the semantic to apply to parser results.
		@return true when the input was syntactically correct.
	*/
	public boolean parse(Lexer lexer, Semantic semantic) throws IOException {
		setLexer(lexer);
		setSemantic(semantic);
		return parse();
	}

	/**
		Parses the tokens scanned from the passed input. This call is for integrating a semantic.
		@param input the input to parse, as File, InputStream, String, ....
		@param semantic the semantic to apply to parser results.
		@return true when the input was syntactically correct.
	*/
	public boolean parse(Object input, Semantic semantic) throws IOException {
		setInput(input);
		setSemantic(semantic);
		return parse();
	}
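
	/* Usage sketch (assuming 'parser' was set up as in the class comment): once a Lexer was set,
	   the Parser can be reused with new input of any type the Lexer accepts, e.g. a String:
		parser.parse("\tHello \r\n\tWorld\n", new PrintSemantic());
	*/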

	/**
		Starts parsing after setting the Lexer and optionally a Semantic. At least <i>setLexer()</i> must have been called before.
		<p>
		Inits the parser, reads the first token, pushes state 0 and sets the action to SHIFT.
		Loops while the action is not ERROR or ACCEPT, the token symbol is not null (lexer error), and the top of the stack is not ERROR.
		Within the loop, the next action is read from the PARSE-ACTION table, using the current state and the token symbol.
		When the action is greater than zero it is a rule index and reduce() gets called; else when the action is SHIFT, shift() gets called.
		@return true when the input was syntactically correct.
	*/
	public boolean parse() throws IOException {
		stateStack.push(new Integer(0));	// push first state on stack
		Integer action = ParserTables.SHIFT;	// some allowed initial value
		Token token = getNextToken();	// start reading input
		if (DEBUG)
			dump("initial token symbol >" + token.symbol + "<, text >" + token.text + "<");

		while (token.symbol != null &&	// null token symbol signals a lexer error
				action.equals(ParserTables.ACCEPT) == false &&	// input accepted
				action.equals(ParserTables.ERROR) == false &&	// parse-action table error
				top().equals(ParserTables.ERROR) == false)	// goto table error
		{
			action = getParserTables().getParseAction(top(), token.symbol);

			if (action.intValue() > 0)
				reduce(action);
			else if (action.equals(ParserTables.SHIFT))
				token = shift(token);

			action = recover(action, token);	// recover if error (default does nothing, see recover())
		}

		return detectError(token, top(), action);
	}

	/**
		Recovers from an error. Not implemented; the default implementation returns the passed action unchanged.
		@param action current action from the PARSE-ACTION table.
		@param token recently received Token.
		@return the action to proceed with. token.symbol must not be null and the current state must not be ERROR after this call.
	*/
	protected Integer recover(Integer action, Token token) {
		return action;
	}
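
	/* Note (sketch, not implemented here): a subclass could implement a simple "panic mode"
	   recovery, e.g. skipping input tokens until one is found for which the PARSE-ACTION table
	   holds an action other than ERROR, and returning that action. This class itself stops at
	   the first error and reports it via detectError(). */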

	/**
		Called after the parse loop to determine if everything was OK.
		@return true when action is ACCEPT, token.symbol is EPSILON, and state is not ERROR.
	*/
	protected boolean detectError(Token token, Integer state, Integer action) {
		boolean ret = true;

		if (token.symbol == null || action.equals(ParserTables.ERROR)) {
			if (token.symbol == null)
				ensureOut().println("ERROR: Unknown symbol: >" + token.text + "<, state " + state);
			else
				ensureOut().println("ERROR: Wrong symbol: "
						+ (Token.isEpsilon(token) ? "EOF" : token.symbol + ", text: >" + token.text + "<")
						+ ", state " + state);

			lexer.dump(out);

			Map h = getParserTables().getExpected(state);
			if (h != null) {
				ensureOut().print("Expected was (one of): ");

				for (Iterator it = h.keySet().iterator(); it.hasNext(); ) {
					String s = (String) it.next();
					ensureOut().print((Token.isEpsilon(s) ? "EOF" : s) + (it.hasNext() ? ", " : ""));
				}
				ensureOut().println();
			}

			ret = false;
		} else if (state.equals(ParserTables.ERROR)) {	// ERROR lies on stack, from SHIFT
			pop(1);
			ensureOut().println("ERROR: found no possible follow state for " + top() + ", text >" + token.text + "<");
			lexer.dump(out);
			ret = false;
		} else if (Token.isEpsilon(token) == false) {
			ensureOut().println("ERROR: Input is not finished.");
			lexer.dump(out);
			ret = false;
		} else if (action.equals(ParserTables.ACCEPT) == false) {
			ensureOut().println("ERROR: Could not achieve ACCEPT. Symbol: " + token.symbol);
			lexer.dump(out);
			ret = false;
		}

		if (ret == false)
			result = null;

		return ret;
	}

	private void clear() {
		stateStack.removeAllElements();
		valueStack.removeAllElements();
		rangeStack.removeAllElements();
		range = new Token.Range(null, null);
		inputTokens = null;
		result = null;
		if (lexer != null)
			lexer.clear();
	}

	private void dumpStack() {
		if (DEBUG) {
			ensureOut().print("stack: ");
			for (int i = 0; i < stateStack.size(); i++)
				ensureOut().print(stateStack.elementAt(i) + " ");
			ensureOut().println();
		}
	}

	private void dump(String s) {
		ensureOut().println(s);
	}

	private PrintStream ensureOut() {
		if (out == null)
			out = System.err;
		return out;
	}

	/** Debug output will go to the passed stream (System.err when null is passed). */
	public void setPrintStream(PrintStream out) {
		this.out = (out != null) ? out : System.err;
	}

	/** Sets the debug mode. */
	public void setDebug(boolean debug) {
		DEBUG = debug;
	}

}