001: package fri.patterns.interpreter.parsergenerator.parsertables;
002:
003: import java.util.*;
004: import java.io.PrintStream;
005: import fri.patterns.interpreter.parsergenerator.Token;
006: import fri.patterns.interpreter.parsergenerator.syntax.*;
007:
008: /**
009: A table generator, building SLR bottom-up parser tables from a syntax.
010: <p>
011: An artifical START-node gets inserted. Lists of nonterminals and terminals are created.
012: Syntax nodes are created, from which the parser GOTO and PARSE-ACTION tables get built.
013: <p>
014: This class contains dump methods for syntax nodes and FIRST/FOLLOW sets.
015:
016: @author (c) 2000, Fritz Ritzberger
017: */
018:
019: public class SLRParserTables extends AbstractParserTables {
020: protected transient List syntaxNodes = new ArrayList();
021: protected transient FirstSets firstSets;
022: protected transient FollowSets followSets;
023:
024: /**
025: Appends START symbol and retrieves all symbols. Calls init then.
026: All parser table types run through this constructor.
027: */
028: public SLRParserTables(Syntax syntax) throws ParserBuildException {
029: this .syntax = addStartSymbol(syntax); // add START symbol to begin
030: getAllSymbols();
031: init();
032: }
033:
034: /** Builds SLR bottom-up parser tables. */
035: protected void init() throws ParserBuildException {
036: syntaxNodes = new SLRSyntaxNode().build(syntax, syntaxNodes,
037: new Hashtable());
038:
039: gotoTable = generateGoto(syntaxNodes);
040:
041: Nullable nullAble = new Nullable(syntax, nonterminals);
042: firstSets = new FirstSets(syntax, nullAble, nonterminals);
043: followSets = new FollowSets(syntax, nullAble, firstSets);
044:
045: parseTable = generateParseAction(syntaxNodes);
046: }
047:
048: // Looks for top level rules and inserts a START rule pointing to it.
049: // Inserts a default rule pointing to the first rule when none found.
050: private Syntax addStartSymbol(Syntax syntax)
051: throws ParserBuildException {
052: String startSym = null;
053: List startRules = syntax.findStartRules();
054:
055: if (startRules.size() <= 0) { // no toplevel rule
056: //throw new ParserBuildException("Grammatik hat keine Start-Regel!");
057: Rule rule = syntax.getRule(0);
058: System.err
059: .println("WARNING: Grammar has no top level rule, taking first rule >"
060: + rule + "<");
061: startSym = rule.getNonterminal();
062: } else if (startRules.size() > 1) { // more than one toplevel rules
063: for (int i = 0; i < startRules.size(); i++) { // check if all start rules have the same nonterminal on left side
064: Rule r = (Rule) startRules.get(i);
065: String nt = r.getNonterminal();
066:
067: if (startSym == null)
068: startSym = nt;
069: else if (startSym.equals(nt) == false)
070: throw new ParserBuildException(
071: "Grammar has more than one toplevel rules: "
072: + startRules);
073: }
074: } else { // exactly one toplevel rule found
075: startSym = ((Rule) startRules.get(0)).getNonterminal();
076: }
077:
078: Rule start = new Rule("<START>", 1);
079: start.addRightSymbol(startSym);
080: syntax.insertRule(0, start);
081:
082: return syntax;
083: }
084:
085: /** Returns all symbols (terminals and nonterminals). Builds lists. Called only once on construction. */
086: protected List getAllSymbols() throws ParserBuildException {
087: // collect nonterminals unique
088: for (int i = 0; i < syntax.size(); i++) {
089: Rule rule = syntax.getRule(i);
090: String nonterm = rule.getNonterminal(); // left side of derivation
091:
092: if (Nullable.isNull(nonterm))
093: throw new ParserBuildException(
094: "ERROR: Empty nonterminal: >" + nonterm + "<");
095:
096: if (nonterminals.indexOf(nonterm) < 0) // insert unique
097: nonterminals.add(nonterm);
098: }
099:
100: // collect terminals unique
101: for (int j = 0; j < syntax.size(); j++) {
102: Rule rule = syntax.getRule(j);
103:
104: for (int i = 0; i < rule.rightSize(); i++) {
105: String symbol = rule.getRightSymbol(i);
106:
107: if (Nullable.isNull(symbol))
108: throw new ParserBuildException(
109: "ERROR: Empty terminal: >" + symbol + "<");
110:
111: if (Token.isTerminal(symbol)) {
112: if (terminals.indexOf(symbol) < 0) {
113: terminals.add(symbol);
114: terminalsWithoutEpsilon.add(symbol);
115: }
116: } else // throw error if nonterminal is not present
117: if (nonterminals.indexOf(symbol) < 0)
118: throw new ParserBuildException(
119: "ERROR: Every nonterminal must have a rule. symbol: >"
120: + symbol + "<, rule: " + rule);
121: }
122: }
123:
124: // check for sanity
125: if (terminals.size() <= 0)
126: throw new ParserBuildException("ERROR: No terminal found: "
127: + syntax);
128:
129: if (nonterminals.size() <= 0)
130: throw new ParserBuildException(
131: "ERROR: No nonterminal found: " + syntax);
132:
133: // add nonterminals to symbols
134: for (int i = 0; i < nonterminals.size(); i++)
135: symbols.add(nonterminals.get(i));
136:
137: // add terminals without EpSiLoN to symbols
138: for (int i = 0; i < terminals.size(); i++)
139: symbols.add(terminals.get(i));
140:
141: // add "Epsilon" Symbol to Terminals
142: terminals.add(Token.EPSILON);
143:
144: return symbols;
145: }
146:
147: /** Creates GOTO table of follow states. */
148: protected List generateGoto(List syntaxNodes) {
149: //System.err.println("got "+syntaxNodes.size()+" states");
150: this .gotoTable = new ArrayList(syntaxNodes.size());
151:
152: Map hash = new Hashtable(syntaxNodes.size());
153:
154: for (int i = 0; i < syntaxNodes.size(); i++) {
155: SLRSyntaxNode node = (SLRSyntaxNode) syntaxNodes.get(i);
156:
157: Map h = node.fillGotoLine(i);
158:
159: if (h.size() <= 0)
160: gotoTable.add(null);
161: else
162: insertTableLine(i, h, gotoTable, hash);
163: }
164:
165: return gotoTable;
166: }
167:
168: /**
169: Erzeugen der Parse-Action-Tabelle fuer shift/reduce Aktionen.
170: */
171: protected List generateParseAction(List syntaxNodes) {
172: this .parseTable = new ArrayList(syntaxNodes.size());
173:
174: Map hash = new Hashtable(syntaxNodes.size());
175:
176: for (int i = 0; i < syntaxNodes.size(); i++) {
177: SLRSyntaxNode node = (SLRSyntaxNode) syntaxNodes.get(i);
178:
179: Map h = node.fillParseActionLine(i, firstSets, followSets);
180:
181: if (h.size() <= 0)
182: parseTable.add(null);
183: else
184: insertTableLine(i, h, parseTable, hash);
185: }
186:
187: return parseTable;
188: }
189:
190: /**
191: Compression of tables: Look for an identical table line.
192: @return identical line Zeile, or passed line when not found.
193: */
194: protected void insertTableLine(int i, Map line, List table, Map hash) {
195: Integer itg = (Integer) hash.get(line);
196: if (itg == null) {
197: table.add(line);
198: hash.put(line, new Integer(i));
199: } else {
200: table.add(table.get(itg.intValue()));
201: }
202: }
203:
204: /** Enable garbage collection of builder variables. CAUTION: dump methods work reduced after this call. */
205: public void freeSyntaxNodes() {
206: syntaxNodes = null;
207: symbols = null;
208: terminals = null;
209: }
210:
211: // dump methods to print parser information
212:
213: /** Overridden to report better. */
214: public void report(PrintStream out) {
215: System.err.println("Parser Generator is " + getClass());
216: super .report(out);
217: out.println("states: "
218: + (syntaxNodes != null ? syntaxNodes.size() : -1));
219: }
220:
221: /** Implements ParserTables: output of rules, FIRST/FOLLOW sets, syntax nodes, GOTO-table, PARSE-ACTON-table. */
222: public void dump(PrintStream out) {
223: dumpSyntax(out);
224: dumpFirstSet(out);
225: dumpFollowSet(out);
226: dumpSyntaxNodes(out);
227: dumpGoto(out);
228: dumpParseAction(out);
229: }
230:
231: public void dumpSyntaxNodes(PrintStream out) {
232: if (syntaxNodes != null) {
233: for (int i = 0; i < syntaxNodes.size(); i++)
234: dumpSyntaxNode(i, (SLRSyntaxNode) syntaxNodes.get(i),
235: out);
236: out.println();
237: }
238: }
239:
240: public void dumpSyntaxNode(int i, SLRSyntaxNode node,
241: PrintStream out) {
242: out.println("State " + i);
243: out.println(node.toString());
244: }
245:
246: public void dumpFirstSet(PrintStream out) {
247: if (firstSets != null)
248: dumpSet("FIRST", firstSets, out);
249: }
250:
251: public void dumpFollowSet(PrintStream out) {
252: if (followSets != null)
253: dumpSet("FOLLOW", followSets, out);
254: }
255:
256: public void dumpSet(String header, Map set, PrintStream out) {
257: for (Iterator it = set.keySet().iterator(); it.hasNext();) {
258: String nonterm = (String) it.next();
259: out.println(header + "(" + nonterm + ") = "
260: + set.get(nonterm));
261: }
262: out.println();
263: }
264:
265: /** Test main dumping arithmetic expression tables. */
266: public static void main(String[] args) {
267: String[][] syntax = { { "EXPR", "TERM" },
268: { "EXPR", "EXPR", "'+'", "TERM" },
269: { "EXPR", "EXPR", "'-'", "TERM" }, { "TERM", "FAKT", },
270: { "TERM", "TERM", "'*'", "FAKT" },
271: { "TERM", "TERM", "'/'", "FAKT" },
272: { "FAKT", "`number`", },
273: { "FAKT", "'('", "EXPR", "')'" }, };
274:
275: try {
276: SLRParserTables p = new SLRParserTables(new Syntax(syntax));
277: p.dump(System.err);
278: } catch (Exception e) {
279: e.printStackTrace();
280: }
281: }
282:
283: }
284:
285: /*
286: (Rule 0) <START> : EXPR
287: (Rule 1) EXPR : TERM
288: (Rule 2) EXPR : EXPR '+' TERM
289: (Rule 3) EXPR : EXPR '-' TERM
290: (Rule 4) TERM : FAKT
291: (Rule 5) TERM : TERM '*' FAKT
292: (Rule 6) TERM : TERM '/' FAKT
293: (Rule 7) FAKT : "[0-9]+"
294: (Rule 8) FAKT : '(' EXPR ')'
295:
296: FIRST(EXPR) = ["[0-9]+", '(']
297: FIRST(FAKT) = ["[0-9]+", '(']
298: FIRST(<START>) = ["[0-9]+", '(']
299: FIRST(TERM) = ["[0-9]+", '(']
300:
301: FOLLOW(FAKT) = [Epsilon, '+', '-', ')', '*', '/']
302: FOLLOW(EXPR) = [Epsilon, '+', '-', ')']
303: FOLLOW(<START>) = [Epsilon]
304: FOLLOW(TERM) = [Epsilon, '+', '-', ')', '*', '/']
305:
306: State 0
307: (Rule 0) <START> : .EXPR -> State 2
308: (Rule 1) EXPR : .TERM -> State 1
309: (Rule 2) EXPR : .EXPR '+' TERM -> State 2
310: (Rule 3) EXPR : .EXPR '-' TERM -> State 2
311: (Rule 4) TERM : .FAKT -> State 4
312: (Rule 5) TERM : .TERM '*' FAKT -> State 1
313: (Rule 6) TERM : .TERM '/' FAKT -> State 1
314: (Rule 7) FAKT : ."[0-9]+" -> State 5
315: (Rule 8) FAKT : .'(' EXPR ')' -> State 3
316:
317: State 1
318: (Rule 1) EXPR : TERM .
319: (Rule 5) TERM : TERM .'*' FAKT -> State 7
320: (Rule 6) TERM : TERM .'/' FAKT -> State 6
321:
322: State 2
323: (Rule 0) <START> : EXPR .
324: (Rule 2) EXPR : EXPR .'+' TERM -> State 9
325: (Rule 3) EXPR : EXPR .'-' TERM -> State 8
326:
327: State 3
328: (Rule 1) EXPR : .TERM -> State 1
329: (Rule 2) EXPR : .EXPR '+' TERM -> State 10
330: (Rule 3) EXPR : .EXPR '-' TERM -> State 10
331: (Rule 4) TERM : .FAKT -> State 4
332: (Rule 5) TERM : .TERM '*' FAKT -> State 1
333: (Rule 6) TERM : .TERM '/' FAKT -> State 1
334: (Rule 7) FAKT : ."[0-9]+" -> State 5
335: (Rule 8) FAKT : '(' .EXPR ')' -> State 10
336: (Rule 8) FAKT : .'(' EXPR ')' -> State 3
337:
338: State 4
339: (Rule 4) TERM : FAKT .
340:
341: State 5
342: (Rule 7) FAKT : "[0-9]+" .
343:
344: State 6
345: (Rule 6) TERM : TERM '/' .FAKT -> State 11
346: (Rule 7) FAKT : ."[0-9]+" -> State 5
347: (Rule 8) FAKT : .'(' EXPR ')' -> State 3
348:
349: State 7
350: (Rule 5) TERM : TERM '*' .FAKT -> State 12
351: (Rule 7) FAKT : ."[0-9]+" -> State 5
352: (Rule 8) FAKT : .'(' EXPR ')' -> State 3
353:
354: State 8
355: (Rule 3) EXPR : EXPR '-' .TERM -> State 13
356: (Rule 4) TERM : .FAKT -> State 4
357: (Rule 5) TERM : .TERM '*' FAKT -> State 13
358: (Rule 6) TERM : .TERM '/' FAKT -> State 13
359: (Rule 7) FAKT : ."[0-9]+" -> State 5
360: (Rule 8) FAKT : .'(' EXPR ')' -> State 3
361:
362: State 9
363: (Rule 2) EXPR : EXPR '+' .TERM -> State 14
364: (Rule 4) TERM : .FAKT -> State 4
365: (Rule 5) TERM : .TERM '*' FAKT -> State 14
366: (Rule 6) TERM : .TERM '/' FAKT -> State 14
367: (Rule 7) FAKT : ."[0-9]+" -> State 5
368: (Rule 8) FAKT : .'(' EXPR ')' -> State 3
369:
370: State 10
371: (Rule 2) EXPR : EXPR .'+' TERM -> State 9
372: (Rule 3) EXPR : EXPR .'-' TERM -> State 8
373: (Rule 8) FAKT : '(' EXPR .')' -> State 15
374:
375: State 11
376: (Rule 6) TERM : TERM '/' FAKT .
377:
378: State 12
379: (Rule 5) TERM : TERM '*' FAKT .
380:
381: State 13
382: (Rule 3) EXPR : EXPR '-' TERM .
383: (Rule 5) TERM : TERM .'*' FAKT -> State 7
384: (Rule 6) TERM : TERM .'/' FAKT -> State 6
385:
386: State 14
387: (Rule 2) EXPR : EXPR '+' TERM .
388: (Rule 5) TERM : TERM .'*' FAKT -> State 7
389: (Rule 6) TERM : TERM .'/' FAKT -> State 6
390:
391: State 15
392: (Rule 8) FAKT : '(' EXPR ')' .
393:
394:
395: GOTO TABLE
396: ==========
397: | <START> EXPR TERM FAKT '+' '-' '*' '/' "[0-9]+" '(' ')'
398: ________________________________________________________________________________________________
399: 0 | - 2 1 4 - - - - 5 3 -
400: 1 | - - - - - - 7 6 - - -
401: 2 | - - - - 9 8 - - - - -
402: 3 | - 10 1 4 - - - - 5 3 -
403: 4 | - - - - - - - - - - -
404: 5 | - - - - - - - - - - -
405: 6 | - - - 11 - - - - 5 3 -
406: 7 | - - - 12 - - - - 5 3 -
407: 8 | - - 13 4 - - - - 5 3 -
408: 9 | - - 14 4 - - - - 5 3 -
409: 10 | - - - - 9 8 - - - - 15
410: 11 | - - - - - - - - - - -
411: 12 | - - - - - - - - - - -
412: 13 | - - - - - - 7 6 - - -
413: 14 | - - - - - - 7 6 - - -
414: 15 | - - - - - - - - - - -
415:
416: PARSE-ACTION TABLE
417: ==================
418: | '+' '-' '*' '/' "[0-9]+" '(' ')' <EOF>
419: ________________________________________________________________________
420: 0 | - - - - SH SH - -
421: 1 | 1 1 SH SH - - 1 1
422: 2 | SH SH - - - - - AC
423: 3 | - - - - SH SH - -
424: 4 | 4 4 4 4 - - 4 4
425: 5 | 7 7 7 7 - - 7 7
426: 6 | - - - - SH SH - -
427: 7 | - - - - SH SH - -
428: 8 | - - - - SH SH - -
429: 9 | - - - - SH SH - -
430: 10 | SH SH - - - - SH -
431: 11 | 6 6 6 6 - - 6 6
432: 12 | 5 5 5 5 - - 5 5
433: 13 | 3 3 SH SH - - 3 3
434: 14 | 2 2 SH SH - - 2 2
435: 15 | 8 8 8 8 - - 8 8
436:
437: */
|