001: package fri.patterns.interpreter.parsergenerator.parsertables;
002:
003: import java.util.*;
004: import fri.patterns.interpreter.parsergenerator.Token;
005: import fri.patterns.interpreter.parsergenerator.ParserTables;
006: import fri.patterns.interpreter.parsergenerator.syntax.*;
007:
008: /**
009: SLR bottom-up parser syntax node.
010: The build() method of an no-arg-constructed node is used to fill a
011: state node list. The empty list passed to build() will be filled
012: with all state nodes after.
013: <p>
014: Every syntax node provides a fill() method to populate the corresponding
015: line (node-index) within PARSE-ACTION and GOTO tables.
016: <p>
017: After construction the syntax node has state entries represented by
018: RuleStateItem instances.
019: <p>
020: The state of a rule is represented by a dot "." at start, between symbols,
021: or at end. To shift an item means to move the dot to left.
022:
023: @author (c) 2000, Fritz Ritzberger
024: */
025:
026: class SLRSyntaxNode {
027: /** Contains all rule state entries. */
028: protected Hashtable entries = new Hashtable();
029:
030: private int kernels = 0;
031: private Integer hashCache = null;
032:
033: /** Do-nothing-constructor, used to call the build() method. */
034: public SLRSyntaxNode() {
035: }
036:
037: /** Factory-method to create a SLRSyntaxNode, to be overridden by other node types. */
038: protected SLRSyntaxNode createSyntaxNode() {
039: return new SLRSyntaxNode();
040: }
041:
042: /** Factory-method to create a RuleStateItem, to be overridden by other node types. */
043: protected RuleStateItem createRuleStateItem(int ruleIndex, Rule rule) {
044: return new RuleStateItem(ruleIndex, rule);
045: }
046:
047: /**
048: Construction of start node and all further state nodes.
049: After this call the syntaxNodes list is filled with all state-bound nodes.
050: @param syntax the grammar, including START rule.
051: @param syntaxNodes ampty list that holds all syntax nodes after this call.
052: @param kernels empty hashtable for internal buffering.
053: @return the syntaxNodes list, now filled.
054: */
055: public List build(Syntax syntax, List syntaxNodes, Hashtable kernels) {
056: // insert first rule as state item
057: RuleStateItem item = createRuleStateItem(0, syntax.getRule(0));
058: entries.put(item, item);
059: closure(syntax); // calculate followers
060:
061: syntaxNodes.add(this ); // now add startnode to list of syntax nodes
062:
063: generateSyntaxNodes(syntaxNodes, syntax, kernels); // generate all other nodes
064:
065: //System.err.println("Built "+syntaxNodes.size()+" states.");
066: return syntaxNodes;
067: }
068:
069: /**
070: Generates syntax nodes into passed list, whereby every node represents one possible state.
071: Every generated node gets appended to list and can generate new nodes by itself.
072: @param syntaxNodes list of nodes
073: @param syntax the grammar rules
074: */
075: protected void generateSyntaxNodes(List syntaxNodes, Syntax syntax,
076: Hashtable kernels) {
077: // newly appended nodes will be found and processed
078: for (int i = 0; i < syntaxNodes.size(); i++) {
079: SLRSyntaxNode node = (SLRSyntaxNode) syntaxNodes.get(i);
080: node.generateSyntaxNodesFromItems(syntaxNodes, syntax,
081: kernels);
082: }
083: }
084:
085: /**
086: Generates follower nodes from rule state items into list. The followers are referenced
087: by their originators (GOTO-references).
088: @param syntaxNodes list of nodes
089: @param syntax the grammar rules
090: */
091: protected void generateSyntaxNodesFromItems(List syntaxNodes,
092: Syntax syntax, Hashtable kernels) {
093: for (Enumeration e = entries.elements(); e.hasMoreElements();) {
094: RuleStateItem item = (RuleStateItem) e.nextElement();
095: String pending = item.getPendingSymbol();
096:
097: if (item.followNodeIndex < 0 && pending != null) { // further states are possible
098: // create a kernel item
099: SLRSyntaxNode node = createSyntaxNode();
100: List tmp = node.addShiftedItems(pending, entries); // get entries that have been taken
101:
102: // look if it is already in list
103: Integer kernelIndex = (Integer) kernels.get(node);
104: int index = kernelIndex != null ? kernelIndex
105: .intValue() : -1;
106:
107: // if not in list, add it, compute closure
108: if (index < 0) {
109: index = syntaxNodes.size();
110: kernels.put(node, new Integer(index));
111: syntaxNodes.add(node);
112: node.closure(syntax);
113: }
114:
115: // link originator entries to new or found node
116: for (int i = 0; i < tmp.size(); i++) {
117: Tuple t = (Tuple) tmp.get(i);
118: linkParentItemToChild(t.o1, index, syntaxNodes,
119: t.o2);
120: }
121: }
122: }
123: }
124:
125: /**
126: Adopt all rule-states from originator node, which have the passed symbol
127: as pending to the right of dot. This is done when a new node was generated,
128: which happens when the dot is moved to the right.
129: All adopted items together form the kernel of the syntax node.
130: The number of kernel items is calculated, which is important when searching
131: existing nodes.
132: @param symbol the symbol that must be the pending symbol right of the dot within rule state item
133: @param originatorEntries map containing all rule state items of the originator node (as value).
134: @return a Tuple list containing pairs of original and new item, the new item is the shifted one.
135: */
136: protected List addShiftedItems(String symbol,
137: Hashtable originatorEntries) {
138: List list = new ArrayList();
139: for (Enumeration e = originatorEntries.elements(); e
140: .hasMoreElements();) {
141: RuleStateItem item = (RuleStateItem) e.nextElement();
142: String pending = item.getPendingSymbol();
143:
144: if (pending != null && symbol.equals(pending)) {
145: RuleStateItem newitem = item.shift();
146: this .entries.put(newitem, newitem);
147: list.add(new Tuple(item, newitem)); // return all derived originator items
148: }
149: }
150:
151: kernels = list.size(); // remember count of kernel items
152:
153: return list; // return list of entries that were shifted
154: }
155:
156: /**
157: Store the follow state (node index) within rule state item.
158: This is a sparate protected method as LALR nodes do further work here.
159: */
160: protected void linkParentItemToChild(RuleStateItem parent,
161: int newIndex, List syntaxNodes, RuleStateItem child) {
162: parent.followNodeIndex = newIndex;
163: }
164:
165: /**
166: Closure - do for all rule states:
167: Adopt all rules from grammar that derive one of the pending nonterminals
168: (right of dot) within entries list. This is done recursively, appending
169: new rules at end, so that they can adopt further rules.
170: The closure method calls <i>addRulesDerivingPendingNonTerminal()</i>.
171: */
172: protected void closure(Syntax syntax) {
173: // put Hashtable to List for sequential work
174: List todo = new ArrayList(entries.size() * 2);
175: for (Enumeration e = entries.elements(); e.hasMoreElements();)
176: todo.add(e.nextElement());
177:
178: // loop todo list and find every added new item
179: for (int i = 0; i < todo.size(); i++) {
180: RuleStateItem rsi = (RuleStateItem) todo.get(i);
181: String nonterm = rsi.getPendingNonTerminal();
182: if (nonterm != null)
183: addRulesDerivingPendingNonTerminal(rsi, nonterm,
184: syntax, todo);
185: }
186: }
187:
188: /**
189: Closure call for one rule state item. All rules in grammar that have passed
190: nonterm on left side get appended to todo list and put into item entries when not already in entries.
191: */
192: protected void addRulesDerivingPendingNonTerminal(
193: RuleStateItem item, String nonterm, Syntax syntax, List todo) {
194: // make the closure for one item:
195: // if pointer before a nonterminal, add all rules that derive it
196: for (int i = 0; i < syntax.size(); i++) {
197: Rule rule = syntax.getRule(i);
198:
199: if (rule.getNonterminal().equals(nonterm)) {
200: RuleStateItem rsi = createRuleStateItem(i, rule);
201:
202: if (entries.containsKey(rsi) == false) {
203: entries.put(rsi, rsi); // real entry list
204: todo.add(rsi); // work list
205: }
206: }
207: }
208: }
209:
210: /**
211: Fill the line of GOTO table this state corresponds to.
212: @param state the index of this state within list
213: @return Hashtable with all terminals/nonterminals to handle, and their follower states.
214: */
215: public Hashtable fillGotoLine(int state) {
216: Hashtable h = new Hashtable(entries.size() * 3 / 2); // load factor 0.75
217:
218: // fill one row of GOTO-table
219: for (Enumeration e = entries.elements(); e.hasMoreElements();) {
220: // store temporary
221: RuleStateItem item = (RuleStateItem) e.nextElement();
222: String symbol = item.getPendingSymbol();
223:
224: if (symbol != null) { // if pointer not at end of rule
225: //System.err.println("Regel-Zustand: "+item);
226: setTableLine("GOTO", state, h, item, new Integer(
227: item.followNodeIndex), symbol);
228: }
229: }
230: return h;
231: }
232:
233: /**
234: Fill the line of PARSE-ACTION table this state corresponds to.
235: @param state the index of this state within list
236: @param firstSets all FIRST-sets of all nonterminals
237: @param followSets all FOLLOW-sets of all nonterminals
238: @return Hashtable with all terminals to handle, and their actions.
239: */
240: public Hashtable fillParseActionLine(int state,
241: FirstSets firstSets, FollowSets followSets) {
242: // fill one row of PARSE-ACTION-table
243: Hashtable h = new Hashtable(entries.size() * 10);
244:
245: for (Enumeration e = entries.elements(); e.hasMoreElements();) {
246: // store temporary
247: RuleStateItem item = (RuleStateItem) e.nextElement();
248: String symbol = item.getPendingSymbol();
249:
250: if (symbol != null) { // pointer not at end of rule, SHIFT
251: if (Token.isTerminal(symbol)) { // enter SHIFT at terminal symbol
252: // first-set of terminal is terminal
253: setParseTableLine(state, h, item,
254: ParserTables.SHIFT, symbol);
255: } else { // put SHIFT at all terminals of FIRST-set
256: List firstSet = getNontermShiftSymbols(firstSets,
257: item.getNonterminal());
258:
259: if (firstSet != null) { // LALR will return null, SLR not null
260: for (int i = 0; i < firstSet.size(); i++) {
261: String terminal = (String) firstSet.get(i);
262: setParseTableLine(state, h, item,
263: ParserTables.SHIFT, terminal);
264: }
265: }
266: }
267: } else { // pointer at end, REDUCE to rule number
268: for (Iterator reduceSymbols = getReduceSymbols(
269: followSets, item); reduceSymbols.hasNext();) {
270: String terminal = (String) reduceSymbols.next();
271:
272: if (item.ruleIndex == 0) // is startnode
273: setParseTableLine(state, h, item,
274: ParserTables.ACCEPT, terminal);
275: else
276: // ruleIndex > 0 means REDUCE
277: setParseTableLine(state, h, item, new Integer(
278: item.ruleIndex), terminal);
279: }
280: }
281: }
282: return h;
283: }
284:
285: /**
286: Returns all symbols for which SHIFT must be put into PARSE-ACTION table for a nonterminal.
287: For SLR this is the FIRST set of the nonterminal.
288: */
289: protected List getNontermShiftSymbols(FirstSets firstSets,
290: String nonterm) {
291: return (List) firstSets.get(nonterm);
292: }
293:
294: /**
295: Returns all symbols for which REDUCE must be put into PARSE-ACTION table.
296: For SLR this is the FOLLOW set of the nonterminal.
297: */
298: protected Iterator getReduceSymbols(FollowSets followSets,
299: RuleStateItem item) {
300: return ((List) followSets.get(item.getNonterminal()))
301: .iterator();
302: }
303:
304: /**
305: Set a position in PARSE-ACTION table.
306: This is the place where SHIFT/REDUCE and REDUCE/REDUCE conflicts are solved.
307: */
308: protected void setParseTableLine(int state, Hashtable line,
309: RuleStateItem item, Integer action, String terminal) {
310: // set one action into a parse-table row and resolve conflicts
311:
312: if (setTableLine("PARSE-ACTION", state, line, item, action,
313: terminal) == false) {
314: // shift/reduce or reduce/reduce conflict
315: Object o = line.get(terminal);
316:
317: if (action.equals(ParserTables.SHIFT)
318: || o.equals(ParserTables.SHIFT)) {
319: // prefer SHIFT operation
320: line.put(terminal, ParserTables.SHIFT);
321: System.err
322: .println("WARNING: shift/reduce conflict, SHIFT is preferred.");
323: } else {
324: System.err
325: .println("WARNING: reduce/reduce conflict, rule with smaller index is preferred.");
326: // prefer rule with smaller index
327: if (((Integer) o).intValue() > action.intValue())
328: line.put(terminal, action);
329: }
330: }
331: }
332:
333: /**
334: Set a position in one of the tables.
335: Here SHIFT/SHIFT, SHIFT/REDUCE and REDUCE/REDUCE conflicts are detected.
336: @return true when no conflict was detected
337: */
338: protected boolean setTableLine(String table, int state,
339: Hashtable line, RuleStateItem item, Integer action,
340: String terminal) {
341: // set one action into a table row and detect conflicts
342: Object o = line.get(terminal);
343: if (o == null) { // no conflict
344: line.put(terminal, action);
345: } else { // conflict?
346: if (o.equals(action) == false) { // conflict!
347: System.err
348: .println("========================================================");
349: System.err.println("WARNING: " + table + " state "
350: + state + ", terminal " + terminal + " is "
351: + displayAction(o)
352: + " and was overwritten by action "
353: + displayAction(action));
354: System.err.println("... from rule state: " + item);
355: System.err.println("... current state:\n" + this );
356: System.err
357: .println("========================================================");
358: return false;
359: }
360: }
361: return true;
362: }
363:
364: private String displayAction(Object action) {
365: if (action.equals(ParserTables.SHIFT))
366: return "SHIFT";
367: return "REDUCE(" + action.toString() + ")";
368: }
369:
370: /**
371: The count of kernel items must be equal. All kernel items must exist in passed node.
372: @param o new node that contains only kernel items (which do not have dot at start).
373: */
374: public boolean equals(Object o) {
375: //System.err.println("SLRSyntaxNode.equals: \n"+this+"\n with \n"+o);
376: SLRSyntaxNode node = (SLRSyntaxNode) o;
377:
378: if (node.kernels != kernels)
379: return false;
380:
381: // look if all entries are in the other node
382: for (Enumeration e = entries.elements(); e.hasMoreElements();) {
383: RuleStateItem item = (RuleStateItem) e.nextElement();
384: // kernel items have pointer not at start
385: if (item.pointerPosition > 1
386: && node.entries.containsKey(item) == false)
387: return false;
388: }
389: return true;
390: }
391:
392: /** Returns the hashcodes of all rule state items, associated by ^. The result gets buffered on first call. */
393: public int hashCode() {
394: if (hashCache == null) {
395: int result = 0;
396: for (Enumeration e = entries.elements(); e
397: .hasMoreElements();)
398: result ^= e.nextElement().hashCode();
399: hashCache = new Integer(result);
400: }
401: return hashCache.intValue();
402: }
403:
404: /** Outputs this syntax node with all its rule state entries sorted. */
405: public String toString() {
406: StringBuffer sb = new StringBuffer();
407: // we want a sorted output of items, order by ruleIndex
408: List list = new ArrayList(entries.size());
409: for (Enumeration e = entries.elements(); e.hasMoreElements();) {
410: RuleStateItem rsi = (RuleStateItem) e.nextElement();
411: int index = -1;
412: for (int i = 0; index == -1 && i < list.size(); i++) {
413: RuleStateItem rsi1 = (RuleStateItem) list.get(i);
414: if (rsi1.ruleIndex > rsi.ruleIndex
415: || rsi1.ruleIndex == rsi.ruleIndex
416: && rsi.pointerPosition > 1)
417: index = i;
418: }
419: if (index < 0)
420: list.add(rsi);
421: else
422: list.add(index, rsi);
423: }
424: for (int i = 0; i < list.size(); i++) {
425: sb.append(" ");
426: sb.append(list.get(i).toString());
427: sb.append("\n");
428: }
429: return sb.toString();
430: }
431:
432: // Helper that hold two RuleStateItems
433: private class Tuple {
434: RuleStateItem o1, o2;
435:
436: Tuple(RuleStateItem o1, RuleStateItem o2) {
437: this .o1 = o1;
438: this .o2 = o2;
439: }
440: }
441:
442: /**
443: Rule state entry item class, contained within SLR syntax node.
444: */
445: protected class RuleStateItem {
446: Rule rule;
447: int pointerPosition = 1;
448: int ruleIndex;
449: int followNodeIndex = -1;
450: protected Integer hashCache = null;
451:
452: /** Constructor with syntax rule index and rule. */
453: public RuleStateItem(int ruleIndex, Rule rule) {
454: this .rule = rule;
455: this .ruleIndex = ruleIndex;
456: }
457:
458: /** Internal construction of shifted rule states. */
459: protected RuleStateItem(RuleStateItem orig) {
460: this .rule = orig.rule;
461: this .pointerPosition = orig.pointerPosition;
462: this .ruleIndex = orig.ruleIndex;
463: }
464:
465: /** Factory-method, to be overridden by subclasses. */
466: protected RuleStateItem createRuleStateItem(RuleStateItem orig) {
467: return new RuleStateItem(orig);
468: }
469:
470: /** Returns the nonterminal on the left side of the rule. */
471: String getNonterminal() {
472: return rule.getNonterminal();
473: }
474:
475: /** Returns a new shifted rule state item (dot has moved one position right). */
476: RuleStateItem shift() {
477: RuleStateItem clone = createRuleStateItem(this );
478: clone.pointerPosition++;
479: return clone;
480: }
481:
482: /** Returns null when no nonterminal is after dot, else the nonterminal to the right of the dot. */
483: String getPendingNonTerminal() {
484: if (pointerPosition > rule.rightSize())
485: return null;
486:
487: String symbol = getPendingSymbol();
488: if (Token.isTerminal(symbol))
489: return null; // is a terminal
490:
491: return symbol;
492: }
493:
494: /** Return pending symbol if pointer position is not at end, else null. */
495: String getPendingSymbol() {
496: if (pointerPosition > rule.rightSize())
497: return null;
498:
499: return rule.getRightSymbol(pointerPosition - 1);
500: }
501:
502: /** The rule number and dot position must match for equality. */
503: public boolean equals(Object o) {
504: RuleStateItem item = (RuleStateItem) o;
505: return ruleIndex == item.ruleIndex
506: && pointerPosition == item.pointerPosition;
507: }
508:
509: /** Returns rule index * 13 + position of dot. */
510: public int hashCode() {
511: if (hashCache == null)
512: hashCache = new Integer(ruleIndex * 13
513: + pointerPosition);
514: return hashCache.intValue();
515: }
516:
517: /** String representation of rule state, showing index, rule and dot position. */
518: public String toString() {
519: StringBuffer sb = new StringBuffer("(Rule " + ruleIndex
520: + ") " + getNonterminal() + " : ");
521: int i = 0;
522: for (; i < rule.rightSize(); i++) {
523: if (i == pointerPosition - 1)
524: sb.append(".");
525: sb.append(rule.getRightSymbol(i));
526: sb.append(" ");
527: }
528: if (i == pointerPosition - 1)
529: sb.append(".");
530: if (followNodeIndex >= 0)
531: sb.append(" -> State " + followNodeIndex);
532: return sb.toString();
533: }
534:
535: }
536:
537: }
|