001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.process.extended;
010:
011: import net.sourceforge.chaperon.common.Decoder;
012: import net.sourceforge.chaperon.common.SortedCharSet;
013: import net.sourceforge.chaperon.common.StringSet;
014: import net.sourceforge.chaperon.model.extended.Definition;
015: import net.sourceforge.chaperon.model.extended.Element;
016: import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
017: import net.sourceforge.chaperon.model.extended.Pattern;
018: import net.sourceforge.chaperon.model.extended.PatternIterator;
019: import net.sourceforge.chaperon.model.extended.PatternSet;
020:
021: import org.apache.commons.logging.Log;
022:
023: import java.util.HashMap;
024:
025: /**
026: * This class contains a automaton of states.
027: *
028: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
029: * @version CVS $Id: ExtendedParserAutomaton.java,v 1.3 2004/01/08 11:30:52 benedikta Exp $
030: */
031: public class ExtendedParserAutomaton {
032: //private ArrayList states = new ArrayList();
033: public State first = null;
034: private ExtendedGrammar grammar;
035: private Log log;
036:
037: // intermediate used sets
038: private String[] symbols;
039: private PatternSet[] firstSets;
040: private PatternSet[] lastSets;
041: private boolean[] nullable;
042: private HashMap definitions = new HashMap();
043:
044: //private PatternMap kernelFollowSet;
045: //private PatternMap followFirstSet;
046: //private PatternMap followLastSet;
047:
048: /**
049: * Create a new automaton of states, calculated with the aid of the grammar. The constructor
050: * calculates all state and transitions and combine all states with the same core.
051: *
052: * @param grammar ExtendedGrammar.
053: */
054: public ExtendedParserAutomaton(ExtendedGrammar grammar) {
055: this (grammar, null);
056: }
057:
058: /**
059: * Create a new automaton of states, calculated with the aid of the grammar. The constructor
060: * calculates all state and transitions and combine all states with the same core.
061: *
062: * @param grammar ExtendedGrammar
063: * @param firstsets First sets.
064: * @param log Log, which should be used.
065: */
066: public ExtendedParserAutomaton(ExtendedGrammar grammar, Log log) {
067: this .grammar = grammar;
068: this .log = log;
069:
070: symbols = new String[grammar.getDefinitionCount()];
071: firstSets = new PatternSet[grammar.getDefinitionCount()];
072: lastSets = new PatternSet[grammar.getDefinitionCount()];
073: nullable = new boolean[grammar.getDefinitionCount()];
074: definitions = new HashMap();
075:
076: for (int i = 0; i < grammar.getDefinitionCount(); i++) {
077: symbols[i] = grammar.getDefinition(i).getSymbol();
078: firstSets[i] = grammar.getDefinition(i).getFirstSet();
079: lastSets[i] = grammar.getDefinition(i).getLastSet();
080: nullable[i] = grammar.getDefinition(i).isNullable();
081:
082: for (PatternIterator pattern = firstSets[i].getPattern(); pattern
083: .hasNext();)
084: definitions.put(pattern.next(), grammar
085: .getDefinition(i));
086:
087: for (PatternIterator pattern = lastSets[i].getPattern(); pattern
088: .hasNext();)
089: definitions.put(pattern.next(), grammar
090: .getDefinition(i));
091: }
092:
093: grammar.update();
094:
095: //System.out.println("followLastSet=\n"+followLastSet);
096: // C = closure( [S'=^S] )
097: addState(getStartState());
098:
099: for (State state = first; state != null; state = state.next) {
100: //State state = getState(i);
101: //System.out.println("\nState "+i+"\n"+state);
102: SortedCharSet limits = new SortedCharSet();
103: StringSet nonterminals = new StringSet();
104: PatternSet gotoPattern = new PatternSet();
105:
106: for (Item item = state.first; item != null; item = item.next) {
107: if (item.position == Item.SHIFT) {
108: if (item.pattern.getSymbol() != null)
109: nonterminals
110: .addString(item.pattern.getSymbol());
111:
112: limits.addChar(item.pattern.getLimits());
113: } else if (item.position == Item.GOTO)
114: gotoPattern.addPattern(item.pattern);
115:
116: limits.addChar(item.lookahead.getLimits());
117: }
118:
119: // System.out.println("limits="+limits);
120: // System.out.println("nonterminals="+nonterminals);
121: // System.out.println("gotoPattern="+gotoPattern);
122: // for all other characters from the begin
123: if ((limits.getCharCount() >= 1)
124: && (limits.getChar(0) > '\u0000')) {
125: addShiftAction(state, '\u0000', (char) (limits
126: .getChar(0) - 1));
127: addReduceAction(state, '\u0000', (char) (limits
128: .getChar(0) - 1));
129: }
130:
131: // for each character
132: for (int j = 0; j < limits.getCharCount(); j++) {
133: if ((j > 0)
134: && ((limits.getChar(j) - limits.getChar(j - 1)) > 1)) {
135: addShiftAction(state,
136: (char) (limits.getChar(j - 1) + 1),
137: (char) (limits.getChar(j) - 1));
138: addReduceAction(state, (char) (limits
139: .getChar(j - 1) + 1), (char) (limits
140: .getChar(j) - 1));
141: }
142:
143: addShiftAction(state, limits.getChar(j), limits
144: .getChar(j));
145: addReduceAction(state, limits.getChar(j), limits
146: .getChar(j));
147: }
148:
149: // for all other characters to the end
150: if (limits.getCharCount() >= 1) {
151: addShiftAction(state, (char) (limits.getChar(limits
152: .getCharCount() - 1) + 1), '\u4000');
153: addReduceAction(state, (char) (limits.getChar(limits
154: .getCharCount() - 1) + 1), '\u4000');
155: }
156:
157: // for universal characters
158: if (limits.getCharCount() == 0) {
159: addShiftAction(state, '\u0000', '\u4000');
160: addReduceAction(state, '\u0000', '\u4000');
161: }
162:
163: // for each nonterminal
164: for (int j = 0; j < nonterminals.getStringCount(); j++)
165: addGotoAction(state, nonterminals.getString(j));
166:
167: for (PatternIterator pattern = gotoPattern.getPattern(); pattern
168: .hasNext();)
169: addGotoAction(state, pattern.next());
170:
171: addReduceAction(state);
172: }
173: }
174:
175: private boolean isNullable(String symbol) {
176: for (int i = 0; i < symbols.length; i++)
177: if (symbols[i].equals(symbol))
178: return nullable[i];
179:
180: return true;
181: }
182:
183: private String getSymbol(Pattern pattern) {
184: return ((Definition) definitions.get(pattern)).getSymbol();
185: }
186:
187: /**
188: * Return the state as start point for the calculation.
189: *
190: * @return Start point for the calculation.
191: */
192: private State getStartState() {
193: if (grammar.getStartSymbol() == null)
194: throw new IllegalArgumentException(
195: "Start symbol is not defined");
196:
197: PatternSet firstSet = grammar.getFirstSet();
198:
199: System.out.println("firstset = " + firstSet);
200:
201: State state = new State(grammar);
202:
203: for (PatternIterator pattern = firstSet.getPattern(); pattern
204: .hasNext();) {
205: Pattern firstPattern = pattern.next();
206: Item item = new Item(((Definition) definitions
207: .get(firstPattern)).getSymbol(), firstPattern,
208: Item.SHIFT, null);
209:
210: //item.end = true;
211: state.addItem(item);
212: }
213:
214: closure(state);
215:
216: return state;
217: }
218:
219: public ExtendedGrammar getExtendedGrammar() {
220: return grammar;
221: }
222:
223: /**
224: * Add a state to this automaton.
225: *
226: * @param state State.
227: *
228: * @return Index of the state in the automaton.
229: */
230: public State addState(State newState) {
231: if (first == null) {
232: first = newState;
233: return newState;
234: }
235:
236: for (State state = first; state != null; state = state.next) {
237: if (state.equals(newState))
238: return state;
239:
240: if (state.next == null) {
241: state.next = newState;
242: return newState;
243: }
244: }
245:
246: return newState;
247: }
248:
249: /**
250: * Return the index of an state. If the automaton does not contain the state, then return this
251: * method will return -1.
252: *
253: * @param state State, which should be found.
254: *
255: * @return Index of the state.
256: */
257: public int indexOf(State foreignState) {
258: int index = 0;
259: for (State state = first; state != null; state = state.next, index++)
260: if (state.equals(foreignState))
261: return index;
262:
263: return -1;
264: }
265:
266: /**
267: * If this automaton contains the state.
268: *
269: * @param state State, which should be found.
270: *
271: * @return True, if the automaton contains the state.
272: */
273: public boolean contains(State foreignState) {
274: for (State state = first; state != null; state = state.next)
275: if (state.equals(foreignState))
276: return true;
277:
278: return false;
279: }
280:
281: public void closure(State state) {
282: System.out
283: .println("=====================================\nbefore:\n"
284: + state);
285: for (Item item = state.first; item != null; item = item.next) {
286: System.out.println("item = " + item + " position="
287: + item.position + " element="
288: + (item.pattern instanceof Element));
289: if ((item.position == Item.SHIFT)
290: && (item.pattern instanceof Element)) {
291: System.out.println("add firstset");
292:
293: String symbol = ((Element) item.pattern).getSymbol();
294: System.out.println("pattern=" + item.pattern);
295:
296: Definition definition = ((Definition) definitions
297: .get(item.pattern));
298: PatternSet firstSet = grammar.getFirstSet(symbol);
299: for (PatternIterator pattern = firstSet.getPattern(); pattern
300: .hasNext();) {
301: Pattern firstPattern = pattern.next();
302:
303: //Definition definition = ((Definition)definitions.get(firstSet.getPattern(l)));
304: if (definition.getLastSet().contains(item.pattern))
305: state.addItem(new Item(symbol, firstPattern,
306: Item.SHIFT, item.lookahead));
307:
308: for (PatternIterator lookaheads = item.pattern
309: .getSuccessors(); lookaheads.hasNext();) {
310: Pattern lookahead = lookaheads.next();
311: if (!(lookahead instanceof Element))
312: state
313: .addItem(new Item(symbol,
314: firstPattern, Item.SHIFT,
315: lookahead));
316: }
317:
318: for (PatternIterator lookaheads = item.pattern
319: .getAscendingSuccessors(); lookaheads
320: .hasNext();) {
321: Pattern lookahead = lookaheads.next();
322: if (!(lookahead instanceof Element))
323: state
324: .addItem(new Item(symbol,
325: firstPattern, Item.SHIFT,
326: lookahead));
327: }
328: }
329: }
330: }
331:
332: System.out.println("after:\n" + state + "\n");
333: }
334:
335: private void addShiftAction(State state, char minimum, char maximum) {
336: State newState = new State(grammar);
337: for (Item item = state.first; item != null; item = item.next)
338: if ((item.position == Item.SHIFT)
339: && (item.pattern.contains(minimum, maximum))) {
340: newState.addItem(item.getFollowItem());
341:
342: for (PatternIterator successors = item.pattern
343: .getSuccessors(); successors.hasNext();)
344: newState.addItem(new Item(item.pattern, successors
345: .next(), Item.SHIFT, item.lookahead));
346: }
347:
348: if (!newState.isEmpty()) {
349: closure(newState);
350: newState = addState(newState);
351:
352: //System.out.println("add shift action for "+Decoder.toClass(minimum, maximum));
353: state.addShiftAction(minimum, maximum, newState);
354: }
355: }
356:
357: private void addGotoAction(State state, String symbol) {
358: State newState = new State(grammar);
359: for (Item item = state.first; item != null; item = item.next)
360: if ((item.position == Item.SHIFT)
361: && (item.pattern instanceof Element)
362: && (((Element) item.pattern).getSymbol()
363: .equals(symbol))) {
364: newState.addItem(item.getFollowItem());
365:
366: for (PatternIterator successors = item.pattern
367: .getSuccessors(); successors.hasNext();)
368: newState.addItem(new Item(item.pattern, successors
369: .next(), Item.SHIFT, item.lookahead));
370: }
371:
372: if (!newState.isEmpty()) {
373: closure(newState);
374: newState = addState(newState);
375: state.addGotoAction(symbol, newState);
376: }
377: }
378:
379: private void addGotoAction(State state, Pattern pattern) {
380: State newState = new State(grammar);
381: for (Item item = state.first; item != null; item = item.next)
382: if ((item.position == Item.GOTO)
383: && (item.pattern == pattern))
384: newState.addItem(item.getFollowItem());
385:
386: if (!newState.isEmpty()) {
387: closure(newState);
388: newState = addState(newState);
389: state.addGotoAction(pattern, newState);
390: }
391: }
392:
393: private void addReduceAction(State state, char minimum, char maximum) {
394: for (Item item = state.first; item != null; item = item.next)
395: if (item.position == Item.SHIFT) {
396: for (PatternIterator successors = item.pattern
397: .getDescendingSuccessors(); successors
398: .hasNext();)
399: if (successors.next().contains(minimum, maximum))
400: if ((item.pattern.getSymbol() != null)
401: && (isNullable(item.pattern.getSymbol()))) {
402: state.addLookaheadReduceAction(minimum,
403: maximum, item.pattern.getSymbol(),
404: 0);
405: break;
406: }
407: } else if (item.position == Item.GOTO) {
408: for (int k = 0; k < lastSets.length; k++)
409: if (lastSets[k].contains(item.pattern)) {
410: for (PatternIterator successors = item.pattern
411: .getDescendingSuccessors(); successors
412: .hasNext();)
413: if (successors.next().contains(minimum,
414: maximum)) {
415: state.addLookaheadReduceAction(minimum,
416: maximum, item.pattern, 0);
417:
418: break;
419: }
420: }
421: } else if (item.position == Item.REDUCE) {
422: for (PatternIterator successors = item.pattern
423: .getDescendingSuccessors(); successors
424: .hasNext();)
425: if (successors.next().contains(minimum, maximum)) {
426: if (item.symbol != null)
427: state.addLookaheadReduceAction(minimum,
428: maximum, item.symbol, 2);
429: else
430: state.addLookaheadReduceAction(minimum,
431: maximum, item.previous, 2);
432:
433: break;
434: }
435: }
436: }
437:
438: private void addReduceAction(State state) {
439: for (Item item = state.first; item != null; item = item.next)
440: if (item.position == Item.SHIFT) {
441: if (grammar.getLastSet().contains(item.pattern))
442: if ((item.pattern instanceof Element)
443: && (isNullable(((Element) item.pattern)
444: .getSymbol())))
445: state.addReduceAction(((Element) item.pattern)
446: .getSymbol(), 0);
447: } else if (item.position == Item.GOTO) {
448: for (int k = 0; k < lastSets.length; k++)
449: if (lastSets[k].contains(item.pattern)) {
450: if (grammar.getLastSet().contains(item.pattern))
451: state.addReduceAction(item.pattern, 0);
452: }
453: } else if (item.position == Item.REDUCE) {
454: if (grammar.getLastSet().contains(item.pattern)) {
455: if (item.symbol != null)
456: state.addReduceAction(item.symbol, 2);
457: else
458: state.addReduceAction(item.previous, 2);
459: }
460: }
461: }
462:
463: /**
464: * Return a string representation of the automaton.
465: *
466: * @return String representation of the automaton.
467: */
468: public String toString() {
469: StringBuffer buffer = new StringBuffer();
470:
471: for (State state = first; state != null; state = state.next) {
472: buffer.append("State ");
473: buffer.append(indexOf(state));
474: buffer.append(":\n");
475: buffer.append(state.toString());
476:
477: //buffer.append(grammar.toString(getState(i).getPreviousPattern(), getState(i).getNextPattern()));
478: //buffer.append(":\n");
479: ShiftAction[] shiftActions = state.getShiftActions();
480: for (int index = 0; index < shiftActions.length; index++) {
481: buffer.append("Shift ");
482: buffer.append(Decoder.toClass(
483: shiftActions[index].minimum,
484: shiftActions[index].maximum));
485: buffer.append(" -> State ");
486: buffer.append(indexOf(shiftActions[index].state));
487: buffer.append("\n");
488: }
489:
490: LookaheadReduceAction[] lookaheadReduceActions = state
491: .getLookaheadReduceActions();
492: for (int index = 0; index < lookaheadReduceActions.length; index++) {
493: buffer.append("Reduce ");
494:
495: if (lookaheadReduceActions[index].symbol != null)
496: buffer.append(lookaheadReduceActions[index].symbol);
497: else
498: buffer
499: .append(lookaheadReduceActions[index].pattern);
500:
501: buffer.append(" for ");
502: buffer.append(Decoder.toClass(
503: lookaheadReduceActions[index].minimum,
504: lookaheadReduceActions[index].maximum));
505:
506: buffer.append("\n");
507: }
508:
509: ReduceAction[] reduceActions = state.getReduceActions();
510: for (int index = 0; index < reduceActions.length; index++) {
511: buffer.append("Reduce ");
512:
513: if (reduceActions[index].symbol != null)
514: buffer.append(reduceActions[index].symbol);
515: else
516: buffer.append(reduceActions[index].pattern);
517:
518: buffer.append("\n");
519: }
520:
521: GotoAction[] gotoactions = state.getGotoActions();
522: for (int index = 0; index < gotoactions.length; index++) {
523: buffer.append("Goto ");
524:
525: if (gotoactions[index].symbol != null)
526: buffer.append(gotoactions[index].symbol);
527: else
528: buffer.append(gotoactions[index].pattern);
529:
530: buffer.append(" -> State ");
531: buffer.append(indexOf(gotoactions[index].state));
532: buffer.append("\n");
533: }
534:
535: buffer.append("\n");
536: }
537:
538: return buffer.toString();
539: }
540: }
|