001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.build;
010:
011: import net.sourceforge.chaperon.build.conflict.ConflictList;
012: import net.sourceforge.chaperon.build.conflict.ReduceReduceConflict;
013: import net.sourceforge.chaperon.build.conflict.ShiftReduceConflict;
014: import net.sourceforge.chaperon.common.IntegerSet;
015: import net.sourceforge.chaperon.model.Violations;
016: import net.sourceforge.chaperon.model.grammar.Associativity;
017: import net.sourceforge.chaperon.model.grammar.Error;
018: import net.sourceforge.chaperon.model.grammar.Grammar;
019: import net.sourceforge.chaperon.model.symbol.SymbolSet;
020: import net.sourceforge.chaperon.model.symbol.Terminal;
021: import net.sourceforge.chaperon.process.ParserAutomaton;
022:
023: import org.apache.commons.logging.Log;
024:
025: /**
026: * This class represents a builder for parser automata.
027: *
028: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
029: * @version CVS $Id: ParserAutomatonBuilder.java,v 1.27 2003/12/14 09:48:33 benedikta Exp $
030: */
031: public class ParserAutomatonBuilder {
032: private static final EndOfFile EOF = new EndOfFile();
033: private Grammar grammar;
034: private SymbolSet tsymbols;
035: private SymbolSet ntsymbols;
036: private FirstSetCollection firstsets;
037: private ItemSetCollection itemsets;
038: private ParserAutomaton automaton;
039: private ConflictList conflicts = new ConflictList();
040: private Log log;
041:
042: /**
043: * Create a builder for an parser automaton.
044: *
045: * @param grammar Grammar, which should be used to build the automaton.
046: */
047: public ParserAutomatonBuilder(Grammar grammar) {
048: this (grammar, null);
049: }
050:
051: /**
052: * Create a builder for an parser automaton.
053: *
054: * @param grammar Grammar, which should be used to build the automaton.
055: * @param log Log, which should be used.
056: */
057: public ParserAutomatonBuilder(Grammar grammar, Log log) {
058: this .grammar = (Grammar) grammar.clone();
059: this .log = log;
060:
061: Violations violations = grammar.validate();
062:
063: if ((violations != null)
064: && (violations.getViolationCount() > 0))
065: throw new IllegalArgumentException("Grammar is not valid: "
066: + violations.getViolation(0));
067:
068: SymbolSet symbols = grammar.getSymbols();
069:
070: tsymbols = symbols.getTerminals();
071: ntsymbols = symbols.getNonterminals();
072:
073: //long time = System.currentTimeMillis();
074: // generate all first sets
075: if ((log != null) && (log.isDebugEnabled()))
076: log.debug("Generating first sets");
077:
078: firstsets = new FirstSetCollection(grammar, log);
079: if ((log != null) && (log.isDebugEnabled()))
080: log.debug(firstsets.toString());
081:
082: // calculation of alle states and transitions
083: if ((log != null) && (log.isDebugEnabled()))
084: log.debug("Building states and transitions");
085:
086: itemsets = new ItemSetCollection(grammar, firstsets, log);
087: if ((log != null) && (log.isDebugEnabled()))
088: log.debug(itemsets.toString());
089:
090: if ((log != null) && (log.isDebugEnabled()))
091: log.debug("Building parser automaton");
092:
093: automaton = new ParserAutomaton(tsymbols.getSymbolCount(),
094: ntsymbols.getSymbolCount(), grammar
095: .getProductionCount(), 0, itemsets
096: .getItemSetCount());
097:
098: // for alle terminal symbols
099: for (int i = 0; i < tsymbols.getSymbolCount(); i++)
100: automaton.setTerminal(i, tsymbols.getSymbol(i).getName());
101:
102: // for the non terminal symbols
103: for (int i = 0; i < ntsymbols.getSymbolCount(); i++)
104: automaton.setNonterminal(i, ntsymbols.getSymbol(i)
105: .getName());
106:
107: // for all productions
108: for (int i = 0; i < grammar.getProductionCount(); i++) {
109: automaton.setProductionSymbol(i, ntsymbols.indexOf(grammar
110: .getProduction(i).getSymbol()));
111: automaton.setProductionLength(i, grammar.getProduction(i)
112: .getLength());
113: }
114:
115: // for all itemsets I in itemsets C
116: for (int state = 0; state < itemsets.getItemSetCount(); state++) {
117: ItemSet I = itemsets.getItemSet(state);
118:
119: SymbolSet shiftsymbols = I.getShiftSymbols(); // Transition symbols for shift actions
120: SymbolSet reducesymbols = I.getReduceSymbols(); // Lookahead symbols for reduce actions
121:
122: for (int symbol = 0; symbol < tsymbols.getSymbolCount(); symbol++) {
123: IntegerSet reduceproductions = I
124: .getReduceProductions(tsymbols
125: .getSymbol(symbol));
126: int productionpriority = -1;
127: int highestproduction = -1;
128: for (int k = 0; k < reduceproductions.getCount(); k++) {
129: ReduceReduceConflict reduceconflict = null;
130:
131: if (k > 0) {
132: reduceconflict = new ReduceReduceConflict(
133: grammar, itemsets, state,
134: (Terminal) tsymbols.getSymbol(symbol),
135: reduceproductions.get(k - 1),
136: reduceproductions.get(k));
137:
138: conflicts.addConflict(reduceconflict);
139: }
140:
141: if (grammar.getPriority(grammar
142: .getProduction(reduceproductions.get(k))) > productionpriority) {
143: highestproduction = reduceproductions.get(k);
144: productionpriority = grammar
145: .getPriority(grammar
146: .getProduction(highestproduction));
147:
148: if ((log != null) && (reduceconflict != null))
149: log.info(reduceconflict.toString());
150: } else if (grammar.getPriority(grammar
151: .getProduction(reduceproductions.get(k))) == productionpriority) {
152: if (log != null)
153: log.warn(reduceconflict.toString());
154: }
155: }
156:
157: if (reduceproductions.getCount() > 1)
158: if ((log != null) && (log.isInfoEnabled()))
159: log
160: .info("The parser will reduce the production "
161: + grammar
162: .getProduction(highestproduction));
163:
164: boolean errorreduce = reducesymbols
165: .contains(Error.instance);
166:
167: if (shiftsymbols.contains(tsymbols.getSymbol(symbol))) {
168: if ((errorreduce)
169: || (reducesymbols.contains(tsymbols
170: .getSymbol(symbol)))) {
171: int tokenpriority = grammar
172: .getPriority((Terminal) tsymbols
173: .getSymbol(symbol));
174:
175: ShiftReduceConflict shiftconflict = new ShiftReduceConflict(
176: grammar, itemsets, state,
177: (Terminal) tsymbols.getSymbol(symbol),
178: highestproduction);
179:
180: if (tokenpriority > productionpriority) {
181: automaton.setShiftAction(state, symbol, I
182: .getTransition(tsymbols
183: .getSymbol(symbol)));
184:
185: if (log != null) {
186: log.info(shiftconflict.toString());
187: log.info("The parser will shift");
188: }
189: } else if (tokenpriority < productionpriority) {
190: automaton.setReduceAction(state, symbol,
191: highestproduction);
192:
193: if (log != null) {
194: log.info(shiftconflict.toString());
195: log.info("The parser will reduce");
196: }
197: } else {
198: if (log != null)
199: log.warn(shiftconflict.toString());
200:
201: Associativity associativity = grammar
202: .getAssociativity((Terminal) tsymbols
203: .getSymbol(symbol));
204: if (associativity
205: .equals(Associativity.RIGHT)) {
206: // if the terminal has a right associativity
207: automaton.setShiftAction(state, symbol,
208: I.getTransition(tsymbols
209: .getSymbol(symbol)));
210:
211: if (log != null)
212: log.warn("The parser will shift");
213: } else if (associativity
214: .equals(Associativity.LEFT)) {
215: // if the terminal has a left associativity
216: automaton.setReduceAction(state,
217: symbol, highestproduction);
218:
219: if (log != null)
220: log.warn("The parser will reduce");
221: } else {
222: // SHIFT should be always prefered
223: automaton.setShiftAction(state, symbol,
224: I.getTransition(tsymbols
225: .getSymbol(symbol)));
226:
227: if (log != null)
228: log.warn("The parser will shift");
229: }
230: }
231: } else
232: automaton.setShiftAction(state, symbol, I
233: .getTransition(tsymbols
234: .getSymbol(symbol)));
235: } else if ((errorreduce)
236: || (reducesymbols.contains(tsymbols
237: .getSymbol(symbol))))
238: automaton.setReduceAction(state, symbol,
239: highestproduction);
240: else
241: for (int i = 0; i < shiftsymbols.getSymbolCount(); i++)
242: if (shiftsymbols.getSymbol(i) instanceof Error)
243: automaton.setErrorAction(state, symbol, I
244: .getTransition(shiftsymbols
245: .getSymbol(i)));
246: }
247:
248: // create all actions for the end of file.
249: if (reducesymbols.contains(EOF)) {
250: IntegerSet reduceproductions = I
251: .getReduceProductions(EOF);
252: int productionpriority = -1;
253: int highestproduction = -1;
254: for (int k = 0; k < reduceproductions.getCount(); k++) {
255: if (grammar.getPriority(grammar
256: .getProduction(reduceproductions.get(k))) > productionpriority) {
257: highestproduction = reduceproductions.get(k);
258: productionpriority = grammar
259: .getPriority(grammar
260: .getProduction(highestproduction));
261: }
262: }
263:
264: if ((grammar.getProduction(highestproduction)
265: .getSymbol().equals(grammar.getStartSymbol())))
266: automaton.setAcceptAction(state, highestproduction);
267: else
268: automaton.setReduceAction(state, highestproduction);
269: } else
270: for (int i = 0; i < shiftsymbols.getSymbolCount(); i++)
271: if (shiftsymbols.getSymbol(i) instanceof Error)
272: automaton.setErrorAction(state, I
273: .getTransition(shiftsymbols
274: .getSymbol(i)));
275:
276: // create all entries for the goto table
277: for (int symbol = 0; symbol < ntsymbols.getSymbolCount(); symbol++)
278: if (shiftsymbols.contains(ntsymbols.getSymbol(symbol)))
279: automaton
280: .setTransition(state, symbol, I
281: .getTransition(ntsymbols
282: .getSymbol(symbol)));
283: }
284:
285: if ((log != null) && (log.isDebugEnabled()))
286: log.debug("Parser automaton:\n" + automaton.toString());
287: }
288:
289: /**
290: * Returns the generated parser automaton. If a error occurs the method will return null.
291: *
292: * @return The parser automaton.
293: */
294: public ParserAutomaton getParserAutomaton() {
295: return automaton;
296: }
297: }
|