001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.build;
010:
011: import net.sourceforge.chaperon.common.IntegerList;
012: import net.sourceforge.chaperon.common.IntegerSet;
013: import net.sourceforge.chaperon.model.grammar.Error;
014: import net.sourceforge.chaperon.model.grammar.Grammar;
015: import net.sourceforge.chaperon.model.symbol.Nonterminal;
016: import net.sourceforge.chaperon.model.symbol.Symbol;
017: import net.sourceforge.chaperon.model.symbol.SymbolList;
018: import net.sourceforge.chaperon.model.symbol.SymbolSet;
019: import net.sourceforge.chaperon.model.symbol.Terminal;
020:
021: /**
022: * This class represents a set of items, which means positions of production, in production and
023: * lookahead symbols. These item sets were used to decribes states.
024: *
025: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
026: * @version CVS $Id: ItemSet.java,v 1.17 2003/12/09 19:55:52 benedikta Exp $
027: */
028: public class ItemSet {
029: private int capacityIncrement = 25;
030: private int elementCount = 0;
031: private int[] productions = new int[25];
032: private int[] positions = new int[25];
033: private SymbolSet[] lookaheads = new SymbolSet[25];
034:
035: // The symbols, which translate the states into other states
036: private SymbolSet transitionsymbols = new SymbolSet();
037: private IntegerList transitionstates = new IntegerList();
038: private Grammar grammar;
039: private FirstSetCollection firstsets;
040: private static final EmptyList EMPTYLIST = new EmptyList();
041:
042: /**
043: * Create an empty set of items.
044: *
045: * @param grammar Grammar.
046: * @param firstsets The first sets.
047: */
048: public ItemSet(Grammar grammar, FirstSetCollection firstsets) {
049: this .grammar = grammar;
050: this .firstsets = firstsets;
051: }
052:
053: /**
054: * Create a state, which contains this itemset.
055: *
056: * @param grammar Grammar.
057: * @param firstsets The first sets.
058: * @param I Itemset, which the state should contain.
059: */
060: private ItemSet(Grammar grammar, FirstSetCollection firstsets,
061: ItemSet I) {
062: this .grammar = grammar;
063: this .firstsets = firstsets;
064: addItemSet(I);
065: }
066:
067: /**
068: * Add a item to this set.
069: *
070: * @param production Production.
071: * @param position Position in this production.
072: * @param lookahead Lookahead symbol.
073: *
074: * @return True, if this item was added
075: */
076: public boolean addItem(int production, int position,
077: Symbol lookahead) {
078: if (lookahead == null)
079: throw new NullPointerException("Lookahead symbol is null");
080:
081: for (int i = 0; i < elementCount; i++)
082: if ((productions[i] == production)
083: && (positions[i] == position)) {
084: if (lookaheads[i] == null)
085: lookaheads[i] = new SymbolSet();
086:
087: return lookaheads[i].addSymbol(lookahead);
088: }
089:
090: ensureCapacity(elementCount + 1);
091: productions[elementCount] = production;
092: positions[elementCount] = position;
093: lookaheads[elementCount] = new SymbolSet();
094: lookaheads[elementCount++].addSymbol(lookahead);
095:
096: return true;
097: }
098:
099: public boolean addItem(int production, int position,
100: SymbolSet lookaheads) {
101: if (lookaheads == null)
102: throw new NullPointerException(
103: "Lookahead symbol set is null");
104:
105: for (int i = 0; i < elementCount; i++)
106: if ((productions[i] == production)
107: && (positions[i] == position)) {
108: if (this .lookaheads[i] == null)
109: this .lookaheads[i] = new SymbolSet();
110:
111: return this .lookaheads[i].addSymbol(lookaheads);
112: }
113:
114: ensureCapacity(elementCount + 1);
115: productions[elementCount] = production;
116: positions[elementCount] = position;
117: this .lookaheads[elementCount] = new SymbolSet();
118: this .lookaheads[elementCount++].addSymbol(lookaheads);
119:
120: return true;
121: }
122:
123: /**
124: * Add the items from another itemset to this set. If this set changed the method will return
125: * true.
126: *
127: * @param I ItemSet, which should be addded.
128: *
129: * @return True, if this set was changed
130: */
131: public boolean addItemSet(ItemSet I) {
132: boolean changed = false;
133:
134: for (int i = 0; i < I.elementCount; i++)
135: changed |= addItem(I.productions[i], I.positions[i],
136: I.lookaheads[i]);
137:
138: return changed;
139: }
140:
141: /**
142: * If this set contains an item, which compare by the production, position and lookahead symbol.
143: *
144: * @param production Index of production in the grammar.
145: * @param position Position in the production.
146: * @param lookahead Lookahead symbol.
147: *
148: * @return True, if this set contains the item.
149: */
150:
151: /*private boolean containsItem(int production, int position, Symbol lookahead)
152: {
153: if (lookahead==null)
154: throw new NullPointerException("Lookahead symbol is null");
155:
156: for (int i = 0; i<elementCount; i++)
157: if ((productions[i]==production) && (positions[i]==position))
158: return lookaheads[i].contains(lookahead);
159: return false;
160: }*/
161:
162: /**
163: * If this set contains an item, which compare by the production and position.
164: *
165: * @param production Index of production in the grammar.
166: * @param position Position in the production.
167: *
168: * @return True, if this set contains the core of the item.
169: */
170:
171: /*private boolean containsItemCore(int production, int position)
172: {
173: for (int i = 0; i<elementCount; i++)
174: if ((productions[i]==production) && (positions[i]==position))
175: return true;
176: return false;
177: }*/
178:
179: /**
180: * Test, if all items from a other state exists in this state
181: *
182: * @param itemset Other item set.
183: *
184: * @return True, if the state contains all items.
185: */
186: public boolean contains(ItemSet itemset) {
187: for (int i = 0; i < itemset.elementCount; i++) {
188: int production = itemset.productions[i];
189: int position = itemset.positions[i];
190:
191: boolean found = false;
192: for (int j = 0; j < elementCount; j++)
193: if ((productions[j] == production)
194: && (positions[j] == position)) {
195: found = lookaheads[j]
196: .contains(itemset.lookaheads[i]);
197: break;
198: }
199:
200: if (!found)
201: return false;
202: }
203:
204: return true;
205: }
206:
207: /**
208: * Test, if all cores of the items from another item set exists in this item set.
209: *
210: * @param itemset Other item set.
211: *
212: * @return True, if the state contains all cores the items.
213: */
214: private boolean containsCore(ItemSet itemset) {
215: for (int i = 0; i < itemset.elementCount; i++) {
216: int production = itemset.productions[i];
217: int position = itemset.positions[i];
218:
219: boolean found = false;
220: for (int j = 0; (j < elementCount) && !found; j++)
221: found = ((productions[j] == production) && (positions[j] == position));
222:
223: if (!found)
224: return false;
225: }
226:
227: return true;
228: }
229:
230: /**
231: * Returns the count of items in this set.
232: *
233: * @return Count of items of the core.
234: */
235: public int getItemCount() {
236: return elementCount;
237: }
238:
239: /**
240: * Returns true, if this set is empty.
241: *
242: * @return True, if this set is empty.
243: */
244: public boolean isEmpty() {
245: return (elementCount == 0);
246: }
247:
248: /**
249: * Compares two item sets.
250: *
251: * @param o Other itemset.
252: *
253: * @return True, if the item sets are equal.
254: */
255: public boolean equals(Object o) {
256: if (o instanceof ItemSet) {
257: ItemSet itemset = (ItemSet) o;
258:
259: if (itemset.getItemCount() != getItemCount())
260: return false;
261:
262: // The itemset must contain all item from this set.
263: if (!contains(itemset))
264: return false;
265:
266: // And this set must contain all item from the item set
267: if (!itemset.contains(this ))
268: return false;
269:
270: return true;
271: }
272:
273: return false;
274: }
275:
276: /**
277: * Compares the core of two item sets.
278: *
279: * @param o Other itemset.
280: *
281: * @return True, if the core of the itemsets are equal.
282: */
283: public boolean equalsCore(Object o) {
284: if (o instanceof ItemSet) {
285: ItemSet itemset = (ItemSet) o;
286:
287: // The itemset must contain all item from this set.
288: if (!containsCore(itemset))
289: return false;
290:
291: // And this set must contain all item from the item set
292: if (!itemset.containsCore(this ))
293: return false;
294:
295: return true;
296: }
297:
298: return false;
299: }
300:
301: /**
302: * Return the next Symbol, which follow the item.
303: *
304: * @param index Index the item.
305: *
306: * @return Symbol of the next position, otherwise the symbol for an empty list.
307: */
308: private Symbol getItemNext(int index) {
309: SymbolList productiondefinition;
310:
311: if (positions[index] < ((productiondefinition = grammar
312: .getProduction(productions[index]).getDefinition())
313: .getSymbolCount()))
314: return productiondefinition.getSymbol(positions[index]);
315:
316: return EMPTYLIST;
317: }
318:
319: public SymbolSet getNextTerminals() {
320: SymbolSet set = new SymbolSet();
321:
322: SymbolList productiondefinition;
323: for (int item = 0; item < elementCount; item++)
324: if ((positions[item] < ((productiondefinition = grammar
325: .getProduction(productions[item]).getDefinition())
326: .getSymbolCount()))
327: && (productiondefinition.getSymbol(positions[item]) instanceof Terminal))
328: set.addSymbol(productiondefinition
329: .getSymbol(positions[item]));
330:
331: return set;
332: }
333:
334: public SymbolSet getNextNonterminals() {
335: SymbolSet set = new SymbolSet();
336:
337: SymbolList productiondefinition;
338: for (int item = 0; item < elementCount; item++)
339: if ((positions[item] < ((productiondefinition = grammar
340: .getProduction(productions[item]).getDefinition())
341: .getSymbolCount()))
342: && (productiondefinition.getSymbol(positions[item]) instanceof Nonterminal))
343: set.addSymbol(productiondefinition
344: .getSymbol(positions[item]));
345:
346: return set;
347: }
348:
349: public Error getNextError() {
350: SymbolList productiondefinition;
351: for (int item = 0; item < elementCount; item++)
352: if ((positions[item] < ((productiondefinition = grammar
353: .getProduction(productions[item]).getDefinition())
354: .getSymbolCount()))
355: && (productiondefinition.getSymbol(positions[item]) instanceof Error))
356: return (Error) productiondefinition
357: .getSymbol(positions[item]);
358:
359: return null;
360: }
361:
362: /**
363: * Calculate the closure for this item set.
364: *
365: * @return Closure of the item set
366: */
367: public ItemSet closure() {
368: ItemSet J = new ItemSet(grammar, firstsets, this ); // J=I
369:
370: SymbolSet b = new SymbolSet();
371: SymbolSet b2 = new SymbolSet();
372:
373: boolean changed = false;
374: do {
375: changed = false;
376:
377: // for every item in itemset I
378: for (int i = 0; i < J.elementCount; i++) {
379: SymbolList productiondefinition = grammar
380: .getProduction(J.productions[i])
381: .getDefinition();
382:
383: // and not A=XYZ^
384: if (J.positions[i] < productiondefinition
385: .getSymbolCount()) {
386: Symbol symbol = productiondefinition
387: .getSymbol(J.positions[i]); // A=X ^symbol Z
388:
389: // for every item [A=u^Bv,a] in J and production B=w in G
390: if (symbol instanceof Nonterminal) {
391: int pos = J.positions[i] + 1; // for the FIRST set from (va)
392:
393: b.clear();
394:
395: // if [A=u^Bv,a]
396: if (pos < productiondefinition.getSymbolCount()) {
397: // then is b the list of all terminal symbols from FIRST(va)
398: do {
399: if (productiondefinition.getSymbol(pos) instanceof Terminal) {
400: b2.clear();
401: b2.addSymbol(productiondefinition
402: .getSymbol(pos));
403: } else {
404: b2.clear();
405: b2
406: .addSymbol(firstsets
407: .getFirstSet(productiondefinition
408: .getSymbol(pos)));
409: }
410:
411: b.addSymbol(b2);
412: pos++;
413: } while ((b2.contains(EMPTYLIST))
414: && (pos < productiondefinition
415: .getSymbolCount()));
416:
417: if (b.contains(EMPTYLIST))
418: b.addSymbol(J.lookaheads[i]);
419:
420: b.removeSymbol(EMPTYLIST);
421: } else if (pos >= productiondefinition
422: .getSymbolCount())
423:
424: // otherwise is b FIRST(a)
425: b.addSymbol(J.lookaheads[i]);
426:
427: // list of all productions B
428: IntegerList productionlist = grammar
429: .getProductionList(symbol);
430:
431: // for alle productions B
432: for (int j = 0; j < productionlist.getCount(); j++) {
433: // if J doesn't contain [B=^w,b] , should it added
434: for (int k = 0; k < b.getSymbolCount(); k++)
435: changed |= J.addItem(productionlist
436: .get(j), 0, b.getSymbol(k));
437: }
438: }
439: }
440: }
441: } while (changed);
442:
443: return J;
444: }
445:
446: /**
447: * Calculates the next state by a transition through the symbol X.
448: *
449: * @param symbol A Symbol, which can be a terminal or a nonterminal symbol.
450: *
451: * @return The next state, represented by an item set.
452: */
453: public ItemSet jump(Symbol symbol) {
454: ItemSet J = new ItemSet(grammar, firstsets);
455:
456: // For every item [A=u^Xv,a] in I
457: for (int i = 0; i < elementCount; i++) {
458: if (getItemNext(i).equals(symbol))
459:
460: // add [A=uX^v,a] to J
461: J.addItem(productions[i], positions[i] + 1,
462: lookaheads[i]);
463: }
464:
465: // jump(I,X) = closure(J)
466: return J.closure();
467: }
468:
469: /**
470: * Add a transition to this state.
471: *
472: * @param symbol Symbol, which forces a transition into another state.
473: * @param state Destination state.
474: */
475: public void setTransition(Symbol symbol, int state) {
476: if (transitionsymbols.contains(symbol))
477: transitionstates.set(transitionsymbols.indexOf(symbol),
478: state);
479: else {
480: transitionsymbols.addSymbol(symbol);
481: transitionstates.add(state);
482: }
483: }
484:
485: /**
486: * Returns the destination state of a transition.
487: *
488: * @param symbol Symbol, which force the transition.
489: *
490: * @return Destination state.
491: */
492: public int getTransition(Symbol symbol) {
493: if (transitionsymbols.contains(symbol))
494: return transitionstates.get(transitionsymbols
495: .indexOf(symbol));
496:
497: return -1;
498: }
499:
500: /**
501: * Returns all symbols, which forces a transition.
502: *
503: * @return List of symbols.
504: */
505: public SymbolSet getShiftSymbols() {
506: return transitionsymbols;
507: }
508:
509: /**
510: * Returns the list of productions, which could be reduced.
511: *
512: * @return List of indicies for the productions.
513: */
514: public IntegerSet getReduceProductions() {
515: IntegerSet reduceproductions = new IntegerSet();
516:
517: for (int i = 0; i < elementCount; i++) {
518: if (getItemNext(i).equals(EMPTYLIST)) // for all A=u^ and all symbols in FOLLOW(A)
519:
520: reduceproductions.add(productions[i]);
521: }
522:
523: return reduceproductions;
524: }
525:
526: /**
527: * Returns the list of productions, which a special symbol reduce.
528: *
529: * @param lookahead Symbol, which the productions reduce.
530: *
531: * @return Set of indicies from the productions.
532: */
533: public IntegerSet getReduceProductions(Symbol lookahead) {
534: IntegerSet reduceproductions = new IntegerSet();
535:
536: for (int i = 0; i < elementCount; i++) {
537: // for all A=u^ and all symbols in FOLLOW(A)
538: if ((getItemNext(i).equals(EMPTYLIST))
539: && (lookaheads[i].contains(lookahead) || lookaheads[i]
540: .contains(Error.instance)))
541: reduceproductions.add(productions[i]);
542: }
543:
544: return reduceproductions;
545: }
546:
547: /**
548: * Return all symbols, which reduce productions in this state.
549: *
550: * @return Set of symbols.
551: */
552: public SymbolSet getReduceSymbols() {
553: SymbolSet reducesymbols = new SymbolSet();
554:
555: for (int i = 0; i < elementCount; i++) {
556: if (getItemNext(i).equals(EMPTYLIST)) // for all A=u^ and all symbols in FOLLOW(A)
557:
558: reducesymbols.addSymbol(lookaheads[i]);
559: }
560:
561: return reducesymbols;
562: }
563:
564: /**
565: * Ensure the capacity for adding items.
566: *
567: * @param minCapacity Minimum capacity.
568: */
569: private void ensureCapacity(int minCapacity) {
570: if (productions.length >= minCapacity)
571: return;
572:
573: int newCapacity = productions.length + capacityIncrement;
574:
575: if (capacityIncrement <= 0)
576: newCapacity = productions.length * 2;
577:
578: int[] newProductions = new int[Math.max(newCapacity,
579: minCapacity)];
580: int[] newPositions = new int[Math.max(newCapacity, minCapacity)];
581: SymbolSet[] newLookaheads = new SymbolSet[Math.max(newCapacity,
582: minCapacity)];
583:
584: System.arraycopy(productions, 0, newProductions, 0,
585: productions.length);
586: System.arraycopy(positions, 0, newPositions, 0,
587: productions.length);
588: System.arraycopy(lookaheads, 0, newLookaheads, 0,
589: productions.length);
590:
591: productions = newProductions;
592: positions = newPositions;
593: lookaheads = newLookaheads;
594: }
595:
596: /**
597: * Return a string representation of this item set.
598: *
599: * @return String representation of this item set.
600: */
601: public String toString() {
602: StringBuffer buffer = new StringBuffer();
603:
604: SymbolList list;
605:
606: for (int production = 0; production < grammar
607: .getProductionCount(); production++) {
608: list = grammar.getProduction(production).getDefinition();
609:
610: for (int position = 0; position <= list.getSymbolCount(); position++) {
611: for (int item = 0; item < elementCount; item++)
612: if ((productions[item] == production)
613: && (positions[item] == position)) {
614: buffer.append(grammar.getProduction(production)
615: .getSymbol());
616: buffer.append(" -> ");
617:
618: for (int i = 0; i < list.getSymbolCount(); i++) {
619: if (i == position)
620: buffer.append(".");
621:
622: buffer.append(list.getSymbol(i) + " ");
623: }
624:
625: if (position == list.getSymbolCount())
626: buffer.append(".");
627:
628: buffer.append(" , ");
629: for (int lookahead = 0; lookahead < lookaheads[item]
630: .getSymbolCount(); lookahead++) {
631: if (lookahead > 0)
632: buffer.append(" / ");
633:
634: buffer.append(lookaheads[item].getSymbol(
635: lookahead).toString());
636: }
637:
638: buffer.append("\n");
639: break;
640: }
641: }
642: }
643:
644: SymbolSet set = getShiftSymbols();
645:
646: for (int index = 0; index < set.getSymbolCount(); index++) {
647: buffer.append("Transition for ");
648: buffer.append(set.getSymbol(index));
649: buffer.append(" -> State ");
650: buffer.append(String.valueOf(getTransition(set
651: .getSymbol(index))));
652: buffer.append("\n");
653: }
654:
655: return buffer.toString();
656: }
657: }
|