001: package ro.infoiasi.donald.compiler.cfg;
002:
003: import java.util.*;
004: import java.io.*;
005:
006: /** Context Free Grammar */
007: public class CFG {
008: private NonTerminals v;
009: private Terminals t;
010: private NonTerminal s;
011: private Productions p;
012:
013: /** The set of nullable non-terminals */
014: private Set vNullable;
015: /** The set of nullable productions */
016: private Set pNullable;
017:
018: /** An array containing the FIRST set for each non-terminal */
019: private Set[] vFirst;
020: /** An array containing the FIRST set for each production */
021: private Set[] pFirst;
022:
023: /** An array containing the FOLLOW set for each non-terminal */
024: private Set[] vFollow;
025:
026: public CFG(NonTerminals v, Terminals t, NonTerminal s, Productions p) {
027: this .v = v;
028: this .t = t;
029: if (s == null) {
030: this .s = v.find(0);
031: } else {
032: this .s = s;
033: }
034: this .p = p;
035: removeUseless();
036: }
037:
038: /** Removes the useless non-terminals and productions from the grammar.
039: A non-terminal is useless if it is not accessible or not productive.
040: A production is useless if it contains at least one useless non-terminal. */
041: private void removeUseless() {
042: /* When removing non-terminals and productions
043: their hash codes become unstable.*/
044: boolean haveUseless;
045: do {
046: Set vAccessible = new HashSet();
047: vAccessible.add(s);
048: LinkedList queue = new LinkedList();
049: queue.addLast(s);
050:
051: while (!queue.isEmpty()) {
052: NonTerminal a = (NonTerminal) queue.removeLast();
053: Iterator itL = p.iterator(a);
054: while (itL.hasNext()) {
055: Word w = ((Production) itL.next()).getRHS();
056: WordIterator itW = w.iterator();
057: while (itW.hasNextNonTerminal()) {
058: NonTerminal b = itW.nextNonTerminal();
059: if (vAccessible.add(b)) {
060: queue.addLast(b);
061: }
062: }
063: }
064: }
065:
066: Set vProductive = new HashSet();
067: boolean changed;
068: do {
069: changed = false;
070: Iterator itV = v.iterator();
071: while (itV.hasNext()) {
072: NonTerminal a = (NonTerminal) itV.next();
073: if (!vProductive.contains(a)) {
074: Iterator itP = p.iterator(a);
075: while (itP.hasNext()) {
076: Word w = ((Production) itP.next()).getRHS();
077: WordIterator itW = w.iterator();
078: boolean isProductive = true;
079: while (itW.hasNextNonTerminal()) {
080: NonTerminal b = (NonTerminal) itW
081: .nextNonTerminal();
082: if (!vProductive.contains(b)) {
083: isProductive = false;
084: break;
085: }
086: }
087: if (isProductive) {
088: vProductive.add(a);
089: changed = true;
090: break;
091: }
092: }
093: }
094: }
095: } while (changed);
096:
097: haveUseless = vAccessible.size() < v.count()
098: || vProductive.size() < v.count();
099: if (haveUseless) {
100: // WARNING !!! it's imperative not to use hashing here
101: // and because TreeSet requires Comparable or Comparator
102: // we use a LinkedList
103: LinkedList vUseless = new LinkedList();
104: for (int i = 0; i < v.count(); i++) {
105: NonTerminal a = v.find(i);
106: if (!vAccessible.contains(a)
107: || !vProductive.contains(a)) {
108: vUseless.add(a);
109: }
110: }
111: v.removeUseless(vUseless);
112: p.removeUseless(vUseless);
113: }
114: } while (haveUseless);
115: }
116:
117: /** Adds a new start non-terminal $START to the grammar
118: and the production $START ::= oldStart EOF */
119: public Production addStartProduction() {
120: NonTerminal oldStart = s;
121: s = v.addNew("$START");
122: Word rhs = new Word();
123: rhs.addLast(oldStart);
124: rhs.addLast(t.EOF);
125: return p.addNew(s, rhs);
126: }
127:
128: public NonTerminals getNonTerminals() {
129: return v;
130: }
131:
132: public Terminals getTerminals() {
133: return t;
134: }
135:
136: public NonTerminal getStartSymbol() {
137: return s;
138: }
139:
140: public Productions getProductions() {
141: return p;
142: }
143:
144: public int symbolCount() {
145: return v.count() + t.count();
146: }
147:
148: public int getSID(Symbol x) {
149: int xSID = x.getIndex();
150: if (x.isTerminal()) {
151: xSID += v.count();
152: }
153: return xSID;
154: }
155:
156: public Symbol symbol(int SID) {
157: if (0 <= SID && SID < v.count()) {
158: return v.find(SID);
159: } else if (SID < v.count() + t.count()) {
160: return t.find(SID - v.count());
161: } else {
162: throw new IllegalArgumentException();
163: }
164: }
165:
166: public void computeNullable() {
167: vNullable = new HashSet();
168: pNullable = new HashSet();
169: Set pVisited = new HashSet();
170: // a production is "visided" if it's (non)nullability in certain
171:
172: boolean changed;
173: do {
174: changed = false;
175: Iterator itV = v.iterator();
176: while (itV.hasNext()) {
177: NonTerminal a = (NonTerminal) itV.next();
178: Iterator itP = p.iterator(a);
179: while (itP.hasNext()) {
180: Production prod = (Production) itP.next();
181: if (!pVisited.contains(prod)) {
182: Word w = prod.getRHS();
183: WordIterator itW = w.iterator();
184: if (itW.hasNextTerminal()) {
185: pVisited.add(prod);
186: } else {
187: boolean nullable = true;
188: while (itW.hasNext() && nullable) {
189: NonTerminal b = (NonTerminal) itW
190: .next();
191: if (!vNullable.contains(b)) {
192: nullable = false; // cannot decide right now
193: }
194: }
195: if (nullable) {
196: vNullable.add(a);
197: pNullable.add(prod);
198: pVisited.add(prod);
199: changed = true;
200: }
201: }
202: }
203: }
204: }
205: } while (changed);
206: }
207:
208: public boolean nullable(NonTerminal var) {
209: return vNullable.contains(var);
210: }
211:
212: public boolean nullable(Production prod) {
213: return pNullable.contains(prod);
214: }
215:
216: public boolean nullable(Word w) {
217: WordIterator itW = w.iterator();
218: if (itW.hasNextTerminal()) {
219: return false;
220: }
221: while (itW.hasNext()) {
222: NonTerminal var = (NonTerminal) itW.next();
223: if (!nullable(var)) {
224: return false;
225: }
226: }
227: return true;
228: }
229:
230: /** Epsilon rule elimination */
231: private void eliminateEpsilon() {
232: if (vNullable == null) {
233: computeNullable();
234: }
235: // System.out.println("vNullable:"+vNullable);
236:
237: Productions pNew = new Productions();
238: Iterator itP = p.iterator();
239: while (itP.hasNext()) {
240: Production prod = (Production) itP.next();
241: // System.out.println("Production:"+prod);
242:
243: Word w = prod.getRHS();
244: // ignore epsilon rules
245: if (!w.isEmpty()) {
246: ArrayList words = new ArrayList();
247: WordIterator itW = w.iterator();
248: while (itW.hasNextNonTerminal()) {
249: NonTerminal x = itW.nextNonTerminal();
250: // System.out.println("NonTerminal:"+x);
251: // System.out.println("words:"+words);
252: // System.out.println("w:"+w);
253:
254: if (vNullable.contains(x)) {
255: Word prefW = itW.prefix();
256: // System.out.println("[NULABLE]");
257: // System.out.println("prefW:"+prefW);
258: if (words.isEmpty()) {
259: Word w1 = new Word(prefW);
260: Word w0 = new Word(prefW);
261: w0.removeLast();
262: words.add(w0);
263: words.add(w1);
264:
265: w = new Word(itW.suffix());
266: } else {
267: int size = words.size();
268: for (int i = 0; i < size; i++) {
269: Word w0 = (Word) words.get(i);
270: Word w1 = new Word(w0);
271: w1.addWordLast(new Word(prefW));
272: w0.addWordLast(new Word(prefW));
273: w0.removeLast();
274: words.add(w1);
275: }
276: w = itW.suffix();
277: }
278: itW = w.iterator();
279: }
280: }
281: while (itW.hasNextTerminal()) {
282: Terminal a = itW.nextTerminal();
283: for (int i = 0; i < words.size(); i++) {
284: Word wi = (Word) words.get(i);
285: wi.addWordLast(new Word(itW.suffix()));
286: }
287:
288: }
289: if (words.isEmpty()) {
290: // rule doesn't contain nullable symbols
291: pNew.addNew(prod.getLHS(), prod.getRHS()); //!!TODO: an exact copy might be better
292: } else {
293: Iterator it = words.iterator();
294: while (it.hasNext()) {
295: Word w0 = (Word) it.next();
296: w0.addWordLast(new Word(w)); // new optional?
297: // ignore epsilon rules
298: if (!w0.isEmpty()) {
299: pNew.addNew(prod.getLHS(), w0);
300: }
301: }
302: }
303: }
304: }
305: p = pNew;
306: // System.out.println("after epsilon rule elimination:\n"+p);
307: }
308:
309: /** Eliminate unit productions */
310: private void eliminateUnitProductions() {
311: Productions pNew = new Productions();
312:
313: // eliminate unit productions
314: Map map = new HashMap();
315: // foreach x in V, map.get(x) returns Set S, so that
316: // foreach y in S we have y ->* x
317: Iterator itV = v.iterator();
318: while (itV.hasNext()) {
319: NonTerminal x = (NonTerminal) itV.next();
320: Set set = new LinkedHashSet();
321: set.add(x);
322: map.put(x, set);
323: }
324:
325: boolean changed;
326: do {
327: changed = false;
328: Iterator itP = p.iterator();
329: while (itP.hasNext()) {
330: Production prod = (Production) itP.next();
331: Word w = prod.getRHS();
332: if (w.size() == 1
333: && w.getFirst() instanceof NonTerminal) {
334: // the rule is x2 ::= x
335: NonTerminal x = (NonTerminal) w.getFirst();
336: NonTerminal x2 = prod.getLHS();
337: Set s2 = (Set) map.get(x2);
338: Iterator itS2 = s2.iterator();
339: while (itS2.hasNext()) {
340: // x1 =>* x2 then x1 =>* x
341: NonTerminal x1 = (NonTerminal) itS2.next();
342: Set s2prime = (Set) map.get(x);
343: if (s2prime.add(x1)) {
344: changed = true;
345: // System.out.println("("+x1+","+x+")");
346: }
347: }
348: }
349: }
350: } while (changed);
351:
352: // eliminate unit productions
353: Iterator itP = p.iterator();
354: while (itP.hasNext()) {
355: Production prod = (Production) itP.next();
356: Word w = prod.getRHS();
357: if (w.size() != 1 || w.getFirst() instanceof Terminal) {
358: pNew.addNew(prod.getLHS(), prod.getRHS()); //!!TODO: an exact copy might be better
359: }
360: }
361:
362: // add new rules to the grammar
363: itV = v.iterator();
364: while (itV.hasNext()) {
365: NonTerminal x2 = (NonTerminal) itV.next();
366: Set set = (Set) map.get(x2);
367: Iterator itS = set.iterator();
368: while (itS.hasNext()) {
369: // x1 =>* x2
370: NonTerminal x1 = (NonTerminal) itS.next();
371: if (x1 != x2) {
372: LinkedList list = pNew.find(x2);
373: if (list != null) { //!TODO: check the "find" function -> is it normal to return null or empty list?
374: Iterator itL = list.iterator();
375: while (itL.hasNext()) {
376: Production prod = (Production) itL.next();
377: Word w = prod.getRHS();
378: // x2 ::= w, than add the new rule x1 ::= w
379: pNew.addNew(x1, w);
380: }
381: }
382: }
383: }
384: }
385: p = pNew;
386: }
387:
388: // Arange that all bodies of length 2 or more consist only of variables
389: private void toChomskyNormalFormStep4() {
390: Productions pNew = new Productions();
391: Map map = new LinkedHashMap(); // (terminal -> nonterminal(new) or null)
392:
393: int count = 0;
394: Iterator it = p.iterator();
395: while (it.hasNext()) {
396: Production prod = (Production) it.next();
397: Word w = prod.getRHS();
398: if (w.size() >= 2) {
399: pNew.addNew(prod.getLHS(), w);
400: WordIterator itW = w.iterator();
401: while (itW.hasNext()) {
402: Symbol sym = itW.next();
403: if (sym instanceof Terminal) {
404: NonTerminal y = (NonTerminal) map.get(sym);
405: if (y == null) {
406: y = v.addNew("$nt_cnf" + (count++));
407: map.put(sym, y);
408: }
409: pNew.addNew(y, new Word(sym));
410: itW.set(y);
411: }
412: }
413: } else {
414: pNew.addNew(prod.getLHS(), w);
415: }
416: }
417: p = pNew;
418: }
419:
420: private void toChomskyNormalFormStep5() {
421: // Break the bodies of lenght 3 or more into a cascade of productions
422: Productions pNew = new Productions();
423: int count = 0;
424: Iterator it = p.iterator();
425: while (it.hasNext()) {
426: Production prod = (Production) it.next();
427: // System.out.println("\nProduction:"+prod);
428: Word w = new Word(prod.getRHS());
429: if (w.size() > 2) {
430: NonTerminal x = prod.getLHS();
431: // x ::= x1 x2 ... xn
432: while (w.size() > 2) {
433: Symbol sym = w.removeFirst();
434: NonTerminal newX = v.addNew("$nt$cnf" + (count++));
435: pNew.addNew(x, new Word(sym, newX));
436: x = newX; // newX ::= w
437: }
438: pNew.addNew(x, new Word(w));
439: } else {
440: pNew.addNew(prod.getLHS(), w);
441: }
442: }
443: p = pNew;
444: }
445:
446: /** Transforms a grammar to the Chomsky normal form
447: all the precedences and semantic actions are discarded from the productions */
448: public void toChomskyNormalForm() {
449:
450: // Step 1
451: eliminateEpsilon();
452: // System.out.println(this);
453:
454: // Step 2
455: eliminateUnitProductions();
456: // System.out.println(this);
457:
458: // Step 3
459: removeUseless();
460:
461: // Step 4
462: toChomskyNormalFormStep4();
463: // System.out.println(this);
464:
465: // Step 5
466: toChomskyNormalFormStep5();
467: // System.out.println(this);
468:
469: System.gc();
470: }
471:
472: public void computeFirst() {
473: if (vNullable == null) {
474: computeNullable();
475: }
476:
477: vFirst = new Set[v.count()];
478: for (int i = 0; i < v.count(); i++) {
479: vFirst[i] = new LinkedHashSet();
480: }
481:
482: pFirst = new Set[p.count()];
483: for (int i = 0; i < p.count(); i++) {
484: pFirst[i] = new LinkedHashSet();
485: }
486:
487: boolean changed;
488: do {
489: changed = false;
490: for (int i = 0; i < v.count(); i++) {
491: Iterator itL = p.iterator(v.find(i));
492: while (itL.hasNext()) {
493: Production prod = (Production) itL.next();
494: int j = prod.getIndex();
495:
496: Word w = prod.getRHS();
497: if (!w.isEmpty()) {
498: WordIterator itW = w.iterator();
499: boolean over = false;
500: while (itW.hasNext() && !over) {
501: Symbol symbol = itW.next();
502: if (symbol.isTerminal()) {
503: if (vFirst[i].add(symbol)) {
504: changed = true;
505: }
506: if (pFirst[j].add(symbol)) {
507: changed = true;
508: }
509: over = true;
510: } else {
511: int k = symbol.getIndex();
512: if (vFirst[i].addAll(vFirst[k])) {
513: changed = true;
514: }
515: if (pFirst[j].addAll(vFirst[k])) {
516: changed = true;
517: }
518: if (!nullable((NonTerminal) symbol)) {
519: over = true;
520: }
521: }
522: }
523: }
524: }
525: }
526: } while (changed);
527: }
528:
529: public Set first(NonTerminal var) {
530: return vFirst[var.getIndex()];
531: }
532:
533: public Set first(Production prod) {
534: return pFirst[prod.getIndex()];
535: }
536:
537: // Space efficient - don't use pFirst
538: // return first(prod.getRHS());
539: //
540:
541: public Set first(Word w) {
542: Set first = new LinkedHashSet();
543: if (!w.isEmpty()) {
544: WordIterator itW = w.iterator();
545: Symbol symbol;
546: do {
547: symbol = itW.next();
548: if (symbol.isTerminal()) {
549: first.add(symbol);
550: } else {
551: first.addAll(first((NonTerminal) symbol));
552: }
553: } while (itW.hasNext() && !symbol.isTerminal()
554: && nullable((NonTerminal) symbol));
555: }
556: return first;
557: }
558:
559: public void computeFollow() {
560: if (vFirst == null) {
561: computeFirst();
562: }
563: vFollow = new Set[v.count()];
564: for (int i = 0; i < v.count(); i++) {
565: vFollow[i] = new LinkedHashSet();
566: }
567:
568: vFollow[s.getIndex()].add(t.EOF);
569:
570: Iterator itV = v.iterator();
571: while (itV.hasNext()) {
572: NonTerminal a = (NonTerminal) itV.next();
573: Iterator itL = p.find(a).iterator();
574: while (itL.hasNext()) {
575: Production prod = (Production) itL.next();
576: Word w = prod.getRHS();
577: WordIterator itW = w.iterator();
578: while (itW.hasNextNonTerminal()) {
579: int j = itW.nextNonTerminal().getIndex();
580: vFollow[j].addAll(first(itW.suffix()));
581: }
582: }
583: }
584:
585: boolean changed;
586: do {
587: changed = false;
588: for (int i = 0; i < v.count(); i++) {
589: List prodList = p.find(v.find(i));
590: Iterator itL = prodList.iterator();
591: while (itL.hasNext()) {
592: Production prod = (Production) itL.next();
593: Word w = prod.getRHS();
594: WordIterator itW = w.iterator();
595: while (itW.hasNextNonTerminal()) {
596: int j = itW.nextNonTerminal().getIndex();
597: if (nullable(itW.suffix())) {
598: if (vFollow[j].addAll(vFollow[i])) {
599: changed = true;
600: }
601: }
602: }
603: }
604: }
605: } while (changed);
606: }
607:
608: public Set follow(NonTerminal var) {
609: return vFollow[var.getIndex()];
610: }
611:
612: public String toString() {
613: StringBuffer sb = new StringBuffer();
614: sb.append("nonterminal " + v + ";\n");
615: sb.append("terminal " + t + ";\n");
616: sb.append("start with " + s + ";\n");
617: sb.append(p);
618: return sb.toString();
619: }
620:
621: public void printNullable() {
622: if (vNullable != null) {
623: System.out.println("nullable: ");
624: Iterator itV = v.iterator();
625: while (itV.hasNext()) {
626: NonTerminal a = (NonTerminal) itV.next();
627: if (vNullable.contains(a)) {
628: System.out.println(a);
629: }
630: }
631: } else {
632: System.out.println("nullability unknown");
633: }
634: if (pNullable != null) {
635: System.out.println("nullable productions: ");
636: Iterator itP = p.iterator();
637: while (itP.hasNext()) {
638: Production prod = (Production) itP.next();
639: if (pNullable.contains(prod)) {
640: System.out.println(prod);
641: }
642: }
643: } else {
644: System.out.println("nullability of productions unknown");
645: }
646: }
647:
648: public void printFirst() {
649: if (vFirst != null) {
650: for (int i = 0; i < v.count(); i++) {
651: System.out
652: .print("first(" + v.find(i).getName() + ")={");
653: Iterator it = t.iterator();
654: while (it.hasNext()) {
655: Terminal term = (Terminal) it.next();
656: if (vFirst[i].contains(term)) {
657: System.out.print(term + ", ");
658: }
659: }
660: System.out.println("}");
661: }
662: }
663: if (pFirst != null) {
664: for (int i = 0; i < p.count(); i++) {
665: System.out.print("first(" + p.find(i).getRHS() + ")={");
666: Iterator it = t.iterator();
667: while (it.hasNext()) {
668: Terminal term = (Terminal) it.next();
669: if (pFirst[i].contains(term)) {
670: System.out.print(term + ", ");
671: }
672: }
673: System.out.println("}");
674: }
675: }
676: }
677:
678: public void printFollow() {
679: if (vFollow != null) {
680: for (int i = 0; i < v.count(); i++) {
681: System.out.print("follow(" + v.find(i).getName()
682: + ")={");
683: Iterator it = t.iterator();
684: while (it.hasNext()) {
685: Terminal term = (Terminal) it.next();
686: if (vFollow[i].contains(term)) {
687: System.out.print(term + ", ");
688: }
689: }
690: System.out.println("}");
691: }
692: }
693: }
694:
695: public boolean isInvertible() {
696: return p.isInvertible();
697: }
698: }
|