001: /*
002: * xtc - The eXTensible Compiler
003: * Copyright (C) 2004-2007 Robert Grimm
004: *
005: * This program is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU General Public License
007: * version 2 as published by the Free Software Foundation.
008: *
009: * This program is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
012: * GNU General Public License for more details.
013: *
014: * You should have received a copy of the GNU General Public License
015: * along with this program; if not, write to the Free Software
016: * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
017: * USA.
018: */
019: package xtc.parser;
020:
021: import java.util.ArrayList;
022: import java.util.List;
023:
024: import xtc.Constants;
025:
026: import xtc.tree.Attribute;
027: import xtc.tree.Visitor;
028:
029: import xtc.type.AST;
030: import xtc.type.Type;
031:
032: import xtc.util.Runtime;
033:
034: /**
035: * Visitor to transform direct left-recursions into equivalent
036: * right-recursions. As a practical matter, this visitor transforms
037: * only void, text-only, token-level, and generic productions. For
038: * generic productions, it builds on {@link xtc.util.Action actions}
039: * to ensure that the resulting right-recursive productions preserve
040: * the structure of the original productions' semantic values. In
041: * particular, left-associative data structures remain
042: * left-associative, even after transformation. This visitor requires
043: * that all nested choices that do not appear as the last element in a
044: * sequence have been lifted. It also assumes that the entire grammar
045: * is contained in a single module.
046: *
047: * <p />This visitor may report errors to the user.
048: *
049: * @author Robert Grimm
050: * @version $Revision: 1.77 $
051: */
052: public class DirectLeftRecurser extends Visitor {
053:
054: /** The flag for processing the recursive case. */
055: public static final int STATE_RECURSION = 1;
056:
057: /** The flag for processing the base case. */
058: public static final int STATE_BASE = STATE_RECURSION + 1;
059:
060: /** The runtime. */
061: protected final Runtime runtime;
062:
063: /** The analyzer utility. */
064: protected final Analyzer analyzer;
065:
066: /** The type operations. */
067: protected final AST ast;
068:
069: /** The flag for whether we are transforming a void production. */
070: protected boolean isVoid;
071:
072: /** The flag for whether we are transforming a text-only production. */
073: protected boolean isTextOnly;
074:
075: /** The flag for whether we are transforming a token-level production. */
076: protected boolean isToken;
077:
078: /** The flag for whether we are transforming a generic production. */
079: protected boolean isGeneric;
080:
081: /**
082: * The flag for whether the grammar has the {@link Constants#ATT_CONSTANT
083: * constant} attribute.
084: */
085: protected boolean hasConstant;
086:
087: /**
088: * The flag for whether the grammar has the {@link
089: * Constants#ATT_PARSE_TREE parseTree} attribute.
090: */
091: protected boolean hasParseTree;
092:
093: /** The current state. */
094: protected int state;
095:
096: /**
097: * Flag for whether the current element is the top-level element of
098: * a production.
099: */
100: protected boolean isTopLevel;
101:
102: /** Flag for whether we have processed an ordered choice. */
103: protected boolean seenChoice;
104:
105: /**
106: * The list of variables representing the children of the generic
107: * node to be created.
108: */
109: protected List<Binding> children;
110:
111: /** The list of node markers. */
112: protected List<NodeMarker> markers;
113:
114: /** The new tail production. */
115: protected FullProduction pTail;
116:
117: /** The binding for the seed value. */
118: protected Binding seed;
119:
120: /** The name of the action's argument. */
121: protected String varAction;
122:
123: /**
124: * Create a new direct left-recurser.
125: *
126: * @param runtime The runtime.
127: * @param analyzer The analyzer utility.
128: * @param ast The type operations.
129: */
130: public DirectLeftRecurser(Runtime runtime, Analyzer analyzer,
131: AST ast) {
132: this .runtime = runtime;
133: this .analyzer = analyzer;
134: this .ast = ast;
135: this .children = new ArrayList<Binding>();
136: this .markers = new ArrayList<NodeMarker>();
137: }
138:
139: /**
140: * Create a binding for the specified element. This method also
141: * adds the name of the bound variable to the end of the list of
142: * children.
143: *
144: * @param e The element to bind.
145: * @return The corresponding binding.
146: */
147: protected Binding bind(Element e) {
148: Binding b = new Binding(analyzer.variable(Generifier.MARKER), e);
149: children.add(b);
150: return b;
151: }
152:
153: /** Process the specified grammar. */
154: public void visit(Module m) {
155: if ((!runtime.test("optimizeLeftRecursions"))
156: && (!runtime.test("optimizeLeftIterations"))) {
157: return;
158: }
159:
160: // Initialize the analyzer.
161: analyzer.register(this );
162: analyzer.init(m);
163:
164: // Initialize the per-grammar state.
165: hasConstant = m.hasAttribute(Constants.ATT_CONSTANT);
166: hasParseTree = m.hasAttribute(Constants.ATT_PARSE_TREE);
167:
168: // Process the productions.
169: for (int i = 0; i < m.productions.size(); i++) {
170: FullProduction p = (FullProduction) m.productions.get(i);
171:
172: if (!isTransformable(p))
173: continue;
174:
175: // Note to user.
176: if (runtime.test("optionVerbose")) {
177: System.err.println("[Transforming left-recursion in "
178: + p.qName + "]");
179: }
180:
181: // Process the production.
182: analyzer.startAdding();
183: analyzer.process(p);
184:
185: // If there are new productions, add them to the grammar and
186: // skip them for further processing.
187: i += analyzer.addNewProductionsAt(i + 1);
188:
189: // The production is not transformable anymore.
190: p.removeProperty(Properties.TRANSFORMABLE);
191: }
192: }
193:
194: /** Visit the specified production. */
195: public void visit(FullProduction p) {
196: // Reset pre-production state.
197: isVoid = AST.isVoid(p.type);
198: isTextOnly = p.getBooleanProperty(Properties.TEXT_ONLY);
199: isToken = p.getBooleanProperty(Properties.TOKEN);
200: isGeneric = AST.isGenericNode(p.type);
201:
202: isTopLevel = true;
203: seenChoice = false;
204: children.clear();
205: markers.clear();
206: seed = null;
207: varAction = null;
208:
209: // Create the new right-recursive production and the action
210: // variable. Note that the state and reset attributes are not
211: // inherited.
212: List<Attribute> attributes = new ArrayList<Attribute>(
213: p.attributes);
214: attributes.remove(Constants.ATT_STATEFUL);
215: attributes.remove(Constants.ATT_RESETTING);
216: if (isGeneric && (!attributes.contains(Constants.ATT_CONSTANT))
217: && (!hasConstant)) {
218: // The semantic value is an anonmymous inner class that
219: // references outside bindings, which, in turn, must be
220: // constant.
221: attributes.add(Constants.ATT_CONSTANT);
222: }
223: if (runtime.test("optimizeLeftIterations")
224: && (!attributes.contains(Constants.ATT_TRANSIENT))) {
225: // A new tail production always is transient.
226: attributes.add(Constants.ATT_TRANSIENT);
227: }
228: // While a new tail production may be transient, it is never
229: // inline.
230: attributes.remove(Constants.ATT_INLINE);
231:
232: Type type = null;
233: if (isGeneric) {
234: if (runtime.test("optimizeLeftIterations")) {
235: // The tail production returns a single action.
236: type = AST.actionOf(p.type);
237:
238: } else {
239: // The tail production returns a list of actions.
240: type = AST.listOf(AST.actionOf(p.type));
241: }
242:
243: } else {
244: // The tail production returns nothing.
245: type = AST.VOID;
246: }
247: pTail = new FullProduction(attributes, type, analyzer.tail(),
248: null, new OrderedChoice());
249: pTail.qName = pTail.name.qualify(analyzer.module().name.name);
250:
251: if (isGeneric) {
252: varAction = analyzer.variable();
253: }
254:
255: // Process the production's element.
256: p.choice = (OrderedChoice) dispatch(p.choice);
257:
258: // Patch the type of this production (but only for dynamic nodes).
259: if (isGeneric && AST.isDynamicNode(p.type))
260: p.type = AST.NODE;
261:
262: // Add the base alternative to the right-recursive production.
263: if (!runtime.test("optimizeLeftIterations")) {
264: Sequence s = new Sequence();
265:
266: if (isGeneric) {
267: s.add(EmptyListValue.VALUE);
268: } else {
269: s.add(NullValue.VALUE);
270: }
271: pTail.choice.alternatives.add(s);
272: }
273:
274: // Prepare the right-recursive production for addition to the
275: // grammar.
276: if (!(runtime.test("optimizeLeftIterations") && (isVoid
277: || isTextOnly || isToken))) {
278: analyzer.add(pTail);
279: }
280:
281: // Mark the production as a transformed left-recursive production.
282: if (isGeneric) {
283: Generifier.markGenericRecursion((FullProduction) analyzer
284: .current(), runtime.test("optionVerbose"));
285: }
286: }
287:
288: /** Visit the specified ordered choice. */
289: public Element visit(OrderedChoice c) {
290: boolean top = isTopLevel;
291: isTopLevel = false;
292:
293: // Process the alternatives.
294: for (int i = 0; i < c.alternatives.size(); i++) {
295: Sequence alternative = c.alternatives.get(i);
296:
297: if (top) {
298: // Alternatives in the top-level choice need to be processed
299: // differently, depending on whether they represent a
300: // left-recursion or a base case.
301: if (isRecursive(alternative, (FullProduction) analyzer
302: .current())) {
303: state = STATE_RECURSION;
304:
305: // Remove the first, directly left-recursive element from
306: // the sequence.
307: alternative.elements.remove(0);
308:
309: // Remove the sequence from the base production, process it,
310: // and add the result to the new recursive production. If
311: // the result is a sequence with an ordered choice as its
312: // only element, we add the alternatives directly to avoid a
313: // choice nested within a choice.
314: c.alternatives.remove(i);
315: i--;
316: Sequence result = (Sequence) dispatch(alternative);
317: if ((1 == result.size())
318: && (result.get(0) instanceof OrderedChoice)) {
319: pTail.choice.alternatives
320: .addAll(((OrderedChoice) result.get(0)).alternatives);
321: } else {
322: pTail.choice.alternatives.add(result);
323: }
324:
325: } else {
326: state = STATE_BASE;
327:
328: if (isGeneric) {
329: // Bind the seed value.
330: Binding b = Analyzer
331: .getBinding(alternative.elements);
332:
333: if (null == b) {
334: // No binding found. Try to create one.
335: b = analyzer.bind(alternative.elements,
336: Generifier.MARKER);
337:
338: if (null == b) {
339: runtime.error(
340: "unable to determine value of recursion's "
341: + "base case",
342: alternative);
343: b = new Binding(Analyzer.DUMMY,
344: alternative);
345: }
346:
347: } else {
348: // Rename the bound variable.
349: b.name = analyzer.variable();
350: }
351: seed = b;
352:
353: // Check the seed value's type.
354: if (!Analyzer.DUMMY.equals(b.name)) {
355: Type type = analyzer.type(b.element);
356:
357: if (AST.isAny(type)) {
358: if ((!analyzer.module().hasAttribute(
359: Constants.ATT_NO_WARNINGS))
360: && (!analyzer
361: .current()
362: .hasAttribute(
363: Constants.ATT_NO_WARNINGS))) {
364: runtime.error(
365: "value of recursion's base case may not "
366: + "be a node",
367: b.element);
368: }
369: } else if ((!type.resolve().isWildcard())
370: && (!AST.isNode(type))) {
371: runtime
372: .error(
373: "value of recursion's base case not a node",
374: b.element);
375: }
376: }
377: }
378:
379: // There is nothing to do for void and text-only productions.
380:
381: // Process the alternative.
382: c.alternatives.set(i,
383: (Sequence) dispatch(alternative));
384: }
385:
386: } else {
387: // Alternatives in nested choices require no special processing.
388: c.alternatives.set(i, (Sequence) dispatch(alternative));
389: }
390: }
391:
392: // Record that we have seen a choice.
393: seenChoice = true;
394:
395: // Done.
396: return c;
397: }
398:
399: /** Visit the specified repetition. */
400: public Element visit(Repetition r) {
401: isTopLevel = false;
402:
403: if (isGeneric && (STATE_RECURSION == state)) {
404: return bind(r);
405: } else {
406: return r;
407: }
408: }
409:
410: /** Visit the specified option. */
411: public Element visit(Option o) {
412: isTopLevel = false;
413:
414: if (isGeneric && (STATE_RECURSION == state)) {
415: return bind(o);
416: } else {
417: return o;
418: }
419: }
420:
421: /** Visit the specified sequence. */
422: public Element visit(Sequence s) {
423: isTopLevel = false;
424:
425: // Remember the current number of children and markers.
426: final int base = children.size();
427: final int base2 = markers.size();
428:
429: // Process the elements of the sequence.
430: final int size = s.size();
431: for (int i = 0; i < size; i++) {
432: s.elements.set(i, (Element) dispatch(s.get(i)));
433: }
434:
435: // If this sequence has not ended with a choice, add the
436: // appropriate semantic value.
437: if (seenChoice) {
438: seenChoice = false;
439:
440: } else if (STATE_RECURSION == state) {
441: if (runtime.test("optimizeLeftIterations")) {
442: if (isGeneric) {
443: // Add a generic action value.
444: final String name;
445: if (0 == markers.size()) {
446: name = analyzer.current().name.unqualify().name;
447: } else {
448: name = markers.get(markers.size() - 1).name;
449: }
450:
451: final List<Binding> formatting;
452: if (s.hasProperty(Properties.FORMATTING)) {
453: formatting = Properties.getFormatting(s);
454: } else {
455: formatting = new ArrayList<Binding>(0);
456: }
457:
458: s.add(new GenericActionValue(name, varAction,
459: new ArrayList<Binding>(children),
460: formatting));
461: }
462:
463: // There's nothing to add for void and text-only productions.
464:
465: } else {
466: if (isGeneric) {
467: // Add a recursive invocation and a generic recursion value.
468: final Binding b = new Binding(analyzer.variable(),
469: pTail.name);
470: s.add(b);
471: final String name;
472: if (0 == markers.size()) {
473: name = analyzer.current().name.unqualify().name;
474: } else {
475: name = markers.get(markers.size() - 1).name;
476: }
477:
478: final List<Binding> formatting;
479: if (s.hasProperty(Properties.FORMATTING)) {
480: formatting = Properties.getFormatting(s);
481: } else {
482: formatting = new ArrayList<Binding>(0);
483: }
484:
485: s.add(new GenericRecursionValue(name, varAction,
486: new ArrayList<Binding>(children),
487: formatting, b));
488: } else {
489: // Add a recursive invocation and a null value.
490: s.add(pTail.name);
491: s.add(NullValue.VALUE);
492: }
493: }
494:
495: } else if (STATE_BASE == state) {
496: if (runtime.test("optimizeLeftIterations")) {
497: if (isGeneric) {
498: // Add a binding to the repeated tail nonterminal, which, in
499: // turn, must be bound so that the production voider does
500: // not inadvertently void the tail production. Also note
501: // that the repeated element must be a sequence to preserve
502: // the code generator's contract. After the binding, add an
503: // action base value.
504: Binding b1 = new Binding(analyzer.variable(),
505: pTail.name);
506: Repetition r = new Repetition(false, new Sequence(
507: b1));
508: Binding b2 = new Binding(analyzer.variable(), r);
509: s.add(b2).add(new ActionBaseValue(b2, seed));
510:
511: } else {
512: // Add a copy of the repeated tail expression and the
513: // appropriate text or null value.
514: Element e = analyzer.copy(Analyzer
515: .strip(pTail.choice));
516: s.add(new Repetition(false, Sequence.ensure(e)));
517: if (isTextOnly) {
518: s.add(StringValue.VALUE);
519: } else if (isToken) {
520: s.add(TokenValue.VALUE);
521: } else {
522: s.add(NullValue.VALUE);
523: }
524: }
525:
526: } else {
527: if (isGeneric) {
528: // Add the bound tail nonterminal and an action base value.
529: Binding b = new Binding(analyzer.variable(),
530: pTail.name);
531: s.add(b).add(new ActionBaseValue(b, seed));
532:
533: } else if (isTextOnly) {
534: // Add a text value.
535: s.add(pTail.name).add(StringValue.VALUE);
536:
537: } else if (isToken) {
538: // Add a token value.
539: s.add(pTail.name).add(TokenValue.VALUE);
540:
541: } else {
542: // Add a null value.
543: s.add(pTail.name).add(NullValue.VALUE);
544: }
545: }
546:
547: } else {
548: throw new IllegalStateException("Invalid state " + state);
549: }
550:
551: // Remove any children and markers added by processing the sequence.
552: if (isGeneric && (STATE_RECURSION == state)) {
553: if (0 == base) {
554: children.clear();
555: } else {
556: children.subList(base, children.size()).clear();
557: }
558:
559: if (0 == base2) {
560: markers.clear();
561: } else {
562: markers.subList(base2, markers.size()).clear();
563: }
564: }
565:
566: // Done.
567: return s;
568: }
569:
570: /** Visit the specified binding. */
571: public Element visit(Binding b) {
572: isTopLevel = false;
573:
574: if (isGeneric && (STATE_RECURSION == state)) {
575: // Make sure the binding is not to CodeGenerator.VALUE, i.e., yyValue.
576: if (CodeGenerator.VALUE.equals(b.name)) {
577: runtime
578: .error(
579: "illegal binding to yyValue in left-recursive sequence",
580: b);
581: }
582:
583: // Record the binding.
584: children.add(b);
585:
586: // We assume that the bound expression does not require any
587: // further processing. I.e., if it is a repetition, option, or
588: // choice, it already has been lifted and replaced by a
589: // nonterminal.
590: }
591:
592: return b;
593: }
594:
595: /** Visit the specified string match. */
596: public Element visit(StringMatch m) {
597: isTopLevel = false;
598:
599: if (isGeneric && (STATE_RECURSION == state)) {
600: return bind(m);
601: } else {
602: return m;
603: }
604: }
605:
606: /** Visit the specified nonterminal. */
607: public Element visit(NonTerminal nt) {
608: isTopLevel = false;
609:
610: if (isGeneric && (STATE_RECURSION == state)) {
611: FullProduction p = analyzer.lookup(nt);
612: if (AST.isVoid(p.type)) {
613: return nt;
614: } else {
615: return bind(nt);
616: }
617:
618: } else {
619: return nt;
620: }
621: }
622:
623: /** Visit the specified string literal. */
624: public Element visit(StringLiteral l) {
625: isTopLevel = false;
626:
627: if (isGeneric && (STATE_RECURSION == state)) {
628: return bind(l);
629: } else {
630: return l;
631: }
632: }
633:
634: /** Visit the specified parse tree node. */
635: public Element visit(ParseTreeNode n) {
636: isTopLevel = false;
637:
638: if (isGeneric && (STATE_RECURSION == state)) {
639: return bind(n);
640: } else {
641: return n;
642: }
643: }
644:
645: /** Visit the specified null literal. */
646: public Element visit(NullLiteral l) {
647: isTopLevel = false;
648:
649: if (isGeneric && (STATE_RECURSION == state)) {
650: return bind(l);
651: } else {
652: return l;
653: }
654: }
655:
656: /** Visit the specified node marker. */
657: public Element visit(NodeMarker m) {
658: isTopLevel = false;
659: markers.add(m);
660: return m;
661: }
662:
663: /**
664: * Visit the specified element. This method provides the default
665: * implementation for repetitions, options, predicates, voided
666: * elements, character terminals, (parser) actions, and value
667: * elements.
668: */
669: public Element visit(Element e) {
670: isTopLevel = false;
671: return e;
672: }
673:
674: /**
675: * Determine whether the specified production contains a direct
676: * left-recursion that is transformable into the corresponding
677: * right-recursion by this visitor. Note that, for a production to
678: * be transformable, all direct left-recursions must precede the
679: * corresponding base cases. Further nore that this method assumes
680: * that the specified production's element is an ordered choice of
681: * sequences.
682: *
683: * @param p The production.
684: * @return <code>true</code> if the production is transformable
685: * by this visitor.
686: */
687: public static boolean isTransformable(FullProduction p) {
688: // Currently, only void, text-only, token-level and generic node
689: // productions can be transformed.
690: if (!(AST.isVoid(p.type)
691: || p.getBooleanProperty(Properties.TEXT_ONLY)
692: || p.getBooleanProperty(Properties.TOKEN) || AST
693: .isGenericNode(p.type))) {
694: return false;
695: } else if (p.hasProperty(Properties.TRANSFORMABLE)) {
696: return p.getBooleanProperty(Properties.TRANSFORMABLE);
697: }
698:
699: // Analyze the top-level alternatives.
700: boolean seenRecursion = false;
701: boolean seenBase = false;
702: for (Sequence s : p.choice.alternatives) {
703: // An empty sequence cannot be directly left-recursive nor can
704: // it be a valid base case (which needs to match some input).
705: if (s.isEmpty()) {
706: p.setProperty(Properties.TRANSFORMABLE, Boolean.FALSE);
707: return false;
708: }
709:
710: final Element e = Analyzer.stripAndUnbind(s.get(0));
711: if (p.name.equals(e) || p.qName.equals(e)) {
712: // The sequence represents a direct left-recursion.
713:
714: if ((!seenRecursion) || (!seenBase)) {
715: // A direct left-recursion before a base case.
716: seenRecursion = true;
717: } else {
718: // A direct left-recursion after a base case.
719: p.setProperty(Properties.TRANSFORMABLE,
720: Boolean.FALSE);
721: return false;
722: }
723:
724: } else {
725: // The sequence represents a base case.
726:
727: if (!seenRecursion) {
728: // A base case without a preceding direct left-recursion.
729: p.setProperty(Properties.TRANSFORMABLE,
730: Boolean.FALSE);
731: return false;
732: } else {
733: seenBase = true;
734: }
735: }
736: }
737:
738: // We need at least one direct left-recursion and one base
739: // sequence.
740: if (seenRecursion && seenBase) {
741: p.setProperty(Properties.TRANSFORMABLE, Boolean.TRUE);
742: return true;
743: } else {
744: p.setProperty(Properties.TRANSFORMABLE, Boolean.FALSE);
745: return false;
746: }
747: }
748:
749: /**
750: * Determine whether the specified sequence is directly
751: * left-recursive. The specified sequence must be a top-level
752: * alternative of the specified production, which, in turn, must be
753: * {@link #isTransformable transformable}.
754: *
755: * @param s The sequence.
756: * @param p The production.
757: * @return <code>true</code> if the specified sequence is directly
758: * left-recursive.
759: */
760: public static boolean isRecursive(Sequence s, FullProduction p) {
761: if (s.isEmpty())
762: return false;
763:
764: final Element e = Analyzer.stripAndUnbind(s.get(0));
765: return (p.name.equals(e) || p.qName.equals(e));
766: }
767:
768: /**
769: * Determine whether the specified sequence represents a base case
770: * for a direct left-recursion. The specified sequence must be a
771: * top-level alternative for the specified production, which, in
772: * turn, must be {@link #isTransformable transformable}.
773: *
774: * @param s The sequence.
775: * @param p The production.
776: * @return <code>true</code> if the specified sequence is a base
777: * case for the direct left-recursion.
778: */
779: public static boolean isBase(Sequence s, FullProduction p) {
780: return (!isRecursive(s, p));
781: }
782:
783: }
|