0001: /*
0002: * xtc - The eXTensible Compiler
0003: * Copyright (C) 2007 Robert Grimm
0004: *
0005: * This program is free software; you can redistribute it and/or
0006: * modify it under the terms of the GNU General Public License
0007: * version 2 as published by the Free Software Foundation.
0008: *
0009: * This program is distributed in the hope that it will be useful,
0010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0012: * GNU General Public License for more details.
0013: *
0014: * You should have received a copy of the GNU General Public License
0015: * along with this program; if not, write to the Free Software
0016: * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
0017: * USA.
0018: */
0019: package xtc.parser;
0020:
0021: import java.util.ArrayList;
0022: import java.util.Iterator;
0023: import java.util.List;
0024:
0025: import xtc.Constants;
0026:
0027: import xtc.tree.Attribute;
0028: import xtc.tree.Visitor;
0029:
0030: import xtc.type.AST;
0031: import xtc.type.Type;
0032:
0033: import xtc.util.Runtime;
0034:
0035: /**
0036: * Visitor to inject source code annotations into a grammar.
0037: *
0038: * <p />This visitor distinguishes between bindable elements, i.e.,
0039: * elements that would contribute to the value of a generic
0040: * production, and valuable element, i.e., elements that can be
0041: * rewritten to have a semantic value. The latter include voided
0042: * elements and references to void productions that consume the input.
0043: *
0044: * <p />For directly left-recursive alternatives in generic
0045: * productions, this visitor may annotate the last sequence with the
0046: * {@link Properties#FORMATTING} property, indicating valuable
0047: * elements that need to be captured in the promise generated by
0048: * {@link DirectLeftRecurser}.
0049: *
0050: * <p />This visitor assumes that the entire grammar is contained in a
0051: * single module, that the grammar has been tokenized, and that
0052: * productions that may consume the input have been marked.
0053: *
0054: * @see Tokenizer
0055: * @see Analyzer#consumesInput(Element)
0056: *
0057: * @author Robert Grimm
0058: * @version $Revision: 1.30 $
0059: */
0060: public class Annotator extends Visitor {
0061:
0062: /** The marker for synthetic variables. */
0063: public static final String MARKER = "pt";
0064:
0065: // ==========================================================================
0066:
0067: /** A mutable boxed integer. */
0068: public static class Index {
0069:
0070: /** The actual index. */
0071: public int index;
0072:
0073: /** Create a new index initialized to -1. */
0074: public Index() {
0075: index = -1;
0076: }
0077:
0078: /**
0079: * Create a copy of the specified index.
0080: *
0081: * @param index The index.
0082: */
0083: public Index(Index index) {
0084: this .index = index.index;
0085: }
0086:
0087: }
0088:
0089: // ==========================================================================
0090:
0091: /** A pair of indices. */
0092: public static class IndexPair {
0093:
0094: /** The first index. */
0095: public int index1;
0096:
0097: /** The second index. */
0098: public int index2;
0099:
0100: /** Create a new pair of indices, initialized to -1. */
0101: public IndexPair() {
0102: index1 = -1;
0103: index2 = -1;
0104: }
0105:
0106: }
0107:
0108: // ==========================================================================
0109:
0110: /**
0111: * A visitor to detect whether productions need to be rewritten.
0112: * This visitor detects non-left-recursive generic or list-valued
0113: * productions that contain:<ol>
0114: *
0115: * <li>a bindable element with a list value preceded by one or more
0116: * valuable elements, but no bindable elements;</li>
0117: *
0118: * <li>a bindable element with a list value followed by one or more
0119: * valuable elements, but no more bindable elements.</li>
0120: *
0121: * </ol>
0122: *
0123: * For each such production, this visitor sets the {@link
0124: * Properties#SPLIT} property. For each (flattened) alternative
0125: * that contains any of the two cases, it also annotates the last
0126: * sequence with a {@link Annotator.IndexPair}. The first index is
0127: * non-negative for the first case. The second index is
0128: * non-negative for the second case.
0129: */
0130: public class Detector extends Visitor {
0131:
0132: /** The flag for whether the production needs to be split. */
0133: protected boolean needToSplit;
0134:
0135: /** The list of elements. */
0136: protected List<Element> elements;
0137:
0138: /** Create a new detector. */
0139: public Detector() {
0140: elements = new ArrayList<Element>();
0141: }
0142:
0143: /** Visit the specified module. */
0144: public void visit(Module m) {
0145: // Initialize the per-grammar state.
0146: analyzer.register(this );
0147: analyzer.init(m);
0148:
0149: // Process the productions.
0150: for (Production p : m.productions) {
0151: // Only truly generic and list-vallued productions are
0152: // candidates, since they determine a program's AST. However,
0153: // we cannot split directly left-recursive productions because
0154: // all component values must be part of the resulting node,
0155: // whose creation is delayed through a promise.
0156: if (((!Generifier.isGeneric((FullProduction) p)) && (!isList(p.type)))
0157: || DirectLeftRecurser
0158: .isTransformable((FullProduction) p)) {
0159: continue;
0160: }
0161:
0162: // Do the work.
0163: needToSplit = false;
0164: analyzer.process(p);
0165: if (needToSplit)
0166: p.setProperty(Properties.SPLIT, Boolean.TRUE);
0167: }
0168: }
0169:
0170: /** Visit the specified production. */
0171: public void visit(FullProduction p) {
0172: dispatch(p.choice);
0173: }
0174:
0175: /** Visit the specified choice. */
0176: public void visit(OrderedChoice c) {
0177: for (Sequence alt : c.alternatives)
0178: dispatch(alt);
0179: }
0180:
0181: /** Visit the specified sequence. */
0182: public void visit(Sequence s) {
0183: // Remember the current number of elements.
0184: final int base = elements.size();
0185:
0186: // Add this sequence's elements to the list of elements.
0187: for (Iterator<Element> iter = s.elements.iterator(); iter
0188: .hasNext();) {
0189: Element e = iter.next();
0190:
0191: if ((!iter.hasNext()) && (e instanceof OrderedChoice)) {
0192: // Continue with the trailing choice.
0193: dispatch(e);
0194: } else {
0195: // Add the current element to the list of traversed elements.
0196: elements.add(e);
0197: }
0198: }
0199:
0200: // Actually process the elements.
0201: if (!s.hasTrailingChoice()) {
0202: // Flag for at least one bindable element.
0203: boolean hasBindable = false;
0204: // Flag for at least one valuable element.
0205: boolean hasValuable = false;
0206: // Index of the first bindable element with a list value.
0207: int first = -1;
0208: // Index of the last bindable element with a list value.
0209: int last = -1;
0210: // Flag for a bindable element with a list value succeeded by
0211: // one or more valuable elements.
0212: boolean hasFollowing = false;
0213:
0214: final int size = elements.size();
0215: for (int i = 0; i < size; i++) {
0216: final Element e = elements.get(i);
0217:
0218: if (analyzer.isBindable(e)) {
0219: if (AST.isList(analyzer.type(e))) {
0220: if ((!hasBindable) && hasValuable)
0221: first = i;
0222: last = i;
0223: } else {
0224: last = -1;
0225: }
0226:
0227: hasFollowing = false;
0228: hasBindable = true;
0229:
0230: } else if (isValuable(e)) {
0231: if (-1 != last)
0232: hasFollowing = true;
0233:
0234: hasValuable = true;
0235: }
0236: }
0237:
0238: if (-1 != first || hasFollowing) {
0239: IndexPair result = new IndexPair();
0240: if (-1 != first)
0241: result.index1 = first;
0242: if (hasFollowing)
0243: result.index2 = last;
0244: needToSplit = true;
0245: s.setProperty(Properties.SPLIT, result);
0246: }
0247: }
0248:
0249: // Remove any elements added by this method invocation.
0250: if (0 == base) {
0251: elements.clear();
0252: } else {
0253: elements.subList(base, elements.size()).clear();
0254: }
0255: }
0256:
0257: }
0258:
0259: // ==========================================================================
0260:
0261: /**
0262: * A visitor to rewrite productions. This visitor rewrites
0263: * productions that have been marked with the {@link
0264: * Properties#SPLIT} property to generate the same basic AST with
0265: * annotations as without annotations.
0266: *
0267: * @see Annotator.Detector
0268: */
0269: public class Rewriter extends Visitor {
0270:
0271: /** The list of elements. */
0272: protected List<Element> elements;
0273:
0274: /** The flag for whether the current choice is top-level. */
0275: protected boolean isTopLevel;
0276:
0277: /** The current top-level alternative. */
0278: protected Sequence alternative;
0279:
0280: /** The list of replacement sequences. */
0281: protected List<Sequence> replacements;
0282:
0283: /** Create a new rewriter. */
0284: public Rewriter() {
0285: elements = new ArrayList<Element>();
0286: }
0287:
0288: /** Visit the specified module. */
0289: public void visit(Module m) {
0290: // Initialize the per-grammar state.
0291: analyzer.register(this );
0292: analyzer.init(m);
0293:
0294: for (int i = 0; i < m.productions.size(); i++) {
0295: FullProduction p = (FullProduction) m.productions
0296: .get(i);
0297:
0298: // We only rewrite marked productions.
0299: if (!p.getBooleanProperty(Properties.SPLIT))
0300: continue;
0301:
0302: // Process the production.
0303: analyzer.startAdding();
0304: analyzer.process(p);
0305:
0306: // If there are new productions, add them to the grammar and
0307: // make sure they are not processed.
0308: i += analyzer.addNewProductionsAt(i + 1);
0309: }
0310: }
0311:
0312: /** Visit the specified production. */
0313: public void visit(FullProduction p) {
0314: isTopLevel = true;
0315: dispatch(p.choice);
0316: }
0317:
0318: /** Visit the specified choice. */
0319: public void visit(OrderedChoice c) {
0320: // Track whether this choice is top-level.
0321: boolean top = isTopLevel;
0322: isTopLevel = false;
0323:
0324: // Make space for the replacements.
0325: if (top)
0326: replacements = new ArrayList<Sequence>(c.alternatives
0327: .size());
0328:
0329: // Process the alternatives.
0330: for (Sequence s : c.alternatives) {
0331: if (top)
0332: alternative = s;
0333: dispatch(s);
0334: }
0335:
0336: // Patch the top-level choice's alternatives.
0337: if (top)
0338: c.alternatives = replacements;
0339: }
0340:
0341: /** Visit the specified sequence. */
0342: public void visit(Sequence s) {
0343: // Remember the current number of elements.
0344: final int base = elements.size();
0345:
0346: // Add this sequence's elements to the list of elements.
0347: for (Iterator<Element> iter = s.elements.iterator(); iter
0348: .hasNext();) {
0349: Element e = iter.next();
0350:
0351: if ((!iter.hasNext()) && (e instanceof OrderedChoice)) {
0352: // Continue with the trailing choice.
0353: dispatch(e);
0354: } else {
0355: // Add the current element to the list of traversed elements.
0356: elements.add(e);
0357: }
0358: }
0359:
0360: // Actually process the elements.
0361: if (!s.hasTrailingChoice()) {
0362: IndexPair split = (IndexPair) s
0363: .getProperty(Properties.SPLIT);
0364:
0365: if (null == split) {
0366: // There's nothing to split here. Just construct the
0367: // replacement sequence.
0368: Sequence r = new Sequence(new ArrayList<Element>(
0369: elements));
0370: r.name = alternative.name;
0371: r.setLocation(alternative);
0372: replacements.add(r);
0373:
0374: } else {
0375: // There's something to split here.
0376: Production p = analyzer.current();
0377:
0378: // Determine the node marker for the new production. If the
0379: // elements have a node marker, use the last one; otherwise,
0380: // use one for the current production.
0381: NodeMarker mark = null;
0382: for (Element e : elements) {
0383: if (e instanceof NodeMarker)
0384: mark = (NodeMarker) e;
0385: }
0386: if (null == mark)
0387: mark = new NodeMarker(p.name.name);
0388:
0389: // Determine the nonterminal and only alternative for the
0390: // new production.
0391: final NonTerminal nt = analyzer.split();
0392: final Sequence alt;
0393: if (-1 == split.index2) {
0394: alt = new Sequence(elements.size()
0395: - split.index1 + 1);
0396: alt.addAll(elements.subList(split.index1,
0397: elements.size()));
0398: alt.add(mark);
0399:
0400: } else if (-1 == split.index1) {
0401: alt = new Sequence(split.index2 + 2);
0402: alt.addAll(elements
0403: .subList(0, split.index2 + 1));
0404: alt.add(mark);
0405:
0406: } else {
0407: alt = new Sequence(split.index2 - split.index1
0408: + 2);
0409: alt.addAll(elements.subList(split.index1,
0410: split.index2 + 1));
0411: alt.add(mark);
0412: }
0413: alt.name = alternative.name;
0414: alt.setLocation(alternative);
0415:
0416: // Create the new production.
0417: FullProduction q = new FullProduction(
0418: new ArrayList<Attribute>(p.attributes),
0419: AST.GENERIC, nt, nt.qualify(analyzer
0420: .module().name.name),
0421: new OrderedChoice(alt));
0422: // Do not inherit any stateful or restting attribute.
0423: q.attributes.remove(Constants.ATT_STATEFUL);
0424: q.attributes.remove(Constants.ATT_RESETTING);
0425: // But do ensure that the new production is transient.
0426: if (!q.hasAttribute(Constants.ATT_TRANSIENT)
0427: && !q.hasAttribute(Constants.ATT_INLINE)) {
0428: q.attributes.add(Constants.ATT_TRANSIENT);
0429: }
0430:
0431: // Document activity under verbose mode.
0432: if (runtime.test("optionVerbose")) {
0433: System.err
0434: .println("[Lifting split sequence into new production "
0435: + q.qName + ']');
0436: }
0437:
0438: // Add the new production to the grammar.
0439: analyzer.add(q);
0440:
0441: // Create the replacement.
0442: final Sequence r;
0443: if (-1 == split.index2) {
0444: r = new Sequence(split.index1 + 1);
0445: r.addAll(elements.subList(0, split.index1));
0446: r.add(new Binding(CodeGenerator.VALUE, nt));
0447:
0448: } else if (-1 == split.index1) {
0449: r = new Sequence(elements.size() - split.index2);
0450: r.add(new Binding(CodeGenerator.VALUE, nt));
0451: r.addAll(elements.subList(split.index2 + 1,
0452: elements.size()));
0453:
0454: } else {
0455: r = new Sequence(split.index1 + elements.size()
0456: - split.index2);
0457: r.addAll(elements.subList(0, split.index1));
0458: r.add(new Binding(CodeGenerator.VALUE, nt));
0459: r.addAll(elements.subList(split.index2 + 1,
0460: elements.size()));
0461: }
0462: r.name = alternative.name;
0463: r.setLocation(alternative);
0464: replacements.add(r);
0465: }
0466: }
0467:
0468: // Remove any elements added by this method.
0469: if (0 == base) {
0470: elements.clear();
0471: } else {
0472: elements.subList(base, elements.size()).clear();
0473: }
0474: }
0475:
0476: }
0477:
0478: // ==========================================================================
0479:
0480: /** The runtime. */
0481: protected final Runtime runtime;
0482:
0483: /** The analyzer. */
0484: protected final Analyzer analyzer;
0485:
0486: /** The flag for whether the current production is generic. */
0487: protected boolean isGeneric;
0488:
0489: /** The flag for whether the current production is list-valued. */
0490: protected boolean isList;
0491:
0492: /**
0493: * The flag for whether the current production or sequence is
0494: * directly left-recursive.
0495: */
0496: protected boolean isRecursive;
0497:
0498: /** The flag for whether the current choice is top-level. */
0499: protected boolean isTopLevel;
0500:
0501: /**
0502: * The index of the next sequence to process. The stacks capturing
0503: * the state for tracking bindable and valuable must have as many
0504: * elements as indicated by this index.
0505: */
0506: protected int toProcessIdx;
0507:
0508: /** The elements as a list of sequences. */
0509: protected List<Sequence> sequences;
0510:
0511: /**
0512: * The list of bindings for valuable elements before the regular,
0513: * bindable value, organized as a stack of copies.
0514: *
0515: * @see #toProcessIdx
0516: */
0517: protected List<List<Binding>> before;
0518:
0519: /**
0520: * The index of the sequence with the regular, bindable value,
0521: * organized as a stack of copies. A value of -1 indicates no
0522: * bindable value.
0523: *
0524: * @see #toProcessIdx
0525: */
0526: protected List<Index> sequenceIdx;
0527:
0528: /**
0529: * The index of the regular, bindable value, organized as a stack of
0530: * copies. A value of -1 indicates no bindable value.
0531: *
0532: * @see #toProcessIdx
0533: */
0534: protected List<Index> elementIdx;
0535:
0536: /**
0537: * The list of bindings for valuable elements after the regular,
0538: * bindable value, organized as a stack of copies.
0539: *
0540: * @see #toProcessIdx
0541: */
0542: protected List<List<Binding>> after;
0543:
0544: /**
0545: * Create a new annotator.
0546: *
0547: * @param runtime The runtime.
0548: * @param analyzer The analyzer utility.
0549: */
0550: public Annotator(Runtime runtime, Analyzer analyzer) {
0551: this .runtime = runtime;
0552: this .analyzer = analyzer;
0553: this .toProcessIdx = 0;
0554: this .sequences = new ArrayList<Sequence>();
0555: this .before = new ArrayList<List<Binding>>();
0556: this .sequenceIdx = new ArrayList<Index>();
0557: this .elementIdx = new ArrayList<Index>();
0558: this .after = new ArrayList<List<Binding>>();
0559: }
0560:
0561: /**
0562: * Push a copy of the state for tracking bindable and valuable
0563: * elements. This method pushes copies of the top elemens of the
0564: * tracking state onto the respective stacks. If the stacks are
0565: * empty, it initializes the lists of bindings with empty lists and
0566: * the indices with -1.
0567: */
0568: protected void push() {
0569: assert before.size() == sequenceIdx.size();
0570: assert before.size() == elementIdx.size();
0571: assert before.size() == after.size();
0572:
0573: if (0 == before.size()) {
0574: before.add(new ArrayList<Binding>());
0575: sequenceIdx.add(new Index());
0576: elementIdx.add(new Index());
0577: after.add(new ArrayList<Binding>());
0578: } else {
0579: before.add(new ArrayList<Binding>(before
0580: .get(before.size() - 1)));
0581: sequenceIdx.add(new Index(sequenceIdx.get(sequenceIdx
0582: .size() - 1)));
0583: elementIdx.add(new Index(elementIdx
0584: .get(elementIdx.size() - 1)));
0585: after.add(new ArrayList<Binding>(after
0586: .get(after.size() - 1)));
0587: }
0588: }
0589:
0590: /**
0591: * Pop the top elements from the state for tracking bindable and
0592: * valuable elements.
0593: */
0594: protected void pop() {
0595: assert before.size() == sequenceIdx.size();
0596: assert before.size() == elementIdx.size();
0597: assert before.size() == after.size();
0598:
0599: before.remove(before.size() - 1);
0600: sequenceIdx.remove(sequenceIdx.size() - 1);
0601: elementIdx.remove(elementIdx.size() - 1);
0602: after.remove(after.size() - 1);
0603: }
0604:
0605: /**
0606: * Reset the current state for tracking bindable and valuable
0607: * elements. This method replaces the current lists of bindings
0608: * with new, empty lists and sets the current indices to -1.
0609: */
0610: protected void reset() {
0611: before.set(before.size() - 1, new ArrayList<Binding>());
0612: sequenceIndex().index = -1;
0613: elementIndex().index = -1;
0614: after.set(after.size() - 1, new ArrayList<Binding>());
0615: }
0616:
0617: /**
0618: * Get the current list of bindings before the regular value.
0619: *
0620: * @return The current list of bindings.
0621: */
0622: protected List<Binding> before() {
0623: return before.get(before.size() - 1);
0624: }
0625:
0626: /**
0627: * Get the current sequence index of the regular value.
0628: *
0629: * @return The current sequence index.
0630: */
0631: protected Index sequenceIndex() {
0632: return sequenceIdx.get(sequenceIdx.size() - 1);
0633: }
0634:
0635: /**
0636: * Get the current element index of the regular value.
0637: *
0638: * @return The current element index.
0639: */
0640: protected Index elementIndex() {
0641: return elementIdx.get(elementIdx.size() - 1);
0642: }
0643:
0644: /**
0645: * Get the current list of bindings after the regular value.
0646: *
0647: * @return The current list of bindings.
0648: */
0649: protected List<Binding> after() {
0650: return after.get(after.size() - 1);
0651: }
0652:
0653: /**
0654: * Determine whether the specified element is valuable. This method
0655: * returns <code>true</code> if the specified element has a semantic
0656: * value or can be rewritten to yield a semantic value. Rewriting
0657: * includes eliminating any voided element or converting a void
0658: * production into a production returning a semantic value.
0659: *
0660: * @param e The element.
0661: * @return <code>true</code> if the specified element is valuable.
0662: */
0663: protected boolean isValuable(Element e) {
0664: switch (e.tag()) {
0665: case VOIDED:
0666: assert isValuable(((VoidedElement) e).element);
0667: return true;
0668: case NONTERMINAL:
0669: return isValuable(analyzer.lookup((NonTerminal) e));
0670: case CHOICE:
0671: case OPTION:
0672: case REPETITION:
0673: case ANY_CHAR:
0674: case CHAR_CLASS:
0675: case CHAR_LITERAL:
0676: case STRING_LITERAL:
0677: case STRING_MATCH:
0678: case PARSE_TREE_NODE:
0679: case BINDING:
0680: return true;
0681: default:
0682: return false;
0683: }
0684: }
0685:
0686: /**
0687: * Determine whether the specified element is a possibly bound or
0688: * voided parse tree node.
0689: *
0690: * @param e The element.
0691: * @return <code>true</code> if the specified element is a parse
0692: * tree node.
0693: */
0694: protected boolean isParseTreeNode(Element e) {
0695: switch (e.tag()) {
0696: case VOIDED:
0697: case BINDING:
0698: return isParseTreeNode(((UnaryOperator) e).element);
0699: case PARSE_TREE_NODE:
0700: return true;
0701: default:
0702: return false;
0703: }
0704: }
0705:
0706: /**
0707: * Determine whether a bindable element has been processed.
0708: *
0709: * @see Analyzer#isBindable(Element)
0710: *
0711: * @return <code>true</code> if a bindable element has been
0712: * processed.
0713: */
0714: protected boolean hasBindable() {
0715: return -1 != sequenceIndex().index;
0716: }
0717:
0718: /**
0719: * Determine whether any valuable elements have been processed.
0720: *
0721: * @see #isValuable(Element)
0722: *
0723: * @return <code>true</code> if any valuable elements have been
0724: * processed.
0725: */
0726: protected boolean hasValuable() {
0727: return (0 < before().size()) || (0 < after().size());
0728: }
0729:
0730: /**
0731: * Determine whether the specified element already is bound and
0732: * voided.
0733: *
0734: * @param e The element.
0735: * @return <code>true</code> if the specified element already is
0736: * bound and voided.
0737: */
0738: protected boolean isBoundAndVoided(Element e) {
0739: return (e instanceof VoidedElement)
0740: && (((VoidedElement) e).element instanceof Binding);
0741: }
0742:
0743: /**
0744: * Bind and void the specified element. The specified element must
0745: * be valuable, i.e., can be rewritten to yield a semantic value.
0746: *
0747: * @see #isValuable(Element)
0748: *
0749: * @param e The element.
0750: * @return The voided, bound element.
0751: */
0752: protected VoidedElement bindAndVoid(final Element e) {
0753: Element tmp = e;
0754:
0755: // Strip any voided element.
0756: if (tmp instanceof VoidedElement) {
0757: tmp = ((VoidedElement) tmp).element;
0758: }
0759:
0760: // Make sure the element is bound.
0761: if (tmp instanceof Binding) {
0762: // Preserve already existing bindings, unless they refer to
0763: // yyValue.
0764: Binding b = (Binding) tmp;
0765: if (CodeGenerator.VALUE.equals(b.name)) {
0766: b.name = analyzer.variable(MARKER);
0767: }
0768: assert !isParseTreeNode(b.element);
0769:
0770: } else {
0771: assert !isParseTreeNode(tmp);
0772:
0773: tmp = new Binding(analyzer.variable(MARKER), tmp);
0774: tmp.setLocation(e); // Preserve the location.
0775: }
0776:
0777: // Void out the element.
0778: VoidedElement result = new VoidedElement(tmp);
0779: result.setLocation(e); // Preserve the location.
0780:
0781: // Done.
0782: return result;
0783: }
0784:
0785: /**
0786: * Bind and void the current regular, bindable element.
0787: *
0788: * @return The binding.
0789: */
0790: protected Binding bindAndVoid() {
0791: Sequence s = sequences.get(sequenceIndex().index);
0792: VoidedElement v = bindAndVoid(s.get(elementIndex().index));
0793: s.elements.set(elementIndex().index, v);
0794: return (Binding) v.element;
0795: }
0796:
0797: /**
0798: * Process all elements in the current list of sequences.
0799: */
0800: protected void annotate() {
0801: // If any element explicitly sets the semantic value through an
0802: // action, then all hands are off.
0803: for (Sequence s : sequences) {
0804: for (Element e : s.elements) {
0805: if ((e instanceof Action) && ((Action) e).setsValue()) {
0806: // Increment toProcessIdx to make the decrement in
0807: // visit(Sequence) a no-op.
0808: toProcessIdx++;
0809: push();
0810: assert toProcessIdx == before.size();
0811: return;
0812: }
0813: }
0814: }
0815:
0816: // Next, if any element binds yyValue directly, we need to
0817: // preserve the explicit binding.
0818: int valueSequenceIdx = -1;
0819: int valueElementIdx = -1;
0820: boolean requiresAnnotation = false;
0821: final int size = sequences.size();
0822: for (int i = 0; i < size; i++) {
0823: final Sequence s = sequences.get(i);
0824: final int seqsize = s.hasTrailingChoice() ? s.size() - 1
0825: : s.size();
0826: for (int j = 0; j < seqsize; j++) {
0827: // Skip any left-recursive nonterminal.
0828: if (isRecursive && (0 == i) && (0 == j))
0829: continue;
0830:
0831: // Process the element.
0832: final Element e = s.get(j);
0833:
0834: if ((e instanceof Binding)
0835: && CodeGenerator.VALUE
0836: .equals(((Binding) e).name)) {
0837: // Note that we track the last binding to yyValue,
0838: // overriding any previous bindings here.
0839: valueSequenceIdx = i;
0840: valueElementIdx = j;
0841: } else if (isValuable(e)) {
0842: requiresAnnotation = true;
0843: }
0844: }
0845: }
0846:
0847: if (-1 != valueSequenceIdx) {
0848: if (!requiresAnnotation) {
0849: // Increment toProcessIdx to make the decrement in
0850: // visit(Sequence) a no-op.
0851: toProcessIdx++;
0852: push();
0853: assert toProcessIdx == before.size();
0854: return;
0855:
0856: } else if (requiresAnnotation) {
0857: Binding value = null;
0858:
0859: // Recreate the before and after bindings from scratch since
0860: // the previous list of sequences may not have contained a
0861: // binding to yyValue.
0862: before.clear();
0863: sequenceIdx.clear();
0864: elementIdx.clear();
0865: after.clear();
0866:
0867: for (int i = 0; i < size; i++) {
0868: final Sequence s = sequences.get(i);
0869: push();
0870:
0871: final int seqsize = s.hasTrailingChoice() ? s
0872: .size() - 1 : s.size();
0873: for (int j = 0; j < seqsize; j++) {
0874: // Skip any left-recursive nonterminal.
0875: if (isRecursive && (0 == i) && (0 == j))
0876: continue;
0877:
0878: // Process the element.
0879: Element e = s.get(j);
0880:
0881: if (i < toProcessIdx) {
0882: // The element has been processed.
0883: if ((i == valueSequenceIdx)
0884: && (j == valueElementIdx)) {
0885: // Record the (voided) binding to yyValue.
0886: value = (Binding) ((VoidedElement) e).element;
0887: } else if (isBoundAndVoided(e)) {
0888: // Record a previously bound an voided element.
0889: if (null == value) {
0890: before()
0891: .add(
0892: (Binding) ((VoidedElement) e).element);
0893: } else {
0894: after()
0895: .add(
0896: (Binding) ((VoidedElement) e).element);
0897: }
0898: } else if (analyzer.isBindable(e)
0899: && (!isParseTreeNode(e))) {
0900: // Ensure the element has a binding.
0901: if (!(e instanceof Binding)) {
0902: e = new Binding(analyzer
0903: .variable(MARKER), e);
0904: s.elements.set(j, e);
0905: }
0906: if (null == value) {
0907: before().add((Binding) e);
0908: } else {
0909: after().add((Binding) e);
0910: }
0911: }
0912:
0913: } else if (isValuable(e)) {
0914: // The element has not yet been processed.
0915: VoidedElement v = bindAndVoid(e);
0916: s.elements.set(j, v);
0917:
0918: if ((i == valueSequenceIdx)
0919: && (j == valueElementIdx)) {
0920: value = (Binding) v.element;
0921: } else if (null == value) {
0922: before().add((Binding) v.element);
0923: } else {
0924: after().add((Binding) v.element);
0925: }
0926: }
0927: }
0928: }
0929: assert null != value; // We must have seen the binding.
0930:
0931: ParseTreeNode n = new ParseTreeNode(before(), value,
0932: after());
0933: Binding b = new Binding(CodeGenerator.VALUE, n);
0934: sequences.get(sequences.size() - 1).add(b);
0935:
0936: toProcessIdx = sequences.size();
0937: assert toProcessIdx == before.size();
0938: return;
0939: }
0940: }
0941:
0942: // Now, process the elements, one at a time.
0943: for (int i = toProcessIdx; i < size; i++) {
0944: final Sequence s = sequences.get(i);
0945: push();
0946:
0947: for (int j = 0; j < (s.hasTrailingChoice() ? s.size() - 1
0948: : s.size()); j++) {
0949: // Skip any left-recursive nonterminal.
0950: if (isRecursive && (0 == i) && (0 == j))
0951: continue;
0952:
0953: // Process the element.
0954: final Element e = s.get(j);
0955:
0956: if (analyzer.isBindable(e)) {
0957: // The element represents a regular value.
0958: if ((isGeneric || isList)
0959: && AST.isList(analyzer.type(e))) {
0960: // If the current production is generic or list-valued and
0961: // the current element has a list value, we cannot include
0962: // the current element in an annotated node.
0963: if (hasValuable()) {
0964: // Emit a new annotated node.
0965: Binding b = hasBindable() ? bindAndVoid()
0966: : null;
0967: // Note: structural modification.
0968: s.elements.add(j, new ParseTreeNode(
0969: before(), b, after()));
0970: j++; // Do not revisit the current element.
0971: }
0972: reset();
0973:
0974: } else if ((!hasBindable()) || (!hasValuable())) {
0975: // If we have not yet seen any bindable or valuable
0976: // elements, we simply remember the current element
0977: // as the current bindable element.
0978: sequenceIndex().index = i;
0979: elementIndex().index = j;
0980:
0981: } else {
0982: // Otherwise, we bind and void the current bindable
0983: // element and then add the corresponding parse tree node
0984: // to the current sequence. Note that we shift the
0985: // current element to the right, which will then be
0986: // recorded as the current bindable element on the next
0987: // iteration.
0988:
0989: // Note: structural modification.
0990: s.elements.add(j, new ParseTreeNode(before(),
0991: bindAndVoid(), after()));
0992: reset();
0993: }
0994:
0995: } else if (isValuable(e)) {
0996: // The element is valuable.
0997: VoidedElement v = bindAndVoid(e);
0998: s.elements.set(j, v); // Update the sequence.
0999:
1000: if (!hasBindable()) {
1001: before().add((Binding) v.element);
1002: } else {
1003: after().add((Binding) v.element);
1004: }
1005: }
1006: }
1007:
1008: // Ensure that a parse tree node capturing a bindable element is
1009: // in the same sequence as the bindable element. Otherwise, any
1010: // sequences with a higher index but visited later cannot see
1011: // the element.
1012: if (i < size - 1) {
1013: if (hasBindable()) {
1014: if (hasValuable()) {
1015: // Note: structural modification. Also note: we must
1016: // insert before the trailing choice.
1017: s.elements.add(s.size() - 1, new ParseTreeNode(
1018: before(), bindAndVoid(), after()));
1019: }
1020: reset();
1021: }
1022: }
1023: }
1024:
1025: // Make sure we preserve any valuable elements.
1026: if (hasValuable()) {
1027: if (isGeneric && (!hasBindable())) {
1028: // The formatting node capturing the valuable elements needs
1029: // be created inside an action.
1030: sequences.get(sequences.size() - 1).setProperty(
1031: Properties.FORMATTING, before());
1032: } else {
1033: Binding b = hasBindable() ? bindAndVoid() : null;
1034: // Note: structural modification.
1035: sequences.get(sequences.size() - 1).add(
1036: new ParseTreeNode(before(), b, after()));
1037: }
1038: }
1039:
1040: // Remember that we have processed all sequences.
1041: toProcessIdx = sequences.size();
1042: assert toProcessIdx == before.size();
1043: }
1044:
1045: /**
1046: * Recursively process the specified element.
1047: *
1048: * @param e The element.
1049: */
1050: protected void recurse(Element e) {
1051: switch (e.tag()) {
1052: case BINDING:
1053: case OPTION:
1054: case REPETITION:
1055: case STRING_MATCH:
1056: case VOIDED:
1057: // Recurse on the unary operator's element.
1058: recurse(((UnaryOperator) e).element);
1059: break;
1060:
1061: case CHOICE:
1062: case SEQUENCE: {
1063: // Save the state.
1064: boolean savedIsGeneric = isGeneric;
1065: boolean savedIsList = isList;
1066: boolean savedIsRecursive = isRecursive;
1067: boolean savedIsTopLevel = isTopLevel;
1068: int savedToProcessIdx = toProcessIdx;
1069: List<Sequence> savedSequences = sequences;
1070: List<List<Binding>> savedBefore = before;
1071: List<Index> savedSequenceIdx = sequenceIdx;
1072: List<Index> savedElementIdx = elementIdx;
1073: List<List<Binding>> savedAfter = after;
1074:
1075: // Re-initialize the state.
1076: isGeneric = false;
1077: isList = false;
1078: isRecursive = false;
1079: isTopLevel = false;
1080: toProcessIdx = 0;
1081: sequences = new ArrayList<Sequence>();
1082: before = new ArrayList<List<Binding>>();
1083: sequenceIdx = new ArrayList<Index>();
1084: elementIdx = new ArrayList<Index>();
1085: after = new ArrayList<List<Binding>>();
1086:
1087: // Actually recurse on the choice or sequence.
1088: dispatch(e);
1089:
1090: // Restore the state.
1091: isGeneric = savedIsGeneric;
1092: isList = savedIsList;
1093: isRecursive = savedIsRecursive;
1094: isTopLevel = savedIsTopLevel;
1095: toProcessIdx = savedToProcessIdx;
1096: sequences = savedSequences;
1097: before = savedBefore;
1098: sequenceIdx = savedSequenceIdx;
1099: elementIdx = savedElementIdx;
1100: after = savedAfter;
1101: }
1102: break;
1103:
1104: default:
1105: // Nothing to do.
1106: }
1107: }
1108:
1109: /** Visit the specified grammar. */
1110: public void visit(Module m) {
1111: // Take care of generic productions first.
1112: new Detector().dispatch(m);
1113: new Rewriter().dispatch(m);
1114:
1115: // Initialize the per-grammar state.
1116: analyzer.register(this );
1117: analyzer.init(m);
1118:
1119: // Annotate the grammar's productions.
1120: for (Production p : m.productions) {
1121: // Do not process lexical productions or void productions that
1122: // do not consume any input.
1123: if (p.getBooleanProperty(Properties.LEXICAL)
1124: || (AST.isVoid(p.type) && (!p
1125: .getBooleanProperty(Properties.INPUT)))) {
1126: continue;
1127: }
1128:
1129: boolean processed = false;
1130: if (isSingleRepetition((FullProduction) p)) {
1131: for (Sequence s : p.choice.alternatives) {
1132: // Note that we can safely call recurse() since
1133: // analyzer.current() is only accessed for left-recursive
1134: // productions.
1135: recurse(s.get(0));
1136: }
1137: processed = true;
1138:
1139: } else if (AST.isVoid(p.type) || AST.isString(p.type)
1140: || AST.isNode(p.type) || AST.isAny(p.type)) {
1141: analyzer.process(p);
1142: processed = true;
1143:
1144: } else if (AST.isList(p.type)) {
1145: final Type elem = AST.getArgument(p.type);
1146:
1147: if (AST.isString(elem) || AST.isNode(elem)
1148: || AST.isAny(elem)) {
1149: analyzer.process(p);
1150: processed = true;
1151: }
1152: }
1153:
1154: if (!processed) {
1155: // We don't know how to process the production.
1156: if ((!m.hasAttribute(Constants.ATT_NO_WARNINGS))
1157: && (!p.hasAttribute(Constants.ATT_NO_WARNINGS))) {
1158: runtime.warning(
1159: "unable to add parse tree annotations", p);
1160: }
1161: }
1162: }
1163:
1164: // Retype the grammar's productions.
1165: for (Production p : m.productions) {
1166: // Skip productions that are not token-level but are lexical or
1167: // are void and do not consume the input.
1168: if ((!p.getBooleanProperty(Properties.TOKEN))
1169: && (p.getBooleanProperty(Properties.LEXICAL) || (AST
1170: .isVoid(p.type) && (!p
1171: .getBooleanProperty(Properties.INPUT))))) {
1172: continue;
1173: }
1174:
1175: if (p.getBooleanProperty(Properties.TOKEN)) {
1176: // The production is token-level. Make it actually return a
1177: // token.
1178: p.type = AST.TOKEN;
1179:
1180: } else if (AST.isVoid(p.type)) {
1181: // The void production is not lexical and consumes the input.
1182: // Make it generic.
1183: p.type = AST.GENERIC;
1184:
1185: } else if (AST.isString(p.type) || AST.isToken(p.type)) {
1186: // The production is not lexical and returns a string or
1187: // token. Make it return a node.
1188: p.type = AST.NODE;
1189:
1190: } else if (AST.isList(p.type)) {
1191: Type elem = AST.getArgument(p.type);
1192:
1193: if (AST.isString(elem) || AST.isToken(elem)) {
1194: // Change a list of strings or tokens into a list of nodes.
1195: p.type = AST.listOf(AST.NODE);
1196: }
1197:
1198: }
1199: }
1200:
1201: // Et voila.
1202: }
1203:
1204: /** Visit the specified production. */
1205: public void visit(FullProduction p) {
1206: isGeneric = isGeneric(p);
1207: isList = AST.isList(p.type);
1208: isRecursive = DirectLeftRecurser.isTransformable(p);
1209: isTopLevel = true;
1210: dispatch(p.choice);
1211: }
1212:
1213: /** Visit the specified choice. */
1214: public void visit(OrderedChoice c) {
1215: final boolean rec = isRecursive;
1216: final boolean top = isTopLevel;
1217: isTopLevel = false;
1218:
1219: for (Sequence alt : c.alternatives) {
1220: // If the current choice is top-level, set the flag for whether
1221: // the current sequence is directly left-recursive.
1222: if (top) {
1223: if (rec) {
1224: isRecursive = DirectLeftRecurser.isRecursive(alt,
1225: (FullProduction) analyzer.current());
1226: } else {
1227: isRecursive = false;
1228: }
1229: }
1230:
1231: dispatch(alt);
1232: }
1233: }
1234:
1235: /** Visit the specified sequence. */
1236: public void visit(Sequence s) {
1237: // Add the sequence to the list of sequences.
1238: sequences.add(s);
1239:
1240: // Iterate over the elements of this sequence. However, we cannot
1241: // use a Java iterator, since annotate() may add elements to
1242: // sequences and, in the presence of trailing choices, may thus
1243: // cause a concurrent modification exception. We do not need to
1244: // update the sequence's size, since all elements are recursed on
1245: // during the iteration getting us to an invocation of annotate().
1246: final int size = s.elements.size();
1247: for (int i = 0; i < size; i++) {
1248: Element e = s.get(i);
1249:
1250: if ((size - 1 == i) && (e instanceof OrderedChoice)) {
1251: // Continue with the trailing choice.
1252: dispatch(e);
1253: } else {
1254: // Recurse on the element.
1255: recurse(e);
1256: }
1257: }
1258:
1259: // Process annotations.
1260: if (!s.hasTrailingChoice()) {
1261: annotate();
1262: }
1263:
1264: // Remove the sequence again.
1265: sequences.remove(sequences.size() - 1);
1266:
1267: // Pop the processing state.
1268: toProcessIdx--;
1269: pop();
1270: assert toProcessIdx == before.size();
1271: }
1272:
1273: // ==========================================================================
1274:
1275: /**
1276: * Determine whether the specified production effectively is
1277: * generic. This method returns <code>true</code> if the specified
1278: * production is, in fact, generic or it is a void production that
1279: * also is non-lexical and consumes the input.
1280: *
1281: * @see Generifier#isGeneric(FullProduction)
1282: *
1283: * @param p The production.
1284: * @return <code>true</code> if the specified production should be
1285: * treated as generic.
1286: */
1287: public static boolean isGeneric(FullProduction p) {
1288: return (Generifier.isGeneric(p) || (AST.isVoid(p.type)
1289: && (!p.getBooleanProperty(Properties.LEXICAL)) && p
1290: .getBooleanProperty(Properties.INPUT)));
1291: }
1292:
1293: /**
1294: * Determine whether the specified production can have a semantic
1295: * value. Any production that is not void or that consumes the
1296: * input can have a semantic value. This method assumes that
1297: * productions have been correctly annotated with the {@link
1298: * Properties#INPUT} property.
1299: *
1300: * @param p The production.
1301: * @return <code>true</code> if the specified production can have a
1302: * semantic value.
1303: */
1304: public static boolean isValuable(FullProduction p) {
1305: return (!AST.isVoid(p.type))
1306: || p.getBooleanProperty(Properties.INPUT);
1307: }
1308:
1309: /**
1310: * Determine whether the specified type is a list type that can be
1311: * processed by this visitor.
1312: *
1313: * @param type The type.
1314: * @return <code>true</code> if the specified type is a list type
1315: * that can be processed by this visitor.
1316: */
1317: public static boolean isList(Type type) {
1318: if (!AST.isList(type))
1319: return false;
1320:
1321: Type elem = AST.getArgument(type);
1322:
1323: return (AST.isAny(elem) || AST.isNode(elem) || AST
1324: .isString(elem));
1325: }
1326:
1327: /**
1328: * Determine whether the specified production recognizes only a
1329: * single repetition. This method returns <code>true</code> if the
1330: * specified production returns a list of objects, nodes, generic
1331: * nodes, tokens, or strings and each alternative consists of only a
1332: * single, optionally bound repetition.
1333: *
1334: * @param p The production.
1335: * @return <code>true</code> if the specified production recognizes
1336: * only a single repetition.
1337: */
1338: public static boolean isSingleRepetition(FullProduction p) {
1339: if (!isList(p.type))
1340: return false;
1341:
1342: for (Sequence s : p.choice.alternatives) {
1343: if (1 != s.size())
1344: return false;
1345:
1346: final Element e = s.get(0);
1347: if ((!(e instanceof Repetition))
1348: && (!((e instanceof Binding) && (((Binding) e).element instanceof Repetition)))) {
1349: return false;
1350: }
1351: }
1352:
1353: return true;
1354: }
1355:
1356: }
|