0001: /*
0002: * xtc - The eXTensible Compiler
0003: * Copyright (C) 2004 Robert Grimm
0004: *
0005: * This program is free software; you can redistribute it and/or
0006: * modify it under the terms of the GNU General Public License
0007: * as published by the Free Software Foundation; either version 2
0008: * of the License, or (at your option) any later version.
0009: *
0010: * This program is distributed in the hope that it will be useful,
0011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0013: * GNU General Public License for more details.
0014: *
0015: * You should have received a copy of the GNU General Public License
0016: * along with this program; if not, write to the Free Software
0017: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
0018: */
0019: package xtc.parser;
0020:
0021: import java.util.ArrayList;
0022: import java.util.HashMap;
0023: import java.util.HashSet;
0024: import java.util.Iterator;
0025: import java.util.List;
0026: import java.util.Map;
0027: import java.util.Set;
0028:
0029: import xtc.Constants;
0030:
0031: import xtc.tree.Utility;
0032: import xtc.tree.Visitor;
0033:
0034: /**
0035: * Utility for analyzing and modifying grammars. This class provides
0036: * the following functionality:<ul>
0037: *
0038: * <li>A mapping from nonterminals to productions, which is
0039: * initialized through {@link #init(Grammar)} and accessed through
0040: * {@link #isDefined(NonTerminal)} and {@link #lookup(NonTerminal)}.<p
0041: * /></li>
0042: *
0043: * <li>The set of top-level nonterminals, which is accessed through
0044: * {@link #isTopLevel(NonTerminal)}.<p /></li>
0045: *
0046: * <li>Methods to start {@link #process processing} a production and
0047: * to {@link #current determine} the current production.<p /></li>
0048: *
0049: * <li>A working set, marked set, and processed set to determine
0050: * properties of productions. The idea behind these three sets is
0051: * that the working set keeps track of all productions during an
0052: * analysis pass and is used to prevent infinite recursion, the marked
0053: * set tracks the productions having the property, and the processed
0054: * set (which is a superset of the marked set) tracks the analyzed
0055: * productions. The working set is accessed through {@link
0056: * #workingOn(NonTerminal)}, {@link #notWorkingOn(NonTerminal)},
0057: * {@link #isBeingWorkedOn(NonTerminal)}, and {@link #working()}. The
0058: * marked set is accessed through {@link #mark(NonTerminal)}, {@link
0059: * #unmark(NonTerminal)}, {@link #isMarked(NonTerminal)}, and {@link
0060: * #marked()}. Finally, the processed set is accessed through {@link
0061: * #processed(NonTerminal)} and {@link #isProcessed(NonTerminal)}.<p
0062: * /></li>
0063: *
0064: * <li>Methods to add and remove productions from a grammar. New
0065: * productions are prepared for addition through {@link
0066: * #add(Production)} and committed to the grammar through {@link
0067: * #addNewProductionsAt(int)}. Existing productions are removed
0068: * through {@link #remove(Production)}.<p /></li>
0069: *
0070: * <li>A set of methods for creating temporary variable names and
0071: * nonterminals for new productions: {@link #variable()}, {@link
0072: * #choice()}, {@link #star()}, {@link #plus()}, {@link #option()},
0073: * and {@link #shared()}.<p /></li>
0074: *
0075: * <li>A method to {@link #strip(Element) strip} unnecessary ordered
0076: * choices and sequences from an element.<p /></li>
0077: *
0078: * <li>A method to test whether an element {@link
0079: * #matchesEmpty(Element) matches the empty input}.<p /></li>
0080: *
0081: * <li>A method to {@link #copy(Element) copy} an element.<p /></li>
0082: *
0083: * <li>A set of methods for optimizing sequences that start with
0084: * terminals through character switches: {@link
0085: * #hasTerminalPrefix(Sequence)}, {@link #normalize(Sequence)}, and
0086: * {@link #join(Sequence,Element)}.<p /></li>
0087: *
0088: * <li>A method to {@link #matchingText(Element) get the text} of an
0089: * element.<p /></li>
0090: *
0091: * <li>A method to {@link #bind(Sequence) add bindings} to a
0092: * sequence.<p /></li>
0093: *
0094: * </ul>
0095: *
0096: * To utilize this analyzer utility for a given grammar, the utility
0097: * must be {@link #init initialized(Grammar)} with the grammar, and
0098: * each production must be {@link #process processed} with the
0099: * utility. Furthermore, new productions must be added through {@link
0100: * #add(Production)} and obsolete productions must be removed through
0101: * {@link #remove(Production)}.
0102: *
0103: * <p />The analyzer utility tracks the current grammar so that it
0104: * need not recreate its internal state (notably, the mapping from
0105: * nonterminals to productions) as long as the same analyzer utility
0106: * is used across different visitors.
0107: *
0108: * @author Robert Grimm
0109: * @version $Revision: 1.2 $
0110: */
0111: public class Analyzer extends Utility {
0112:
0113: /**
0114: * The separator character for creating new nonterminals, which
0115: * should be illegal in regular variable or nonterminal names.
0116: *
0117: */
0118: public static final String SEPARATOR = "$";
0119:
0120: /** The base name for nonterminals representing shared productions. */
0121: public static final String SHARED = SEPARATOR + "Shared";
0122:
0123: /** The base name for temporary variables. */
0124: public static final String VARIABLE = "v" + SEPARATOR;
0125:
0126: /** The suffix for nonterminals representing choices. */
0127: public static final String CHOICE = SEPARATOR + "Choice";
0128:
0129: /**
0130: * The suffix for nonterminals representing zero or more
0131: * repetitions.
0132: */
0133: public static final String STAR = SEPARATOR + "Star";
0134:
0135: /**
0136: * The suffix for nonterminals representing one or more
0137: * repetitions.
0138: */
0139: public static final String PLUS = SEPARATOR + "Plus";
0140:
0141: /** The suffix for nonterminals representing options. */
0142: public static final String OPTION = SEPARATOR + "Option";
0143:
0144: /**
0145: * The maximum character count for turning character classes into
0146: * character switches.
0147: */
0148: public static final int MAX_COUNT = 22;
0149:
0150: // =======================================================================
0151:
0152: /** The element copier. */
0153: protected final Copier xerox;
0154:
0155: /** The current grammar. */
0156: protected Grammar grammar;
0157:
0158: /** The map from nonterminals to productions. */
0159: protected Map pMap;
0160:
0161: /** The set of top-level nonterminals. */
0162: protected Set pTop;
0163:
0164: /** The current production. */
0165: protected Production pCurrent;
0166:
0167: /**
0168: * The set of nonterminals corresponding to productions currently
0169: * being processed.
0170: */
0171: protected Set pWorking;
0172:
0173: /**
0174: * The set of nonterminals corresponding to productions having
0175: * been marked.
0176: */
0177: protected Set pMarked;
0178:
0179: /**
0180: * The set of nonterminals corresponding to productions having
0181: * been processed.
0182: */
0183: protected Set pProcessed;
0184:
0185: /** The list of newly added productions. */
0186: protected List pNew;
0187:
0188: /** The count of temporary variables for the current production. */
0189: protected int varCount;
0190:
0191: /** The count of lifted choices for the current production. */
0192: protected int choiceCount;
0193:
0194: /** The count of desugared star repetitions for the current production. */
0195: protected int starCount;
0196:
0197: /** The count of desugared plus repetitions for the current production. */
0198: protected int plusCount;
0199:
0200: /** The count of desugared options for the current production. */
0201: protected int optionCount;
0202:
0203: /** The count of shared productions. */
0204: protected int sharedCount;
0205:
0206: // =======================================================================
0207:
0208: /** Create a new analyzer utility. */
0209: public Analyzer() {
0210: xerox = new Copier();
0211: pMap = new HashMap();
0212: pTop = new HashSet();
0213: pWorking = new HashSet();
0214: pMarked = new HashSet();
0215: pProcessed = new HashSet();
0216: pNew = new ArrayList();
0217: sharedCount = 1;
0218: }
0219:
0220: /** Forcibly reset the analyzer utility. */
0221: public void reset() {
0222: grammar = null;
0223: pCurrent = null;
0224: pMap.clear();
0225: pTop.clear();
0226: pWorking.clear();
0227: pMarked.clear();
0228: pProcessed.clear();
0229: pNew.clear();
0230: varCount = 1;
0231: choiceCount = 1;
0232: starCount = 1;
0233: plusCount = 1;
0234: optionCount = 1;
0235: sharedCount = 1;
0236: }
0237:
0238: // =======================================================================
0239:
0240: /**
0241: * Initialize this analyzer for the specified grammar. This method
0242: * initializes the map from nonterminals to productions and the set
0243: * of top-level nonterminals. It also clears the sets of marked and
0244: * processed nonterminals. It should be called before iterating
0245: * over a grammar's productions.
0246: *
0247: * @param g The grammar.
0248: */
0249: public void init(Grammar g) {
0250: // Initialize the map from nonterminals to productions and the set
0251: // of top-level nonterminals.
0252: if (grammar != g) {
0253: grammar = g;
0254:
0255: pMap.clear();
0256: Iterator iter = g.productions.iterator();
0257: while (iter.hasNext()) {
0258: Production p = (Production) iter.next();
0259: pMap.put(p.nonTerminal, p);
0260: }
0261:
0262: pTop.clear();
0263: iter = g.topLevel.iterator();
0264: while (iter.hasNext()) {
0265: pTop.add(iter.next());
0266: }
0267: }
0268:
0269: // Clear the sets of marked and processed productions.
0270: pMarked.clear();
0271: pProcessed.clear();
0272:
0273: // Clear the current production.
0274: pCurrent = null;
0275: }
0276:
0277: /**
0278: * Determine whether the specified nonterminal is defined. A
0279: * nonterminal is defined if the current grammar contains a
0280: * production for that nonterminal.
0281: *
0282: * @param nt The nonterminal.
0283: * @return <code>true</code> if the nonterminal is defined.
0284: */
0285: public boolean isDefined(NonTerminal nt) {
0286: return pMap.containsKey(nt);
0287: }
0288:
0289: /**
0290: * Determine whether the specified nonterminal is top-level.
0291: *
0292: * @param nt The nonterminal.
0293: * @return <code>true</code> if the nonterminal is top-level.
0294: */
0295: public boolean isTopLevel(NonTerminal nt) {
0296: return pTop.contains(nt);
0297: }
0298:
0299: /**
0300: * Look up the production for the specified nonterminal.
0301: *
0302: * @param nt The nonterminal.
0303: * @return The corresponding production or <code>null</code>
0304: * if there is no such production.
0305: */
0306: public Production lookup(NonTerminal nt) {
0307: return (Production) pMap.get(nt);
0308: }
0309:
0310: // =======================================================================
0311:
0312: /**
0313: * Process the specified production. This method clears the set of
0314: * working nonterminals. It also resets the counters for creating
0315: * new variables and nonterminals (besides the counter for shared
0316: * productions). It then invokes this analyzer's visitor on the
0317: * specified production. This method should be called within the
0318: * loop iterating over a grammar's productions, but not at other
0319: * locations within a visitor.
0320: *
0321: * @param p The production.
0322: */
0323: public void process(Production p) {
0324: // Initialize the per-production state.
0325: pWorking.clear();
0326:
0327: varCount = 1;
0328: choiceCount = 1;
0329: starCount = 1;
0330: plusCount = 1;
0331: optionCount = 1;
0332:
0333: // Remember the current production.
0334: pCurrent = p;
0335:
0336: // Now, actually process the production.
0337: p.accept(visitor());
0338: }
0339:
0340: /**
0341: * Get the production currently being processed.
0342: *
0343: * @return The current production.
0344: */
0345: public Production current() {
0346: return pCurrent;
0347: }
0348:
0349: // =======================================================================
0350:
0351: /**
0352: * Set the status of the specified nonterminal as being worked on.
0353: *
0354: * @param nt The nonterminal.
0355: */
0356: public void workingOn(NonTerminal nt) {
0357: pWorking.add(nt);
0358: }
0359:
0360: /**
0361: * Set the status of the specified nonterminal as not being worked
0362: * on.
0363: *
0364: * @param nt The nonterminal.
0365: */
0366: public void notWorkingOn(NonTerminal nt) {
0367: pWorking.remove(nt);
0368: }
0369:
0370: /**
0371: * Determine whether the specified nonterminal is being worked on.
0372: *
0373: * @param nt The nonterminal.
0374: * @return <code>true</code> if the nonterminal is being worked
0375: * on.
0376: */
0377: public boolean isBeingWorkedOn(NonTerminal nt) {
0378: return pWorking.contains(nt);
0379: }
0380:
0381: /**
0382: * Get the set of nonterminals being worked on. Note that the
0383: * caller must copy the set if it keeps the reference to the
0384: * returned set after the next call to {@link #process}.
0385: *
0386: * @return The working set.
0387: */
0388: public Set working() {
0389: return pWorking;
0390: }
0391:
0392: // =======================================================================
0393:
0394: /**
0395: * Mark the specified nonterminal.
0396: *
0397: * @param nt The nonterminal.
0398: */
0399: public void mark(NonTerminal nt) {
0400: pMarked.add(nt);
0401: }
0402:
0403: /**
0404: * Unmark the specified nonterminal.
0405: *
0406: * @param nt The nonterminal.
0407: */
0408: public void unmark(NonTerminal nt) {
0409: pMarked.remove(nt);
0410: }
0411:
0412: /**
0413: * Determine whether the specified nonterminal has been marked.
0414: *
0415: * @param nt The nonterminal.
0416: * @return <code>true</code> if the nonterminal has been
0417: * marked.
0418: */
0419: public boolean isMarked(NonTerminal nt) {
0420: return pMarked.contains(nt);
0421: }
0422:
0423: /**
0424: * Get the set of marked nonterminals. Note that the called must
0425: * copy the set if it keeps the reference to the returned set after
0426: * the next use of this analyzer.
0427: *
0428: * @return The marked set.
0429: */
0430: public Set marked() {
0431: return pMarked;
0432: }
0433:
0434: // =======================================================================
0435:
0436: /**
0437: * Set the status of the specified nonterminal as processed.
0438: *
0439: * @param nt The nonterminal.
0440: */
0441: public void processed(NonTerminal nt) {
0442: pProcessed.add(nt);
0443: }
0444:
0445: /**
0446: * Determine whether the specified nonterminal has been processed.
0447: *
0448: * @param nt The nonterminal.
0449: * @return <code>true</code> if the nonterminal has been processed.
0450: */
0451: public boolean isProcessed(NonTerminal nt) {
0452: return pProcessed.contains(nt);
0453: }
0454:
0455: // =======================================================================
0456:
0457: /**
0458: * Clear the list of newly generated productions. This method needs
0459: * to be called before a processing step that may add new
0460: * productions through {@link #add(Production)} and {@link
0461: * #addNewProductionsAt(int)}.
0462: */
0463: public void startAdding() {
0464: pNew.clear();
0465: }
0466:
0467: /**
0468: * Prepare the specified production for addition to the grammar.
0469: * This method adds the specified production to the list of newly
0470: * generated productions. It also adds the production to the map
0471: * from nonterminals to productions and marks it as {@link
0472: * Constants#SYNTHETIC synthetic}. However, addition is not
0473: * complete: the productions in the list of newly generated
0474: * productions still need to be added into the grammar itself. This
0475: * is typically done within the main loop iterating over a grammar's
0476: * productions and thus through a separate {@link
0477: * #addNewProductionsAt(int) method}.
0478: *
0479: * @param p The new production.
0480: */
0481: public void add(Production p) {
0482: p.setProperty(Constants.SYNTHETIC, Boolean.TRUE);
0483: pNew.add(p);
0484: pMap.put(p.nonTerminal, p);
0485: }
0486:
0487: /**
0488: * Add the newly generated productions to the grammar itself. This
0489: * method adds the productions collected through {@link
0490: * #add(Production) add()} into the current grammar at the specified
0491: * index of the grammar's list of productions.
0492: *
0493: * @param idx The index into the grammar's list of productions.
0494: * @return The number of productions added.
0495: */
0496: public int addNewProductionsAt(int idx) {
0497: final int size = pNew.size();
0498:
0499: if (0 != size) {
0500: grammar.productions.addAll(idx, pNew);
0501: }
0502:
0503: return size;
0504: }
0505:
0506: /**
0507: * Prepare the specified production for removal from the grammar.
0508: * This method removes the specified production from the mapping
0509: * from nonterminals to productions and, if present, from the set of
0510: * top-level nonterminals. However, removal is not complete: the
0511: * production still needs to be removed from the grammar itself.
0512: * This is typically done within the main loop iterating over a
0513: * grammar's productions.
0514: *
0515: * @param p The production.
0516: */
0517: public void remove(Production p) {
0518: pMap.remove(p.nonTerminal);
0519: pTop.remove(p.nonTerminal);
0520: }
0521:
0522: // =======================================================================
0523:
0524: /**
0525: * Create a new temporary variable.
0526: *
0527: * @return The name of the temporary variable.
0528: */
0529: public String variable() {
0530: String temp = VARIABLE + Integer.toString(varCount);
0531: varCount++;
0532: return temp;
0533: }
0534:
0535: /**
0536: * Create a new nonterminal for a choice.
0537: *
0538: * @return The new nonterminal.
0539: */
0540: public NonTerminal choice() {
0541: NonTerminal nt = new NonTerminal(pCurrent.nonTerminal.name
0542: + CHOICE + Integer.toString(choiceCount));
0543: choiceCount++;
0544: return nt;
0545: }
0546:
0547: /**
0548: * Create a new nonterminal for zero or more repetitions.
0549: *
0550: * @return The new nonterminal.
0551: */
0552: public NonTerminal star() {
0553: NonTerminal nt = new NonTerminal(pCurrent.nonTerminal.name
0554: + STAR + Integer.toString(starCount));
0555: starCount++;
0556: return nt;
0557: }
0558:
0559: /**
0560: * Create a new nonterminal for one or more repetitions.
0561: *
0562: * @return The new nonterminal.
0563: */
0564: public NonTerminal plus() {
0565: NonTerminal nt = new NonTerminal(pCurrent.nonTerminal.name
0566: + PLUS + Integer.toString(plusCount));
0567: plusCount++;
0568: return nt;
0569: }
0570:
0571: /**
0572: * Create a new nonterminal for an option.
0573: *
0574: * @return The new nonterminal.
0575: */
0576: public NonTerminal option() {
0577: NonTerminal nt = new NonTerminal(pCurrent.nonTerminal.name
0578: + OPTION + Integer.toString(optionCount));
0579: optionCount++;
0580: return nt;
0581: }
0582:
0583: /**
0584: * Create a new nonterminal for a shared production.
0585: *
0586: * @return The new nonterminal.
0587: */
0588: public NonTerminal shared() {
0589: NonTerminal nt = new NonTerminal(SHARED
0590: + Integer.toString(sharedCount));
0591: sharedCount++;
0592: return nt;
0593: }
0594:
0595: // =======================================================================
0596:
0597: /**
0598: * Strip unnecessary ordered choices and sequences from the specified
0599: * element. A choice or sequence is unnecessary if it contains only
0600: * a single element.
0601: *
0602: * @param e The element.
0603: * @return The stripped element.
0604: */
0605: public Element strip(Element e) {
0606: if (e instanceof OrderedChoice) {
0607: OrderedChoice c = (OrderedChoice) e;
0608: if (1 == c.options.size()) {
0609: e = strip((Element) c.options.get(0));
0610: }
0611: } else if (e instanceof Sequence) {
0612: Sequence s = (Sequence) e;
0613: if (1 == s.length()) {
0614: e = strip(s.get(0));
0615: }
0616: }
0617: return e;
0618: }
0619:
0620: // =======================================================================
0621:
0622: /**
0623: * Determine whether the specified element matches the empty input.
0624: * Note that this method assumes that nonterminals do not match the
0625: * empty input.
0626: *
0627: * @param e The element.
0628: * @return <code>true</code> if the specified element matches the
0629: * empty input.
0630: */
0631: public boolean matchesEmpty(Element e) {
0632: if (e instanceof OrderedChoice) {
0633: OrderedChoice c = (OrderedChoice) e;
0634: Iterator iter = c.options.iterator();
0635: while (iter.hasNext()) {
0636: if (matchesEmpty((Element) iter.next())) {
0637: return true;
0638: }
0639: }
0640: return false;
0641:
0642: } else if (e instanceof Repetition) {
0643: Repetition r = (Repetition) e;
0644:
0645: if (r.once) {
0646: return matchesEmpty(r.element);
0647: } else {
0648: return true;
0649: }
0650:
0651: } else if (e instanceof Option) {
0652: return true;
0653:
0654: } else if (e instanceof Sequence) {
0655: Sequence s = (Sequence) e;
0656: Iterator iter = s.elements.iterator();
0657: while (iter.hasNext()) {
0658: if (!matchesEmpty((Element) iter.next())) {
0659: return false;
0660: }
0661: }
0662: return true;
0663:
0664: } else if ((e instanceof SemanticPredicate)
0665: || (e instanceof StringMatch)
0666: || (e instanceof NonTerminal)
0667: || (e instanceof Terminal)) {
0668: return false;
0669:
0670: } else {
0671: // Actions and value elements do not match anything.
0672: return true;
0673: }
0674: }
0675:
0676: // =======================================================================
0677:
0678: /**
0679: * Make a deep copy of the specified element.
0680: *
0681: * @param e The element.
0682: * @return A deep copy.
0683: */
0684: public Element copy(Element e) {
0685: return (Element) e.accept(xerox);
0686: }
0687:
0688: // =======================================================================
0689:
0690: /**
0691: * Determine whether the specified sequence starts with terminals
0692: * that can be optimized. This method returns <code>true</code> if
0693: * the terminals can be optimized through character switches.
0694: * Currently, this is only the case for character and string
0695: * literals.
0696: *
0697: * @param s The sequence.
0698: * @return <code>true</code> if the sequence starts with optimizable
0699: * terminals.
0700: */
0701: public boolean hasTerminalPrefix(Sequence s) {
0702: if (1 <= s.length()) {
0703: Element e = s.get(0);
0704:
0705: if ((e instanceof CharLiteral)
0706: || (e instanceof StringLiteral)
0707: || ((e instanceof CharClass) && (((CharClass) e)
0708: .count() <= MAX_COUNT))) {
0709: return true;
0710: }
0711: }
0712:
0713: return false;
0714: }
0715:
0716: /**
0717: * Determine the length of the specified sequence after
0718: * normalization.
0719: *
0720: * @param s The sequence
0721: * @return The size of the normalized sequence or -1 if the sequence
0722: * already is in normal form.
0723: */
0724: private int normalLength(Sequence s) {
0725: final int length = s.length();
0726: boolean normal = true;
0727: int count = 0;
0728:
0729: for (int i = 0; i < length; i++) {
0730: Element e = s.get(i);
0731:
0732: if (e instanceof CharLiteral) {
0733: normal = false;
0734: count++;
0735:
0736: } else if (e instanceof StringLiteral) {
0737: normal = false;
0738: count += ((StringLiteral) e).text.length();
0739:
0740: } else if (e instanceof CharClass) {
0741: count++;
0742:
0743: } else {
0744: count += (length - i);
0745: break;
0746: }
0747: }
0748:
0749: return (normal) ? -1 : count;
0750: }
0751:
0752: /**
0753: * Normalize the specified sequence for {@link
0754: * #join(Sequence,Element) joining} with other elements during
0755: * terminal optimization. Currently, this method converts string
0756: * literals into equivalent subsequences of character literals.
0757: *
0758: * @param s The sequence.
0759: * @return The optimized sequence.
0760: */
0761: public Sequence normalize(Sequence s) {
0762: final int nl = normalLength(s);
0763: if (-1 == nl)
0764: return s;
0765:
0766: final int l = s.length();
0767: Sequence s2 = new Sequence(new ArrayList(nl));
0768:
0769: for (int i = 0; i < l; i++) {
0770: Element e = s.get(i);
0771:
0772: if (e instanceof CharLiteral) {
0773: s2.elements.add(new CharClass(((CharLiteral) e).c));
0774:
0775: } else if (e instanceof StringLiteral) {
0776: StringLiteral sl = (StringLiteral) e;
0777:
0778: for (int j = 0; j < sl.text.length(); j++) {
0779: s2.elements.add(new CharClass(sl.text.charAt(j)));
0780: }
0781:
0782: } else if (e instanceof CharClass) {
0783: s2.elements.add(e);
0784:
0785: } else {
0786: s2.elements.addAll(s.elements.subList(i, l));
0787: break;
0788: }
0789: }
0790:
0791: return s2;
0792: }
0793:
0794: /**
0795: * Join the specified sequence with the specified element. Note
0796: * that the specified sequence must have been {@link
0797: * #normalize(Sequence) normalized}. Further note that the combined
0798: * element is guaranteed to either be a sequence or an ordered
0799: * choice.
0800: *
0801: * @param source The source sequence.
0802: * @param target The target element.
0803: * @return The combined element.
0804: */
0805: public Element join(Sequence source, Element target) {
0806: // Handle trivial case first. Otherwise, normalize target.
0807: if (null == target) {
0808: return source;
0809:
0810: } else if (target instanceof Sequence) {
0811: // Strip sequence containing only an ordered choice.
0812: Sequence s = (Sequence) target;
0813:
0814: if (1 == s.length()) {
0815: Element e = s.get(0);
0816:
0817: if (e instanceof OrderedChoice) {
0818: target = e;
0819: }
0820: }
0821: }
0822:
0823: // Now, do the real joining.
0824: if (target instanceof Sequence) {
0825: final Sequence t = (Sequence) target;
0826: final Element t1 = (t.isEmpty()) ? null : t.get(0);
0827: final Element s1 = (source.isEmpty()) ? null : source
0828: .get(0);
0829:
0830: if ((s1 instanceof CharClass) && (t1 instanceof CharClass)
0831: && s1.equals(t1)) {
0832: // Both sequences start with the same character class.
0833: // Combine them into a single sequence, independent of the
0834: // class's count.
0835: Sequence result = new Sequence(join(source
0836: .subSequence(1), t.subSequence(1)));
0837: result.elements.add(0, s1);
0838:
0839: return result;
0840:
0841: } else if ((s1 instanceof CharClass)
0842: && (((CharClass) s1).count() <= MAX_COUNT)) {
0843: CharClass sk = (CharClass) s1;
0844:
0845: if (t1 instanceof CharClass) {
0846: CharClass tk = (CharClass) t1;
0847:
0848: if (tk.count() <= MAX_COUNT) {
0849: // Both sequences start with different character classes.
0850: // Try to combine them into a new character switch.
0851: return join(source, new Sequence(
0852: new CharSwitch(tk, t.subSequence(1))));
0853: }
0854:
0855: // Fall through to creating an ordered choice.
0856:
0857: } else if (t1 instanceof CharSwitch) {
0858: CharSwitch sw = (CharSwitch) t1;
0859:
0860: // Strip the exclusive flag and then look for an existing case.
0861: CharClass klass = new CharClass(sk.ranges);
0862: CharCase kase = sw.hasCase(klass);
0863:
0864: if (sk.exclusive) {
0865: // We can only join the source into the character switch
0866: // if the switch covers exactly the non-exclusive version.
0867: if ((null != kase) && (1 == sw.cases.size())) {
0868: sw.base = join(source.subSequence(1),
0869: sw.base);
0870:
0871: return target;
0872: }
0873:
0874: // Fall through to creating an ordered choice.
0875:
0876: } else {
0877: if (null != kase) {
0878: // Join the sequence into the existing character case.
0879: kase.element = join(source.subSequence(1),
0880: kase.element);
0881:
0882: return target;
0883:
0884: } else if ((!sw.overlaps(klass))
0885: && (null == sw.base)) {
0886: // If there is no overlap with an existing case and the
0887: // switch does not contain an exclusive character class,
0888: // add a new character case.
0889: sw.cases.add(new CharCase(klass, source
0890: .subSequence(1)));
0891:
0892: return target;
0893: }
0894:
0895: // Fall through to creating an ordered choice.
0896: }
0897: }
0898: }
0899:
0900: // Create a new choice with the target and source.
0901: OrderedChoice c = new OrderedChoice(new ArrayList());
0902: c.options.add(target);
0903: c.options.add(source);
0904: return c;
0905:
0906: } else if (target instanceof OrderedChoice) {
0907: // Join the source with the last option.
0908: OrderedChoice c = (OrderedChoice) target;
0909: final int l = c.options.size();
0910: Element e = join(source, (Element) c.options.get(l - 1));
0911:
0912: if (e instanceof OrderedChoice) {
0913: c.options.remove(l - 1);
0914: c.options.addAll(((OrderedChoice) e).options);
0915: } else {
0916: c.options.set(l - 1, e);
0917: }
0918:
0919: return c;
0920:
0921: } else {
0922: // Join the source with a new sequence containing the target
0923: // element.
0924: return join(source, new Sequence(target));
0925: }
0926: }
0927:
0928: // =======================================================================
0929:
0930: /**
0931: * Get the text matched by the specified element. This method
0932: * analyzes the specified element, and, if the element always
0933: * matches the same text, this method returns the constant text.
0934: * Otherwise, this method returns <code>null</code>. Note that this
0935: * method ignores predicates, actions, and value elements, as they
0936: * do not change the text matched by an element. Further note that
0937: * this method recursively analyzes referenced nonterminals.
0938: *
0939: * @param e The element.
0940: * @return The constant text.
0941: */
0942: public String matchingText(Element e) {
0943: StringBuffer buf = new StringBuffer();
0944:
0945: if (matchingText(e, buf)) {
0946: return buf.toString();
0947: } else {
0948: return null;
0949: }
0950: }
0951:
0952: /**
0953: * Get the text matched by the specified element.
0954: *
0955: * @param e The element.
0956: * @param buf The buffer for the matched text.
0957: * @return <code>true</code> if the element matches a constant
0958: * text.
0959: */
0960: private boolean matchingText(Element e, StringBuffer buf) {
0961: if (e instanceof OrderedChoice) {
0962: OrderedChoice c = (OrderedChoice) e;
0963: if (1 == c.options.size()) {
0964: return matchingText((Element) c.options.get(0), buf);
0965: } else {
0966: return false;
0967: }
0968:
0969: } else if ((e instanceof Repetition) || (e instanceof Option)) {
0970: return false;
0971:
0972: } else if (e instanceof Sequence) {
0973: Sequence s = (Sequence) e;
0974: final int l = s.length();
0975: for (int i = 0; i < l; i++) {
0976: if (!matchingText(s.get(i), buf)) {
0977: return false;
0978: }
0979: }
0980: return true;
0981:
0982: } else if (e instanceof Predicate) {
0983: return true;
0984:
0985: } else if (e instanceof Binding) {
0986: return matchingText(((Binding) e).element, buf);
0987:
0988: } else if (e instanceof StringMatch) {
0989: return matchingText(((StringMatch) e).element, buf);
0990:
0991: } else if (e instanceof NonTerminal) {
0992: Production p = lookup((NonTerminal) e);
0993: return matchingText(p.element, buf);
0994:
0995: } else if (e instanceof StringLiteral) {
0996: buf.append(((StringLiteral) e).text);
0997: return true;
0998:
0999: } else if (e instanceof CharLiteral) {
1000: buf.append(((CharLiteral) e).c);
1001: return true;
1002:
1003: } else if (e instanceof CharClass) {
1004: CharClass c = (CharClass) e;
1005:
1006: if (1 == c.ranges.size()) {
1007: CharRange r = (CharRange) c.ranges.get(0);
1008: if (r.first == r.last) {
1009: buf.append(r.first);
1010: return true;
1011: }
1012: }
1013: return false;
1014:
1015: } else if (e instanceof Terminal) {
1016: // All other terminals do not match a constant character.
1017: return false;
1018:
1019: } else if ((e instanceof Action) || (e instanceof ValueElement)) {
1020: return true;
1021:
1022: } else {
1023: // We don't know this element.
1024: return false;
1025: }
1026: }
1027:
1028: // =======================================================================
1029:
1030: /**
1031: * Bind the elements in the specified sequence. This method
1032: * analyzes the specified sequence and, if necessary, adds in a
1033: * binding for the semantic value of the elements in the sequence.
1034: * If the sequence has more than one element to be bound, this
1035: * method returns <code>null</code> to indicate that we need to rely
1036: * on the {@link CodeGenerator#VALUE semantic value} to capture the
1037: * sequence's value.
1038: *
1039: * @param s The sequence to bind.
1040: * @return The corresponding binding.
1041: */
1042: public Binding bind(Sequence s) {
1043: Binding binding = null;
1044: Element bound = null;
1045: int idx = -1;
1046:
1047: final int length = s.length();
1048: for (int i = 0; i < length; i++) {
1049: Element e = s.get(i);
1050:
1051: if ((e instanceof OrderedChoice)
1052: || (e instanceof Repetition)
1053: || (e instanceof Option) || (e instanceof Sequence)) {
1054: // Embedded choices, repetitions, options, and sequences
1055: // should not appear. All bets are off.
1056: binding = null;
1057: idx = -1;
1058: break;
1059:
1060: } else if (e instanceof Predicate) {
1061: // Skip predicates.
1062:
1063: } else if (e instanceof Binding) {
1064: if (-1 == idx) {
1065: binding = (Binding) e;
1066: idx = i;
1067: } else {
1068: binding = null;
1069: idx = -1;
1070: break;
1071: }
1072:
1073: } else if (e instanceof NonTerminal) {
1074: Production p = lookup((NonTerminal) e);
1075:
1076: if (Type.isVoidT(p.type)) {
1077: // Void productions cannot be bound. Skip them.
1078: continue;
1079: }
1080:
1081: if (-1 == idx) {
1082: bound = e;
1083: idx = i;
1084: } else {
1085: binding = null;
1086: idx = -1;
1087: break;
1088: }
1089:
1090: } else if ((e instanceof Terminal)
1091: || (e instanceof StringMatch)) {
1092: if (-1 == idx) {
1093: bound = e;
1094: idx = i;
1095: } else {
1096: binding = null;
1097: idx = -1;
1098: break;
1099: }
1100:
1101: } else {
1102: // The element is either an action or a value element. All
1103: // bets are off.
1104: binding = null;
1105: idx = -1;
1106: break;
1107: }
1108: }
1109:
1110: if (null != binding) {
1111: // There is a single element that already has a binding.
1112: return binding;
1113:
1114: } else if (-1 == idx) {
1115: // There is a sequence of elements. We rely on the semantic
1116: // value to capture the sequence's value.
1117: return null;
1118:
1119: } else {
1120: binding = new Binding(variable(), bound);
1121: s.elements.set(idx, binding);
1122: return binding;
1123: }
1124: }
1125:
1126: }
|