001: /*
002: * xtc - The eXTensible Compiler
003: * Copyright (C) 2004-2007 Robert Grimm
004: *
005: * This program is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU General Public License
007: * version 2 as published by the Free Software Foundation.
008: *
009: * This program is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
012: * GNU General Public License for more details.
013: *
014: * You should have received a copy of the GNU General Public License
015: * along with this program; if not, write to the Free Software
016: * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
017: * USA.
018: */
019: package xtc.parser;
020:
021: import xtc.tree.Visitor;
022:
023: /**
024: * Visitor to provide a conservative estimate for the cost of parsing
025: * a production. One unit of cost is approximately equivalent to the
026: * effort involved in parsing a character and testing it for a
027: * specific value. The cost estimate for a production includes the
028: * costs for any other productions referenced by that production. If
029: * the cost cannot be statically determined (for example, for a
030: * repetition) or a set of productions is mutually recursive, the cost
031: * is assumed to be unlimited, as represented by
032: * <code>Integer.MAX_VALUE</code>. This visitor must be invoked by
033: * visiting a grammar, which clears any previous cost estimates and
034: * annotates each production with its estimated cost.
035: *
036: * <p />This visitor assumes that the entire grammar is contained in a
037: * single module.
038: *
039: * @author Robert Grimm
040: * @version $Revision: 1.26 $
041: */
042: public class CostEstimator extends Visitor {
043:
044: /** The analyzer utility. */
045: protected final Analyzer analyzer;
046:
047: /**
048: * Create a new cost estimator.
049: *
050: * @param analyzer The analyzer utility.
051: */
052: public CostEstimator(Analyzer analyzer) {
053: this .analyzer = analyzer;
054: }
055:
056: /** Visit the specified grammar. */
057: public void visit(Module m) {
058: // Initialize the per-grammar state.
059: analyzer.register(this );
060: analyzer.init(m);
061:
062: // Clear any previous cost estimates.
063: for (Production p : m.productions)
064: p.removeProperty(Properties.COST);
065:
066: // Now, set the cost estimates.
067: for (Production p : m.productions) {
068: if (!p.hasProperty(Properties.COST)) {
069: analyzer.process(p);
070: }
071: }
072: }
073:
074: /** Visit the specified production. */
075: public Integer visit(Production p) {
076: analyzer.workingOn(p.qName);
077: Integer cost = (Integer) dispatch(p.choice);
078: analyzer.notWorkingOn(p.qName);
079: p.setProperty(Properties.COST, cost);
080: return cost;
081: }
082:
083: /** Visit the specified ordered choice. */
084: public Integer visit(OrderedChoice c) {
085: // In the worst case, none of the alternatives parses. Hence, the
086: // overall cost is the sum of the costs of all alternatives.
087: int cost = 0;
088: for (Sequence s : c.alternatives) {
089: cost = add(cost, (Integer) dispatch(s));
090: }
091: return cost;
092: }
093:
094: /** Visit the specified repetition. */
095: public Integer visit(Repetition r) {
096: return Integer.MAX_VALUE;
097: }
098:
099: /** Visit the specified option. */
100: public Integer visit(Option o) {
101: // In the worst case, the optional element does not parse. Hence,
102: // we add one unit for the empty case.
103: return add(1, (Integer) dispatch(o.element));
104: }
105:
106: /** Visit the specified sequence. */
107: public Integer visit(Sequence s) {
108: int cost = 0;
109: for (Element e : s.elements) {
110: cost = add(cost, (Integer) dispatch(e));
111: }
112: return cost;
113: }
114:
115: /** Visit the specified predicate. */
116: public Integer visit(Predicate p) {
117: // We add one unit for the test implied by a predicate.
118: return add(1, (Integer) dispatch(p.element));
119: }
120:
121: /** Visit the specified voided element. */
122: public Integer visit(VoidedElement v) {
123: // Voiding elements comes for free as no code is generated.
124: return (Integer) dispatch(v.element);
125: }
126:
127: /** Visit the specified binding. */
128: public Integer visit(Binding b) {
129: // Bindings are pretty much for free.
130: return (Integer) dispatch(b.element);
131: }
132:
133: /** Visit the specified string match. */
134: public Integer visit(StringMatch m) {
135: // We add one unit for the test performed by a string match.
136: return add(1, (Integer) dispatch(m.element));
137: }
138:
139: /** Visit the specified nonterminal. */
140: public Integer visit(NonTerminal nt) {
141: Production p = analyzer.lookup(nt);
142:
143: if (analyzer.isBeingWorkedOn(p.qName)) {
144: // A recursion has unlimited cost.
145: return Integer.MAX_VALUE;
146: } else {
147: // We add one unit for testing the nonterminal.
148: return add(1, p.hasProperty(Properties.COST) ? (Integer) p
149: .getProperty(Properties.COST)
150: : (Integer) dispatch(p));
151: }
152: }
153:
154: /** Visit the specified string literal. */
155: public Integer visit(StringLiteral l) {
156: // Each character in the string literal requires a test.
157: return l.text.length();
158: }
159:
160: /** Visit the specified character case. */
161: public Integer visit(CharCase c) {
162: if (null == c.element) {
163: return 0;
164: } else {
165: return (Integer) dispatch(c.element);
166: }
167: }
168:
169: /** Visit the specified character switch. */
170: public Integer visit(CharSwitch sw) {
171: // A character switch has disjoint cases, so the cost is the
172: // maximum cost of the cases and default (+ 1 for the character
173: // switch itself).
174: int cost = 0;
175: for (CharCase kase : sw.cases) {
176: cost = Math.max(cost, (Integer) dispatch(kase) + 1);
177: }
178: if (null == sw.base) {
179: cost = Math.max(cost, (Integer) dispatch(sw.base) + 1);
180: }
181: return cost;
182: }
183:
184: /**
185: * Visit the specified terminal. This method provides the default
186: * implementation for any character elements, character classes, and
187: * character literals.
188: */
189: public Integer visit(Terminal t) {
190: return 1;
191: }
192:
193: /** Visit the specified node marker. */
194: public Integer visit(NodeMarker m) {
195: return 0;
196: }
197:
198: /** Visit the specified action. */
199: public Integer visit(Action a) {
200: // An action typically creates a semantic value, so the cost is 1.
201: return 1;
202: }
203:
204: /** Visit the specified parser action. */
205: public Integer visit(ParserAction a) {
206: // Parser actions may consume arbritrary inputs, so they have
207: // unlimited cost.
208: return Integer.MAX_VALUE;
209: }
210:
211: /** Visit the specified null literal. */
212: public Integer visit(NullLiteral l) {
213: // Null literals are free.
214: return 0;
215: }
216:
217: /** Visit the specified string value. */
218: public Integer visit(StringValue v) {
219: // A string value without a static text creates a string, so the
220: // cost is 1.
221: return null == v.text ? 1 : 0;
222: }
223:
224: /** Visit the specified proper list value. */
225: public Integer visit(ProperListValue v) {
226: // A proper list value creates a pair for each element.
227: return v.elements.size();
228: }
229:
230: /** Visit the specified action base value. */
231: public Integer visit(ActionBaseValue v) {
232: // An action base value applies all actions on the list, so the
233: // cost is proportional to the length of the list.
234: return Integer.MAX_VALUE;
235: }
236:
237: /** Visit the specified generic node value. */
238: public Integer visit(GenericNodeValue v) {
239: // A generic value creates a generic node, so the cost is 2 (1 for
240: // the node, 1 for the children).
241: return 2;
242: }
243:
244: /** Visit the specified generic action value. */
245: public Integer visit(GenericActionValue v) {
246: // A generic action value creates a new action, so the cost is 1.
247: return 1;
248: }
249:
250: /** Visit the specified generic recursion value. */
251: public Integer visit(GenericRecursionValue v) {
252: // A generic recursion value creates a new action and a new pair,
253: // so the cost is 2.
254: return 2;
255: }
256:
257: /**
258: * Visit the specified value element. This method provides the
259: * default implementation for null values and empty list values.
260: */
261: public Integer visit(ValueElement v) {
262: return 0;
263: }
264:
265: /**
266: * Add the two specified estimates.
267: *
268: * @param e1 The first estimate.
269: * @param e2 The second estimate.
270: * @return The sum.
271: */
272: protected static int add(final int e1, final int e2) {
273: if ((Integer.MAX_VALUE == e1) || (Integer.MAX_VALUE == e2)) {
274: return Integer.MAX_VALUE;
275: } else {
276: long sum = e1 + e2;
277: if (Integer.MAX_VALUE < sum) {
278: return Integer.MAX_VALUE;
279: } else {
280: return (int) sum;
281: }
282: }
283: }
284:
285: }
|