001: /*
002: * $Id: Parser.java,v 1.1 2003/08/19 01:12:57 jonesde Exp $
003: *
004: * Copyright (c) 1999 Steven J. Metsker.
005: * Copyright (c) 2001 The Open For Business Project - www.ofbiz.org
006: *
007: * Permission is hereby granted, free of charge, to any person obtaining a
008: * copy of this software and associated documentation files (the "Software"),
009: * to deal in the Software without restriction, including without limitation
010: * the rights to use, copy, modify, merge, publish, distribute, sublicense,
011: * and/or sell copies of the Software, and to permit persons to whom the
012: * Software is furnished to do so, subject to the following conditions:
013: *
014: * The above copyright notice and this permission notice shall be included
015: * in all copies or substantial portions of the Software.
016: *
017: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
018: * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
019: * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
020: * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
021: * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
022: * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
023: * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
024: */
025:
026: package org.ofbiz.rules.parse;
027:
028: import java.util.*;
029:
030: /**
031: * A <code>Parser</code> is an object that recognizes the elements of a language.
032: * <p>
033: * Each <code>Parser</code> object is either a <code>
034: * Terminal</code> or a composition of other parsers.
035: * The <code>Terminal</code> class is a subclass of <code>
036: * Parser</code>, and is itself a hierarchy of
037: * parsers that recognize specific patterns of text. For
038: * example, a <code>Word</code> recognizes any word, and a
039: * <code>Literal</code> matches a specific string.
040: * <p>
041: * In addition to <code>Terminal</code>, other subclasses of
042: * <code>Parser</code> provide composite parsers,
043: * describing sequences, alternations, and repetitions of
044: * other parsers. For example, the following <code>
045: * Parser</code> objects culminate in a <code>good
046: * </code> parser that recognizes a description of good
047: * coffee.
048: *
049: * <blockquote><pre>
050: * Alternation adjective = new Alternation();
051: * adjective.add(new Literal("steaming"));
052: * adjective.add(new Literal("hot"));
053: * Sequence good = new Sequence();
054: * good.add(new Repetition(adjective));
055: * good.add(new Literal("coffee"));
056: * String s = "hot hot steaming hot coffee";
057: * Assembly a = new TokenAssembly(s);
058: * System.out.println(good.bestMatch(a));
059: * </pre></blockquote>
060: *
061: * This prints out:
062: *
063: * <blockquote><pre>
064: * [hot, hot, steaming, hot, coffee]
065: * hot/hot/steaming/hot/coffee^
066: * </pre></blockquote>
067: *
068: * The parser does not match directly against a string,
069: * it matches against an <code>Assembly</code>. The
070: * resulting assembly shows its stack, with four words on it,
071: * along with its sequence of tokens, and the index at the
072: * end of these. In practice, parsers will do some work
073: * on an assembly, based on the text they recognize.
074: *
075: * @author Steven J. Metsker
076: * @version 1.0
077: */
078: public abstract class Parser {
079:
080: /** a name to identify this parser */
081: protected String name;
082:
083: /**
084: * an object that will work on an assembly whenever this
085: * parser successfully matches against the assembly
086: */
087: protected Assembler assembler;
088:
089: /** Constructs a nameless parser. */
090: public Parser() {
091: }
092:
093: /**
094: * Constructs a parser with the given name.
095: *
096: * @param name A name to be known by. For parsers
097: * that are deep composites, a simple name
098: * identifying its purpose is useful.
099: */
100: public Parser(String name) {
101: this .name = name;
102: }
103:
104: /**
105: * Accepts a "visitor" which will perform some operation on
106: * a parser structure. The book, "Design Patterns", explains
107: * the visitor pattern.
108: *
109: * @param pv the visitor to accept
110: */
111: public void accept(ParserVisitor pv) {
112: accept(pv, new ArrayList());
113: }
114:
115: /**
116: * Accepts a "visitor" along with a collection of previously
117: * visited parsers.
118: *
119: * @param pv the visitor to accept
120: * @param visited a collection of previously visited
121: * parsers.
122: */
123: public abstract void accept(ParserVisitor pv, List visited);
124:
125: /**
126: * Adds the elements of one vector to another.
127: *
128: * @param v1 the vector to add to
129: * @param v2 the vector with elements to add
130: */
131: public static void add(List v1, List v2) {
132: Enumeration e = Collections.enumeration(v2);
133:
134: while (e.hasMoreElements()) {
135: v1.add(e.nextElement());
136: }
137: }
138:
139: /**
140: * Returns the most-matched assembly in a collection.
141: *
142: * @return the most-matched assembly in a collection.
143: * @param v the collection to look through
144: *
145: */
146: public Assembly best(List v) {
147: Assembly best = null;
148: Enumeration e = Collections.enumeration(v);
149:
150: while (e.hasMoreElements()) {
151: Assembly a = (Assembly) e.nextElement();
152:
153: if (!a.hasMoreElements()) {
154: return a;
155: }
156: if (best == null) {
157: best = a;
158: } else {
159: if (a.elementsConsumed() > best.elementsConsumed()) {
160: best = a;
161: }
162: }
163: }
164: return best;
165: }
166:
167: /**
168: * Returns an assembly with the greatest possible number of
169: * elements consumed by matches of this parser.
170: *
171: * @return an assembly with the greatest possible number of
172: * elements consumed by this parser
173: * @param a an assembly to match against
174: */
175: public Assembly bestMatch(Assembly a) {
176: List in = new ArrayList();
177:
178: in.add(a);
179: List out = matchAndAssemble(in);
180:
181: // if (Debug.verboseOn()) Debug.logVerbose("[bestMatch] after match and assemble, before best: in=" + in + ", out=" + out, module);
182: return best(out);
183: }
184:
185: /**
186: * Returns either null, or a completely matched version of
187: * the supplied assembly.
188: *
189: * @return either null, or a completely matched version of the
190: * supplied assembly
191: * @param a an assembly to match against
192: */
193: public Assembly completeMatch(Assembly a) {
194: Assembly best = bestMatch(a);
195:
196: if (best != null && !best.hasMoreElements()) {
197: return best;
198: }
199: return null;
200: }
201:
202: /**
203: * Create a copy of a vector, cloning each element of
204: * the vector.
205: *
206: * @param in the vector to copy
207: * @return a copy of the input vector, cloning each
208: * element of the vector
209: */
210: public static List elementClone(List v) {
211: List copy = new ArrayList();
212: Enumeration e = Collections.enumeration(v);
213:
214: while (e.hasMoreElements()) {
215: Assembly a = (Assembly) e.nextElement();
216:
217: copy.add(a.clone());
218: }
219: return copy;
220: }
221:
222: /**
223: * Returns the name of this parser.
224: *
225: * @return the name of this parser
226: */
227: public String getName() {
228: return name;
229: }
230:
231: /**
232: * Given a set (well, a <code>List</code>, really) of
233: * assemblies, this method matches this parser against
234: * all of them, and returns a new set (also really a
235: * <code>List</code>) of the assemblies that result from
236: * the matches.
237: * <p>
238: * For example, consider matching the regular expression
239: * <code>a*</code> against the string <code>"aaab"</code>.
240: * The initial set of states is <code>{^aaab}</code>, where
241: * the ^ indicates how far along the assembly is. When
242: * <code>a*</code> matches against this initial state, it
243: * creates a new set <code>{^aaab, a^aab, aa^ab,
244: * aaa^b}</code>.
245: *
246: * @return a List of assemblies that result from
247: * matching against a beginning set of assemblies
248: * @param in a vector of assemblies to match against
249: */
250: public abstract List match(List in);
251:
252: /**
253: * Match this parser against an input state, and then
254: * apply this parser's assembler against the resulting
255: * state.
256: *
257: * @return a List of assemblies that result from matching
258: * against a beginning set of assemblies
259: * @param in a vector of assemblies to match against
260: */
261: public List matchAndAssemble(List in) {
262: List out = match(in);
263:
264: // if (Debug.verboseOn()) Debug.logVerbose("[matchAndAssemble] after match, before assemble: in=" + in + ", out=" + out, module);
265:
266: if (assembler != null) {
267: Enumeration e = Collections.enumeration(out);
268:
269: while (e.hasMoreElements()) {
270: assembler.workOn((Assembly) e.nextElement());
271: }
272: }
273: return out;
274: }
275:
276: /**
277: * Create a random expansion for this parser, where a
278: * concatenation of the returned collection will be a
279: * language element.
280: */
281: protected abstract List randomExpansion(int maxDepth, int depth);
282:
283: /**
284: * Return a random element of this parser's language.
285: *
286: * @return a random element of this parser's language
287: */
288: public String randomInput(int maxDepth, String separator) {
289: StringBuffer buf = new StringBuffer();
290: Enumeration e = Collections.enumeration(randomExpansion(
291: maxDepth, 0));
292: boolean first = true;
293:
294: while (e.hasMoreElements()) {
295: if (!first) {
296: buf.append(separator);
297: }
298: buf.append(e.nextElement());
299: first = false;
300: }
301: return buf.toString();
302: }
303:
304: /**
305: * Sets the object that will work on an assembly whenever
306: * this parser successfully matches against the
307: * assembly.
308: *
309: * @param Assembler the assembler to apply
310: *
311: * @return Parser this
312: */
313: public Parser setAssembler(Assembler assembler) {
314: this .assembler = assembler;
315: return this ;
316: }
317:
318: /**
319: * Returns a textual description of this parser.
320: *
321: * @return String a textual description of this
322: * parser, taking care to avoid
323: * infinite recursion
324: */
325: public String toString() {
326: return toString(new ArrayList());
327: }
328:
329: /**
330: * Returns a textual description of this parser.
331: * Parsers can be recursive, so when building a
332: * descriptive string, it is important to avoid infinite
333: * recursion by keeping track of the objects already
334: * described. This method keeps an object from printing
335: * twice, and uses <code>unvisitedString</code> which
336: * subclasses must implement.
337: *
338: * @param visited a list of objects already printed
339: *
340: * @return a textual version of this parser,
341: * avoiding recursion
342: */
343: protected String toString(List visited) {
344: if (name != null) {
345: return name;
346: } else if (visited.contains(this )) {
347: return "...";
348: } else {
349: visited.add(this );
350: return unvisitedString(visited);
351: }
352: }
353:
354: /**
355: * Returns a textual description of this string.
356: */
357: protected abstract String unvisitedString(List visited);
358: }
|