001: /*
002: * <copyright>
003: *
004: * Copyright 1997-2004 BBNT Solutions, LLC
005: * under sponsorship of the Defense Advanced Research Projects
006: * Agency (DARPA).
007: *
008: * You can redistribute this software and/or modify it under the
009: * terms of the Cougaar Open Source License as published on the
010: * Cougaar Open Source Website (www.cougaar.org).
011: *
012: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
013: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
014: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
015: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
016: * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
017: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
018: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
019: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
020: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
021: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
022: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
023: *
024: * </copyright>
025: */
026:
027: package org.cougaar.lib.contract.lang.parser;
028:
029: import java.io.*;
030: import java.util.*;
031:
032: import org.cougaar.lib.contract.lang.*;
033:
034: /**
035: * A "paren" semi-lisp styled <code>Op</code> parser which can be
036: * used to control a <code>TreeVisitor</code>.
037: * <p>
038: * This is a fairly simple conversion which breaks the parenthesis
039: * apart and converts shorthand <code>(a b)</code> to <code>(a (b))</code>.
040: * <pre>
041: * For example,<code>
042: * (and a (b) (c d (e) "f" g) h)</code>
043: * becomes essentially<code>
044: * (and (a) (b) (c (d) (e) ("f") (g)) (h))</code></pre>.
045: * This is further simplified to the <code>TreeVisitor</code> syntax:
046: * and a) b) c d) e) "f" g) h)
047: * <p>
048: * @see XMLParser for XML implementation
049: */
050: public class ParenParser {
051:
052: private ParenParser() {
053: }
054:
055: public static void parse(TreeVisitor visitor, Object o)
056: throws ParseException {
057: if (o instanceof String) {
058: // convert string to reader
059: o = new StringReader((String) o);
060: } else if (o instanceof InputStream) {
061: o = new InputStreamReader((InputStream) o);
062: } else if (!(o instanceof Reader)) {
063: throw new ParseException("Parser unable to read from "
064: + ((o != null) ? o.getClass().getName() : "null"));
065: }
066: // build tokenizer
067: StreamTokenizer st = new StreamTokenizer((Reader) o);
068: // allow XML name characters in words
069: st.wordChars('.', '.');
070: st.wordChars('-', '-');
071: st.wordChars('_', '_');
072: st.wordChars(':', ':');
073: // allow Java-style comments
074: st.slashStarComments(true);
075: st.slashSlashComments(true);
076:
077: visitor.initialize();
078:
079: int depth = 0;
080:
081: readTokens: while (true) {
082: // read the next token
083: int token;
084: try {
085: token = st.nextToken();
086: } catch (IOException ioe) {
087: throw new ParseException(
088: "Parser received IO Exception \"" + ioe + "\"");
089: }
090: // parse the token
091: switch (token) {
092: case StreamTokenizer.TT_EOF:
093: break readTokens;
094: case (int) ')':
095: // END
096: if ((--depth) < 0) {
097: // treat extra ")"s as "EndOfTree"
098: break readTokens;
099: }
100: visitor.visitEnd();
101: break;
102: case StreamTokenizer.TT_NUMBER:
103: throw new ParseException(
104: "Parser expects numbers to use \"const\", e.g.: "
105: + "(const \"double\" \"" + st.nval
106: + "\")");
107: case StreamTokenizer.TT_WORD:
108: // shorthand "word" for "(word)"
109: visitor.visitWord(st.sval);
110: visitor.visitEnd();
111: break;
112: default:
113: // ordinary character.
114: if (st.ttype == '(') {
115: // expecting WORD or STRING
116: int tok0;
117: try {
118: tok0 = st.nextToken();
119: } catch (IOException ioe) {
120: throw new ParseException(
121: "Parser received IO Exception \"" + ioe
122: + "\"");
123: }
124: if (tok0 == StreamTokenizer.TT_WORD) {
125: // word
126: ++depth;
127: visitor.visitWord(st.sval);
128: } else if (st.ttype == '"') {
129: // quoted string -- should be followed by ")"!
130: visitor.visitConstant(st.sval);
131: int tok1;
132: try {
133: tok1 = st.nextToken();
134: } catch (IOException ioe) {
135: throw new ParseException(
136: "Parser received IO Exception \""
137: + ioe + "\"");
138: }
139: if (tok1 == (int) ')') {
140: // typical string
141: } else if (tok0 == StreamTokenizer.TT_EOF) {
142: break readTokens;
143: } else {
144: throw new ParseException(
145: "Parser expecting String to be followed by \")\"");
146: }
147: } else {
148: throw new ParseException(
149: "Parser expecting Word or String, not "
150: + st.toString());
151: }
152: break;
153: } else if (st.ttype == '"') {
154: // quoted string
155: visitor.visitConstant(st.sval);
156: break;
157: } else {
158: // single-letter non-ascii word?
159: throw new ParseException(
160: "Parser given invalid character: "
161: + st.ttype);
162: }
163: }
164: }
165:
166: for (; depth > 0; depth--) {
167: // add missing ")"s
168: visitor.visitEnd();
169: }
170: visitor.visitEndOfTree();
171: }
172:
173: public static StringVisitor getStringVisitor() {
174: return new ParenStringVisitor();
175: }
176:
177: public static String toString(VisitTokenizer visTokenizer) {
178: TreeVisitor visitor = getStringVisitor();
179: VisitReplayer.replay(visitor, visTokenizer);
180: return visitor.toString();
181: }
182:
183: public static void main(String[] args) {
184: String input = "(and (a) b (c d (e";
185: System.out.print("Given: " + input + "\nParsed: ");
186: try {
187: TreeVisitor strVis = ParenParser.getStringVisitor();
188: ParenParser.parse(strVis, input);
189: System.out.println(strVis.toString());
190: } catch (Exception e) {
191: System.out.println("\n######\n" + e);
192: e.printStackTrace();
193: }
194: }
195: }
|