001: /*
002: * SimpleSExprStream.java
003: *
004: * Copyright 1997 Massachusetts Institute of Technology.
005: * All Rights Reserved.
006: *
007: * Author: Ora Lassila
008: *
009: * $Id: SimpleSExprStream.java,v 1.2 1998/01/22 13:09:28 bmahe Exp $
010: */
011:
012: package org.w3c.tools.sexpr;
013:
014: import java.io.InputStream;
015: import java.io.PrintStream;
016: import java.io.PushbackInputStream;
017: import java.io.IOException;
018: import java.io.EOFException;
019: import java.util.Dictionary;
020: import java.util.Hashtable;
021: import java.util.Vector;
022:
023: /**
024: * Basic implementation of the SExprStream parser interface.
025: */
026: public class SimpleSExprStream extends PushbackInputStream implements
027: SExprStream {
028:
029: private StringBuffer buffer;
030: private Dictionary symbols;
031: private boolean noSymbols;
032: private Readtable readtable;
033: private boolean listsAsVectors;
034:
035: /**
036: * Initializes the parser with no read table and no symbol table assigned.
037: * Parsed lists will be represented as Cons cells.
038: */
039: public SimpleSExprStream(InputStream input) {
040: super (input);
041: this .buffer = new StringBuffer();
042: this .symbols = null;
043: this .noSymbols = false;
044: this .readtable = null;
045: this .listsAsVectors = false;
046: }
047:
048: /**
049: * Accesses the symbol table of the parser.
050: * If no symbol table has been assigned, creates an empty table.
051: */
052: public Dictionary getSymbols() {
053: if (!noSymbols && symbols == null)
054: symbols = new Hashtable();
055: return symbols;
056: }
057:
058: /**
059: * Assigns a symbol table to the parser.
060: * Assigning <tt>null</tt> will prevent an empty symbol table to be created
061: * in the future.
062: */
063: public Dictionary setSymbols(Dictionary symbols) {
064: if (symbols == null)
065: noSymbols = true;
066: return this .symbols = symbols;
067: }
068:
069: /**
070: * Accesses the read table of the parser.
071: * If no read table has been assigned, creates an empty table.
072: */
073: public Readtable getReadtable() {
074: if (readtable == null)
075: readtable = new SimpleReadtable();
076: return readtable;
077: }
078:
079: /**
080: * Assigns a new read table to the parser.
081: */
082: public Readtable setReadtable(Readtable readtable) {
083: return this .readtable = readtable;
084: }
085:
086: /**
087: * Checks whether lists should be parsed as Vectors or Cons cells.
088: */
089: public boolean getListsAsVectors() {
090: return listsAsVectors;
091: }
092:
093: /**
094: * Controls whether lists are represented as Vectors or Cons cells.
095: */
096: public boolean setListsAsVectors(boolean listsAsVectors) {
097: return this .listsAsVectors = listsAsVectors;
098: }
099:
100: /**
101: * Accesses an empty string buffer available temporary storage.
102: * This buffer can be used by sub-parsers as a scratch area. Please note
103: * that the buffer is not guarded in any way, so multithreaded and reentrant
104: * programs must worry about this themselves.
105: */
106: public StringBuffer getScratchBuffer() {
107: buffer.setLength(0);
108: return buffer;
109: }
110:
111: /**
112: * Parses a single object from the underlying input stream.
113: *
114: * @exception SExprParserException if syntax error was detected
115: * @exception IOException if any other I/O-related problem occurred
116: */
117: public Object parse() throws SExprParserException, IOException {
118: return parse(readSkipWhite(), this );
119: }
120:
121: /**
122: * Parses a single object started by the character <i>c</i>.
123: * Implements the SExprParser interface.
124: *
125: * @exception SExprParserException if syntax error was detected
126: * @exception IOException if any other I/O-related problem occurred
127: */
128: public Object parse(char c, SExprStream stream)
129: throws SExprParserException, IOException {
130: SExprParser parser = getReadtable().getParser(c);
131: if (parser != null)
132: return parser.parse(c, this );
133: else if (c == '(') {
134: if (getListsAsVectors())
135: return parseVector(new Vector(), ')');
136: else
137: return parseList();
138: } else if (c == '"')
139: return parseString();
140: else if (isAtomChar(c, true))
141: return parseAtom(c);
142: else
143: throw new SExprParserException(c);
144: }
145:
146: /**
147: * Parses a list (as Cons cells) sans first character.
148: *
149: * @exception SExprParserException if syntax error was detected
150: * @exception IOException if any other I/O-related problem occurred
151: */
152: protected Cons parseList() throws SExprParserException, IOException {
153: char c = readSkipWhite();
154: if (c == ')')
155: return null;
156: else {
157: unread(c);
158: return new Cons(parse(), parseList());
159: }
160: }
161:
162: /**
163: * Parses a list (as a Vector) sans first character.
164: * In order to parse list-like structures delimited by other characters
165: * than parentheses, the delimiting (ending) character has to be provided.
166: *
167: * @exception SExprParserException if syntax error was detected
168: * @exception IOException if any other I/O-related problem occurred
169: */
170: protected Vector parseVector(Vector vector, char delimiter)
171: throws SExprParserException, IOException {
172: char c = readSkipWhite();
173: if (c == delimiter)
174: return vector;
175: else {
176: unread(c);
177: vector.addElement(parse());
178: return parseVector(vector, delimiter);
179: }
180: }
181:
182: /**
183: * Parses an atom (a number or a symbol).
184: * Since anything that is not a number is a symbol, syntax errors are not
185: * possible.
186: *
187: * @exception SExprParserException not signalled but useful for the protocol
188: * @exception IOException if an I/O problem occurred (e.g. end of file)
189: */
190: protected Object parseAtom(char c) throws SExprParserException,
191: IOException {
192: StringBuffer b = getScratchBuffer();
193: do {
194: b.append(c);
195: } while (isAtomChar(c = (char) read(), false));
196: unread(c);
197: String s = b.toString();
198: try {
199: return makeNumber(s);
200: } catch (NumberFormatException e) {
201: return Symbol.makeSymbol(s, getSymbols());
202: }
203: }
204:
205: /**
206: * Parses a double-quote -delimited string (sans the first character).
207: * Please note: no escape-character interpretation is performed. Override
208: * this method for any escape character handling.
209: *
210: * @exception SExprParserException not signalled but useful for the protocol
211: * @exception IOException any I/O problem (including end of file)
212: */
213: public String parseString() throws SExprParserException,
214: IOException {
215: int code;
216: StringBuffer b = getScratchBuffer();
217: while (true) {
218: switch (code = read()) {
219: case (int) '"':
220: return new String(b);
221: case -1:
222: throw new EOFException();
223: default:
224: b.append((char) code);
225: break;
226: }
227: }
228: }
229:
230: /**
231: * Predicate function for checking if a chahracter can belong to an atom.
232: *
233: * @param first if true means that c is the first character of the atom
234: */
235: protected boolean isAtomChar(char c, boolean first) {
236: return !(Character.isSpace(c) || c == '(' || c == ')'
237: || c == '"' || c == '}' || c == '{');
238: }
239:
240: /**
241: * Reads from the stream, skipping whitespace and comments.
242: *
243: * @exception IOException if an I/O problem occurred (including end of file)
244: */
245: public char readSkipWhite() throws IOException {
246: char c;
247: do {
248: c = (char) read();
249: if (c == ';') // skip comments
250: do {
251: } while ((c = (char) read()) != '\n' && c != '\r');
252: if (c == -1)
253: throw new EOFException();
254: } while (Character.isSpace(c));
255: return c;
256: }
257:
258: /**
259: * Attempts to parse a number from the string.
260: *
261: * @exception NumberFormatException the string does not represent a number
262: */
263: protected Number makeNumber(String s) throws NumberFormatException {
264: try {
265: return Integer.valueOf(s);
266: } catch (NumberFormatException e) {
267: return DoubleFix.valueOf(s);
268: }
269: }
270:
271: /**
272: * Associates a dispatch character with a parser in the read table.
273: */
274: public SExprParser addParser(char key, SExprParser parser) {
275: return getReadtable().addParser(key, parser);
276: }
277:
278: /**
279: * Produces a printed representation of an s-expression.
280: */
281: public static void printExpr(Object expr, PrintStream out) {
282: if (expr == null)
283: out.print("nil");
284: else if (expr instanceof Number)
285: out.print(expr);
286: else if (expr instanceof String) {
287: out.print('"');
288: out.print(expr);
289: out.print('"');
290: } else if (expr instanceof Vector) {
291: out.print("(");
292: for (int i = 0; i < ((Vector) expr).size(); i++) {
293: if (i != 0)
294: out.print(" ");
295: printExpr(((Vector) expr).elementAt(i), out);
296: }
297: out.print(")");
298: } else if (expr instanceof SExpr)
299: ((SExpr) expr).printExpr(out);
300: else
301: out.print("#<unknown " + expr + ">");
302: }
303:
304: public static void main(String args[]) throws SExprParserException,
305: IOException {
306: SExprStream p = new SimpleSExprStream(System.in);
307: Object e = p.parse();
308: SimpleSExprStream.printExpr(e, System.out);
309: System.out.println();
310: }
311:
312: }
|