001: /*
002: * This file or a portion of this file is licensed under the terms of
003: * the Globus Toolkit Public License, found in file GTPL, or at
004: * http://www.globus.org/toolkit/download/license.html. This notice must
005: * appear in redistributions of this file, with or without modification.
006: *
007: * Redistributions of this Software, with or without modification, must
008: * reproduce the GTPL in: (1) the Software, or (2) the Documentation or
009: * some other similar material which is provided with the Software (if
010: * any).
011: *
012: * Copyright 1999-2004 University of Chicago and The University of
013: * Southern California. All rights reserved.
014: */
015: package org.griphyn.vdl.parser;
016:
017: import java.io.Reader;
018: import java.io.IOException;
019: import java.io.LineNumberReader;
020:
021: /**
022: * Implements the scanner for reserved words and other tokens that are
023: * generated from the input stream. This class is module-local on
024: * purpose.
025: *
026: * @author Jens-S. Vöckler
027: * @version $Revision: 50 $
028: *
029: */
030: class VDLtScanner {
031: /**
032: * stores the stream from which we are currently scanning.
033: */
034: private LineNumberReader m_in;
035:
036: /**
037: * captures the look-ahead character;
038: */
039: private int m_lookAhead;
040:
041: /**
042: * Starts to scan the given stream.
043: */
044: public VDLtScanner(java.io.Reader reader) throws IOException {
045: this .m_in = new LineNumberReader(reader);
046: this .m_in.setLineNumber(1);
047: this .m_lookAhead = m_in.read();
048: // skipWhitespace();
049: }
050:
051: /**
052: * Obtains the current line number in the input stream from the outside.
053: * @return the current line number.
054: */
055: public int getLineNumber() {
056: return m_in.getLineNumber();
057: }
058:
059: /**
060: * Skips any white space and comments in the input. This method
061: * stops either at the end of file, or at any non-whitespace
062: * input character.
063: */
064: private void skipWhitespace() throws IOException {
065: // end of file?
066: if (m_lookAhead == -1)
067: return;
068:
069: // skip over whitespace
070: while (m_lookAhead != -1
071: && Character.isWhitespace((char) m_lookAhead))
072: m_lookAhead = m_in.read();
073:
074: // skip over comments until eoln
075: if (m_lookAhead == '#') {
076: m_in.readLine();
077: m_lookAhead = m_in.read();
078: skipWhitespace(); // FIXME: reformulate end-recursion into loop
079: }
080: }
081:
082: /**
083: * Checks for the availability of more input.
084: * @return true, if there is more to read, false for EOF.
085: */
086: public boolean hasMoreTokens() throws IOException {
087: skipWhitespace();
088: return (this .m_lookAhead != -1);
089: }
090:
091: /**
092: * Obtains the next token from the input stream.
093: * @return an instance conforming to the token interface, or null for eof.
094: * @throws IOException if something went wrong while reading
095: * @throws VDLtScannerException if a lexical error was encountered.
096: */
097: public VDLtToken nextToken() throws IOException,
098: VDLtScannerException {
099: // sanity check
100: skipWhitespace();
101: if (m_lookAhead == -1)
102: return null;
103:
104: switch (m_lookAhead) {
105: case '$':
106: m_lookAhead = m_in.read();
107: if (m_lookAhead == -1
108: || Character.isWhitespace((char) m_lookAhead))
109: throw new VDLtScannerException(m_in,
110: "no whitespace allowed after dollar");
111: else
112: return new VDLtDollar();
113:
114: case ',':
115: m_lookAhead = m_in.read();
116: skipWhitespace();
117: return new VDLtComma();
118:
119: case '|':
120: m_lookAhead = m_in.read();
121: skipWhitespace();
122: return new VDLtVBar();
123:
124: case '.':
125: m_lookAhead = m_in.read();
126: if (m_lookAhead == -1
127: || Character.isWhitespace((char) m_lookAhead))
128: throw new VDLtScannerException(m_in,
129: "no whitespace allowed after period");
130: else
131: return new VDLtPeriod();
132:
133: case '@':
134: m_lookAhead = m_in.read();
135: if (m_lookAhead == -1
136: || Character.isWhitespace((char) m_lookAhead))
137: throw new VDLtScannerException(m_in,
138: "no whitespace allowed after at");
139: else
140: return new VDLtAt();
141:
142: case '-':
143: m_lookAhead = m_in.read();
144: if (m_lookAhead == '>') {
145: m_lookAhead = m_in.read();
146: skipWhitespace();
147: return new VDLtArrow();
148: } else {
149: throw new VDLtScannerException(m_in,
150: "a sole hyphen is not permitted");
151: }
152:
153: case '=':
154: m_lookAhead = m_in.read();
155: skipWhitespace();
156: return new VDLtEquals();
157:
158: case ';':
159: m_lookAhead = m_in.read();
160: skipWhitespace();
161: return new VDLtSemicolon();
162:
163: case '(':
164: m_lookAhead = m_in.read();
165: skipWhitespace();
166: return new VDLtOpenParenthesis();
167:
168: case ')':
169: m_lookAhead = m_in.read();
170: skipWhitespace();
171: return new VDLtCloseParenthesis();
172:
173: case '{':
174: m_lookAhead = m_in.read();
175: skipWhitespace();
176: return new VDLtOpenBrace();
177:
178: case '}':
179: m_lookAhead = m_in.read();
180: skipWhitespace();
181: return new VDLtCloseBrace();
182:
183: case '[':
184: m_lookAhead = m_in.read();
185: skipWhitespace();
186: return new VDLtOpenBracket();
187:
188: case ']':
189: m_lookAhead = m_in.read();
190: skipWhitespace();
191: return new VDLtCloseBracket();
192:
193: case ':':
194: m_lookAhead = m_in.read();
195: if (m_lookAhead == ':') {
196: m_lookAhead = m_in.read();
197: if (m_lookAhead == -1
198: || Character.isWhitespace((char) m_lookAhead)) {
199: throw new VDLtScannerException(m_in,
200: "no whitespace allowed after double colon");
201: } else {
202: return new VDLtDoubleColon();
203: }
204: } else if (m_lookAhead == -1
205: || Character.isWhitespace((char) m_lookAhead)) {
206: throw new VDLtScannerException(m_in,
207: "no whitespace allowed after colon");
208: } else {
209: return new VDLtColon();
210: }
211:
212: case '"':
213: // parse a quoted string
214: StringBuffer result = new StringBuffer(16);
215:
216: do {
217: m_lookAhead = m_in.read();
218: if (m_lookAhead == -1 || m_lookAhead == '\r'
219: || m_lookAhead == '\n') {
220: // eof is an unterminated string
221: throw new VDLtScannerException(m_in,
222: "unterminated quoted string");
223: } else if (m_lookAhead == '\\') {
224: int temp = m_in.read();
225: if (temp == -1)
226: throw new VDLtScannerException(m_in,
227: "unterminated escape in quoted string");
228: else
229: result.append((char) temp); // always add whatever is after the backslash
230: } else if (m_lookAhead != '"') {
231: result.append((char) m_lookAhead);
232: }
233: } while (m_lookAhead != '"');
234:
235: // skip over final quote
236: m_lookAhead = m_in.read();
237: skipWhitespace();
238: return new VDLtQuotedString(result.toString());
239:
240: default:
241: // are we parsing a reserved word or identifier
242: if (Character.isLetterOrDigit((char) m_lookAhead)
243: || m_lookAhead == '_' || m_lookAhead == '/') {
244: StringBuffer identifier = new StringBuffer(8);
245: identifier.append((char) m_lookAhead);
246: m_lookAhead = m_in.read();
247: while (m_lookAhead != -1
248: && (Character
249: .isLetterOrDigit((char) m_lookAhead)
250: || m_lookAhead == '_'
251: || m_lookAhead == '-' || // <-- soon to be dropped !!!
252: m_lookAhead == '/' || // <-- new for Mike
253: m_lookAhead == '.')) {
254: if (m_lookAhead == '-') {
255: // terry kludge just for Jim, grumblftz
256: m_in.mark(2);
257: m_lookAhead = m_in.read();
258: if (m_lookAhead == '>') {
259: // this is part of the next token, reset stream
260: m_in.reset();
261: m_lookAhead = '-';
262: break;
263: } else {
264: identifier.append('-');
265: }
266: } else {
267: identifier.append((char) m_lookAhead);
268: m_lookAhead = m_in.read();
269: }
270: }
271:
272: // done parsing identifier or reserved word
273: skipWhitespace();
274: String s = identifier.toString();
275: if (s.compareToIgnoreCase("tr") == 0)
276: // reserved word
277: return new VDLtTransformation();
278: else if (s.compareToIgnoreCase("dv") == 0)
279: // reserved word
280: return new VDLtDerivation();
281: else
282: // is a non-reserved identifier
283: return new VDLtIdentifier(s);
284:
285: } else {
286: // unknown material
287: throw new VDLtScannerException(m_in,
288: "unknown character " + m_lookAhead);
289: }
290: } // switch
291: }
292: }
|