001: /*
002:
003: Derby - Class org.apache.derby.impl.tools.ij.StatementFinder
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to You under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.impl.tools.ij;
023:
024: import java.io.IOException;
025: import java.io.Reader;
026:
027: /**
028: StatementGrabber looks through an input stream for
029: the next JSQL statement. A statement is considered to
030: be any tokens up to the next semicolon or EOF.
031: <p>
032: Semicolons inside comments, strings, and delimited identifiers
033: are not considered to be statement terminators but to be
034: part of those tokens.
035: <p>
036: The only comment form currently recognized is the SQL comment,
037: which begins with "--" and ends at the next EOL.
038: <p>
039: Strings and delimited identifiers are permitted to contain
040: newlines; the actual IJ or JSQL parsers will report errors when
041: those cases occur.
042: <p>
043: There are no escaped characters, i.e. "\n" is considered to
044: be two characters, '\' and 'n'.
045:
046: @author ames
047: */
048:
049: public class StatementFinder {
050:
051: private Reader source;
052: private StringBuffer statement = new StringBuffer();
053: private int state;
054: private boolean atEOF = false;
055: private boolean peekEOF = false;
056: private char peekChar;
057: private boolean peeked = false;
058:
059: // state variables
060: private static final int IN_STATEMENT = 0;
061: private static final int IN_STRING = 1;
062: private static final int IN_SQLCOMMENT = 2;
063: private static final int END_OF_STATEMENT = 3;
064: private static final int END_OF_INPUT = 4;
065:
066: // special state-changing characters
067: private static final char MINUS = '-';
068: private static final char SINGLEQUOTE = '\'';
069: private static final char DOUBLEQUOTE = '\"';
070: private static final char SEMICOLON = ';';
071: private static final char NEWLINE = '\n';
072: private static final char RETURN = '\r';
073: private static final char SPACE = ' ';
074: private static final char TAB = '\t';
075: private static final char FORMFEED = '\f';
076:
077: /**
078: The constructor does not assume the stream is data input
079: or buffered, so it will wrap it appropriately.
080:
081: @param s the input stream for reading statements from.
082: */
083: public StatementFinder(Reader s) {
084: source = s;
085: }
086:
087: /**
088: Reinit is used to redirect the finder to another stream.
089: The previous stream should not have been in a PEEK state.
090:
091: @param s the input stream for reading statements from.
092: */
093: public void ReInit(Reader s) {
094: try {
095: source.close();
096: } catch (IOException ioe) {
097: // just be quiet if it is already gone
098: }
099: source = s;
100: state = IN_STATEMENT;
101: atEOF = false;
102: peekEOF = false;
103: peeked = false;
104: }
105:
106: public void close() throws IOException {
107: source.close();
108: }
109:
110: /**
111: get the next statement in the input stream. Returns it,
112: dropping its closing semicolon if it has one. If there is
113: no next statement, return a null.
114:
115: @return the next statement in the input stream.
116: */
117: public String nextStatement() {
118: boolean haveSemi = false;
119: char nextChar;
120:
121: // initialize fields for getting the next statement
122: statement.setLength(0);
123: if (state == END_OF_INPUT)
124: return null;
125:
126: state = IN_STATEMENT;
127:
128: // skip leading whitespace
129: nextChar = peekChar();
130: if (peekEOF()) {
131: state = END_OF_INPUT;
132: return null;
133: }
134: if (whiteSpace(nextChar)) {
135: while (whiteSpace(peekChar()) && !peekEOF())
136: ;
137: if (peekEOF()) {
138: state = END_OF_INPUT;
139: return null;
140: }
141: }
142:
143: while (state != END_OF_STATEMENT && state != END_OF_INPUT) {
144:
145: // get the next character from the input
146: nextChar = readChar();
147: if (atEOF()) {
148: state = END_OF_INPUT;
149: break;
150: }
151:
152: switch (nextChar) {
153: case MINUS:
154: readSingleLineComment(nextChar);
155: break;
156: case SINGLEQUOTE:
157: case DOUBLEQUOTE:
158: readString(nextChar);
159: break;
160: case SEMICOLON:
161: haveSemi = true;
162: state = END_OF_STATEMENT;
163: break;
164: default:
165: // keep going, just a normal character
166: break;
167: }
168: }
169:
170: if (haveSemi)
171: statement.setLength(statement.length() - 1);
172: return statement.toString();
173: }
174:
175: /**
176: Determine if the given character is considered whitespace
177:
178: @param c the character to consider
179: @return true if the character is whitespace
180: */
181: private boolean whiteSpace(char c) {
182: return (c == SPACE || c == TAB || c == RETURN || c == NEWLINE || c == FORMFEED);
183: }
184:
185: /**
186: Advance the source stream to the end of a comment if it
187: is on one, assuming the first character of
188: a potential single line comment has been found.
189: If it is not a comment, do not advance the stream.
190: <p>
191: The form of a single line comment is, in regexp, XX.*$,
192: where XX is two instances of commentChar.
193:
194: @param commentChar the character whose duplication signifies
195: the start of the comment.
196: */
197: private void readSingleLineComment(char commentChar) {
198: char nextChar;
199:
200: nextChar = peekChar();
201: // if next char is EOF, we are done.
202: if (peekEOF())
203: return;
204:
205: // if nextChar is not a minus, it was just a normal minus,
206: // nothing special to do
207: if (nextChar != commentChar)
208: return;
209:
210: // we are really in a comment
211: readChar(); // grab the minus for real.
212:
213: state = IN_SQLCOMMENT;
214: do {
215: nextChar = peekChar();
216: if (peekEOF()) {
217: // let the caller process the EOF, don't read it
218: state = IN_STATEMENT;
219: return;
220: }
221: switch (nextChar) {
222: case NEWLINE:
223: case RETURN:
224: readChar(); // okay to process the character
225: state = IN_STATEMENT;
226: return;
227: default:
228: readChar(); // process the character, still in comment
229: break;
230: }
231: } while (state == IN_SQLCOMMENT); // could be while true...
232: }
233:
234: /**
235: Advance the stream to the end of the string.
236: Assumes the opening delimiter of the string has been read.
237: This handles the SQL ability to put the delimiter within
238: the string by doubling it, by reading those as two strings
239: sitting next to one another. I.e, 'Mary''s lamb' is read
240: by this class as two strings, 'Mary' and 's lamb'.
241: <p>
242: The delimiter of the string is expected to be repeated at
243: its other end. If the other flavor of delimiter occurs within
244: the string, it is just a normal character within it.
245: <p>
246: All characters except the delimiter are permitted within the
247: string. If EOF is hit before the closing delimiter is found,
248: the end of the string is assumed. Parsers using this parser
249: will detect the error in that case and return appropriate messages.
250:
251: @param stringDelimiter the starting and ending character
252: for the string being read.
253: */
254: private void readString(char stringDelimiter) {
255: state = IN_STRING;
256: do {
257: char nextChar = readChar();
258:
259: if (atEOF()) {
260: state = END_OF_INPUT;
261: return;
262: }
263:
264: if (nextChar == stringDelimiter) {
265: // we've reached the end of the string
266: state = IN_STATEMENT;
267: return;
268: }
269:
270: // still in string
271: } while (state == IN_STRING); // could be while true...
272: }
273:
274: private boolean atEOF() {
275: return atEOF;
276: }
277:
278: private boolean peekEOF() {
279: return peekEOF;
280: }
281:
282: /**
283: return the next character in the source stream and
284: append it to the statement buffer.
285:
286: @return the next character in the source stream.
287: */
288: private char readChar() {
289: if (!peeked)
290: peekChar();
291:
292: peeked = false;
293: atEOF = peekEOF;
294:
295: if (!atEOF)
296: statement.append(peekChar);
297:
298: return peekChar;
299: }
300:
301: /**
302: return the next character in the source stream, without
303: advancing.
304:
305: @return the next character in the source stream.
306: */
307: private char peekChar() {
308: peeked = true;
309: char c = '\00';
310:
311: try {
312: int cInt;
313:
314: // REMIND: this is assuming a flat ascii source file.
315: // will need to beef it up at some future point to
316: // understand whether the stream is ascii or something else.
317: cInt = source.read();
318: peekEOF = (cInt == -1);
319: if (!peekEOF)
320: c = (char) cInt;
321: } catch (IOException ie) {
322: throw ijException.iOException(ie);
323: }
324:
325: peekChar = c;
326: return c;
327: }
328: }
|