001: /*
002: * This is free software, licensed under the Gnu Public License (GPL)
003: * get a copy from <http://www.gnu.org/licenses/gpl.html>
004: * $Id: SQLStatementSeparator.java,v 1.20 2005/06/05 22:27:29 hzeller Exp $
005: * author: Henner Zeller <H.Zeller@acm.org>
006: */
007: package henplus;
008:
009: import java.util.Stack;
010:
011: import henplus.property.PropertyHolder;
012: import henplus.property.BooleanPropertyHolder;
013:
014: /**
015: * Simple parser that separates SQLStatements.
016: * Example.
017: * <pre>
018: *-----------------------
019: statementSeparator.append("select * from foo; echo $foobar \n");
020: while (statementSeparator.hasNext()) {
021: String stmt = statementSeparator.next();
022: if (stmt.startsWith("echo")) {
023: // is ok, this command works always without ';'
024: statementSeparator.consumed();
025: System.err.println(stmt.substring("echo ".length()));
026: }
027: else { // SQL-command. we require a semicolon at the end.
028: if (stmt.charAt(stmt.length()-1) != ';') {
029: statementSeparator.cont(); // continue.
030: }
031: else {
032: statementSeparator.consumed();
033: SQLExecute(stmt);
034: }
035: }
036: }
037: *-----------------------
038: *</pre>
039: *
040: * FIXME: this is a bit rough and has accumulated some ideas. Should be
041: * cleaned up.
042: *
043: * @author Henner Zeller <H.Zeller@acm.org>
044: */
045: public class SQLStatementSeparator {
046: private static final byte NEW_STATEMENT = 0;
047: private static final byte START = 1; // statement == start
048: private static final byte STATEMENT = 1;
049: private static final byte START_COMMENT = 3;
050: private static final byte COMMENT = 4;
051: private static final byte PRE_END_COMMENT = 5;
052: private static final byte START_ANSI = 6;
053: private static final byte ENDLINE_COMMENT = 7;
054: private static final byte STRING = 8;
055: private static final byte STRING_QUOTE = 9;
056: private static final byte SQLSTRING = 10;
057: private static final byte SQLSTRING_QUOTE = 11;
058: private static final byte STATEMENT_QUOTE = 12; // backslash in statement
059: private static final byte FIRST_SEMICOLON_ON_LINE_SEEN = 13;
060: private static final byte POTENTIAL_END_FOUND = 14;
061:
062: private static class ParseState {
063: private byte _state;
064: private StringBuffer _inputBuffer;
065: private StringBuffer _commandBuffer;
066: /*
067: * instead of adding new states, we store the
068: * fact, that the last 'potential_end_found' was
069: * a newline here.
070: */
071: private boolean _eolineSeen;
072:
073: public ParseState() {
074: _eolineSeen = true; // we start with a new line.
075: _state = NEW_STATEMENT;
076: _inputBuffer = new StringBuffer();
077: _commandBuffer = new StringBuffer();
078: }
079:
080: public byte getState() {
081: return _state;
082: }
083:
084: public void setState(byte s) {
085: _state = s;
086: }
087:
088: public boolean hasNewlineSeen() {
089: return _eolineSeen;
090: }
091:
092: public void setNewlineSeen(boolean n) {
093: _eolineSeen = n;
094: }
095:
096: public StringBuffer getInputBuffer() {
097: return _inputBuffer;
098: }
099:
100: public StringBuffer getCommandBuffer() {
101: return _commandBuffer;
102: }
103: };
104:
105: private boolean _removeComments;
106: private ParseState _currentState;
107: private Stack _stateStack;
108:
109: public SQLStatementSeparator() {
110: _currentState = new ParseState();
111: _stateStack = new Stack();
112: _removeComments = true;
113: }
114:
115: /**
116: * push the current state and start with a clean one. Use to parse
117: * other files (like includes), and continue then with the old
118: * state.
119: * like
120: * load foobar.sql ; select * from foobar
121: */
122: public void push() {
123: _stateStack.push(_currentState);
124: _currentState = new ParseState();
125: }
126:
127: public void pop() {
128: _currentState = (ParseState) _stateStack.pop();
129: }
130:
131: /**
132: * add a new line including the '\n' to the input buffer.
133: */
134: public void append(String s) {
135: _currentState.getInputBuffer().append(s);
136: }
137:
138: /**
139: * discard any input.
140: */
141: public void discard() {
142: _currentState.getInputBuffer().setLength(0);
143: _currentState.getCommandBuffer().setLength(0);
144: _currentState.setState(NEW_STATEMENT);
145: }
146:
147: /**
148: * after having called next(), call cont(), if you are not yet
149: * pleased with the result; the parser should read to the next
150: * possible end.
151: */
152: public void cont() {
153: _currentState.setState(START);
154: }
155:
156: /**
157: * after having called next() and you were pleased with the result
158: * call this method to state, that you consumed it.
159: */
160: public void consumed() {
161: _currentState.setState(NEW_STATEMENT);
162: }
163:
164: /**
165: * returns true, if the parser can find a complete command that either
166: * ends with newline or with ';'
167: */
168: public boolean hasNext() throws IllegalStateException {
169: if (_currentState.getState() == POTENTIAL_END_FOUND)
170: throw new IllegalStateException(
171: "call cont() or consumed() before hasNext()");
172: if (_currentState.getInputBuffer().length() == 0)
173: return false;
174: parsePartialInput();
175: return (_currentState.getState() == POTENTIAL_END_FOUND);
176: }
177:
178: /**
179: * returns the next command; requires to call hasNext() before.
180: */
181: public String next() throws IllegalStateException {
182: if (_currentState.getState() != POTENTIAL_END_FOUND)
183: throw new IllegalStateException(
184: "next() called without hasNext()");
185: return _currentState.getCommandBuffer().toString();
186: }
187:
188: /**
189: * returns a property holder for the remove comments property.
190: */
191: public PropertyHolder getRemoveCommentsProperty() {
192: return new RemoveCommentsProperty();
193: }
194:
195: /**
196: * set, whether comments should be removed.
197: */
198: public void removeComments(boolean b) {
199: _removeComments = b;
200: }
201:
202: /**
203: * parse partial input and set state to POTENTIAL_END_FOUND if we
204: * either reached end-of-line or a semicolon.
205: */
206: private void parsePartialInput() {
207: int pos = 0;
208: char current;
209: byte oldstate = -1;
210:
211: // local variables: faster access.
212: byte state = _currentState.getState();
213: boolean lastEoline = _currentState.hasNewlineSeen();
214:
215: final StringBuffer input = _currentState.getInputBuffer();
216: final StringBuffer parsed = _currentState.getCommandBuffer();
217:
218: if (state == NEW_STATEMENT) {
219: parsed.setLength(0);
220: /* skip leading whitespaces of next statement .. */
221: while (pos < input.length()
222: && Character.isWhitespace(input.charAt(pos))) {
223: _currentState.setNewlineSeen(input.charAt(pos) == '\n');
224: ++pos;
225: }
226: input.delete(0, pos);
227: pos = 0;
228: }
229:
230: if (input.length() == 0) {
231: state = POTENTIAL_END_FOUND;
232: }
233:
234: //System.err.println("Startstate: " + state + "; LEOL: " + lastEoline);
235:
236: while (state != POTENTIAL_END_FOUND && pos < input.length()) {
237: boolean vetoAppend = false;
238: boolean reIterate;
239: current = input.charAt(pos);
240: if (current == '\r') {
241: current = '\n'; // canonicalize.
242: }
243:
244: if (current == '\n') {
245: _currentState.setNewlineSeen(true);
246: }
247:
248: //System.out.print ("Pos: " + pos + "\t");
249: do {
250: reIterate = false;
251: switch (state) {
252: case NEW_STATEMENT:
253: //case START: START == STATEMENT.
254: case STATEMENT:
255: if (current == '\n') {
256: state = POTENTIAL_END_FOUND;
257: _currentState.setNewlineSeen(true);
258: }
259:
260: /*
261: * special handling of the 'first-two-semicolons-after
262: * a-newline-comment'.
263: */
264: else if (_removeComments && lastEoline
265: && current == ';') {
266: state = FIRST_SEMICOLON_ON_LINE_SEEN;
267: } else if (!lastEoline && current == ';') {
268: _currentState.setNewlineSeen(false);
269: state = POTENTIAL_END_FOUND;
270: } else if (_removeComments && current == '/') {
271: state = START_COMMENT;
272: }
273:
274: /*
275: * only if '#' this is the first character, make it
276: * a comment..
277: */
278: else if (_removeComments && lastEoline
279: && current == '#') {
280: state = ENDLINE_COMMENT;
281: } else if (current == '"')
282: state = STRING;
283: else if (current == '\'')
284: state = SQLSTRING;
285: else if (current == '-')
286: state = START_ANSI;
287: else if (current == '\\')
288: state = STATEMENT_QUOTE;
289: break;
290: case STATEMENT_QUOTE:
291: state = STATEMENT;
292: break;
293: case FIRST_SEMICOLON_ON_LINE_SEEN:
294: if (current == ';')
295: state = ENDLINE_COMMENT;
296: else {
297: state = POTENTIAL_END_FOUND;
298: current = ';';
299: /*
300: * we've read too much. Reset position.
301: */
302: --pos;
303: }
304: break;
305: case START_COMMENT:
306: if (current == '*')
307: state = COMMENT;
308: /*
309: * Endline comment in the style '// comment' is not a
310: * good idea, since many JDBC-urls contain the '//' as
311: * part of the URL .. and this should _not_ be regarded as
312: * commend of course.
313: */
314: //else if (current == '/') state = ENDLINE_COMMENT;
315: else {
316: parsed.append('/');
317: state = STATEMENT;
318: reIterate = true;
319: }
320: break;
321: case COMMENT:
322: if (current == '*')
323: state = PRE_END_COMMENT;
324: break;
325: case PRE_END_COMMENT:
326: if (current == '/')
327: state = STATEMENT;
328: else if (current == '*')
329: state = PRE_END_COMMENT;
330: else
331: state = COMMENT;
332: break;
333: case START_ANSI:
334: if (current == '-')
335: state = ENDLINE_COMMENT;
336: else {
337: parsed.append('-');
338: state = STATEMENT;
339: reIterate = true;
340: }
341: break;
342: case ENDLINE_COMMENT:
343: if (current == '\n')
344: state = POTENTIAL_END_FOUND;
345: break;
346: case STRING:
347: if (current == '\\')
348: state = STRING_QUOTE;
349: else if (current == '"')
350: state = STATEMENT;
351: break;
352: case SQLSTRING:
353: if (current == '\\')
354: state = SQLSTRING_QUOTE;
355: if (current == '\'')
356: state = STATEMENT;
357: break;
358: case STRING_QUOTE:
359: vetoAppend = (current == '\n'); // line continuation
360: if (current == 'n')
361: current = '\n';
362: else if (current == 'r')
363: current = '\r';
364: else if (current == 't')
365: current = '\t';
366: else if (current != '\n' && current != '"') {
367: // if we do not recognize the escape sequence,
368: // pass it through.
369: parsed.append("\\");
370: }
371: state = STRING;
372: break;
373: case SQLSTRING_QUOTE:
374: vetoAppend = (current == '\n'); // line continuation
375: // convert a "\'" to a correct SQL-Quote "''"
376: if (current == '\'')
377: parsed.append("'");
378: else if (current == 'n')
379: current = '\n';
380: else if (current == 'r')
381: current = '\r';
382: else if (current == 't')
383: current = '\t';
384: else if (current != '\n') {
385: // if we do not recognize the escape sequence,
386: // pass it through.
387: parsed.append("\\");
388: }
389: state = SQLSTRING;
390: break;
391: }
392: } while (reIterate);
393:
394: /* append to parsed; ignore comments */
395: if (!vetoAppend
396: && ((state == STATEMENT && oldstate != PRE_END_COMMENT)
397: || state == NEW_STATEMENT
398: || state == STATEMENT_QUOTE
399: || state == STRING || state == SQLSTRING || state == POTENTIAL_END_FOUND)) {
400: parsed.append(current);
401: }
402:
403: oldstate = state;
404: pos++;
405: /*
406: * we maintain the state of 'just seen newline' as long
407: * as we only skip whitespaces..
408: */
409: lastEoline &= Character.isWhitespace(current);
410: }
411: // we reached: POTENTIAL_END_FOUND. Store the rest, that
412: // has not been parsed in the input-buffer.
413: input.delete(0, pos);
414: _currentState.setState(state);
415: }
416:
417: private class RemoveCommentsProperty extends BooleanPropertyHolder {
418:
419: public RemoveCommentsProperty() {
420: super (SQLStatementSeparator.this ._removeComments);
421: }
422:
423: public void booleanPropertyChanged(boolean value) {
424: removeComments(value);
425: }
426:
427: public String getDefaultValue() {
428: return "on";
429: }
430:
431: /**
432: * return a short descriptive string.
433: */
434: public String getShortDescription() {
435: return "switches the removal of SQL-comments";
436: }
437:
438: public String getLongDescription() {
439: String dsc;
440: dsc = "\tSwitch the behaviour to remove all comments\n"
441: + "\tfound in the string sent to the database. Some databases\n"
442: + "\tcan not handle comments in JDBC-Strings.\n\nValues\n"
443:
444: + "\ttrue\n"
445: + "\t\tDEFAULT. Remove all SQL92 comments found in the given\n"
446: + "\t\tSQL Strings before sending them to the database.\n\n"
447:
448: + "\tfalse\n"
449: + "\t\tSwitch off the default behaviour to remove all\n"
450: + "\t\tcomments found in the string sent to the database.\n"
451: + "\t\tUsually, this is not necessary, but there are\n"
452: + "\t\tconditions where comments actually convey a meaning\n"
453: + "\t\tto the database. For instance hinting in oracle works\n"
454: + "\t\twith comments, like\n"
455: + "\t\t select /*+ index(foo,foo_fk_idx) */ ....\n"
456: + "\t\t..so removing of comments should be off in this case";
457: return dsc;
458: }
459: }
460: }
461:
462: /*
463: * Local variables:
464: * c-basic-offset: 4
465: * compile-command: "ant -emacs -find build.xml"
466: * End:
467: */
|