001: /*
002: * IteratingScriptParser.java
003: *
004: * This file is part of SQL Workbench/J, http://www.sql-workbench.net
005: *
006: * Copyright 2002-2008, Thomas Kellerer
007: * No part of this code maybe reused without the permission of the author
008: *
009: * To contact the author please send an email to: support@sql-workbench.net
010: *
011: */
012: package workbench.sql;
013:
014: import java.io.File;
015: import java.io.IOException;
016: import java.util.regex.Matcher;
017: import java.util.regex.Pattern;
018: import workbench.interfaces.CharacterSequence;
019: import workbench.util.EncodingUtil;
020: import workbench.util.FileMappedSequence;
021: import workbench.util.SqlUtil;
022: import workbench.util.StringSequence;
023: import workbench.util.StringUtil;
024:
025: /**
026: * A class to parse a script with SQL commands. Access to the commands
027: * is given through an Iterator. If a file is set as the source for
028: * this parser, then the file will not be read into memory. A
029: * {@link workbench.util.FileMappedSequence} will be used to process
030: * the file. If the script is defined through a String, then
031: * a {@link workbench.util.StringSequence} is used to process the Script
032: *
033: * @see workbench.interfaces.CharacterSequence
034: * @see workbench.util.FileMappedSequence
035: * @see workbench.util.StringSequence
036: *
037: * @author support@sql-workbench.net
038: */
039:
040: public class IteratingScriptParser {
041: private CharacterSequence script;
042: private DelimiterDefinition delimiter = DelimiterDefinition.STANDARD_DELIMITER;
043: private int delimiterLength = 1;
044: private int scriptLength = -1;
045: private int lastPos = 0;
046: private int lastCommandEnd = -1;
047: private boolean quoteOn = false;
048: private boolean commentOn = false;
049: private boolean blockComment = false;
050: private boolean singleLineComment = false;
051: private boolean startOfLine = true;
052: private int lastNewLineStart = 0;
053: private char lastQuote = 0;
054: private boolean checkEscapedQuotes = true;
055: private boolean emptyLineIsSeparator = false;
056: private boolean supportOracleInclude = true;
057: private boolean checkSingleLineCommands = true;
058: private boolean storeSqlInCommands = false;
059: private boolean returnStartingWhitespace = false;
060: //private boolean checkHashComment = false;
061: private String alternateLineComment = "--";
062:
063: // These patterns cover the statements that
064: // can be used in a single line without a delimiter
065: // This is basically to make the parser as Oracle compatible as possible
066: // while not breaking the SQL queries for other servers
067: private Pattern[] SLC_PATTERNS = {
068: Pattern
069: .compile("(?mi)^\\s*SET\\s*\\w*\\s*(ON|OFF)\\s*;?\\s*$"),
070: Pattern
071: .compile("(?mi)^\\s*ECHO\\s*((ON)|(OFF))\\s*;?\\s*$"),
072: Pattern.compile("(?mi)^\\s*DECLARE\\s*\\S*.*$"),
073: Pattern.compile("(?mi)^\\s*WHENEVER\\s*ERROR\\s*$"),
074: Pattern
075: .compile("(?mi)^\\s*SET\\s*TRANSACTION\\s*READ\\s*((WRITE)|(ONLY))\\s*;?\\s*$") };
076:
077: private Pattern ORA_INCLUDE_PATTERN = Pattern
078: .compile("(?m)^\\s*@.*$");
079:
080: /** Create an InteratingScriptParser
081: */
082: public IteratingScriptParser() {
083: }
084:
085: /**
086: * Initialize a ScriptParser from a file with a given encoding.
087: * @see #setFile(File, String)
088: */
089: public IteratingScriptParser(File f, String encoding)
090: throws IOException {
091: this ();
092: this .setFile(f, encoding);
093: }
094:
095: /**
096: * Create a ScriptParser for the given String.
097: * The delimiter to be used will be evaluated dynamically
098: */
099: public IteratingScriptParser(String aScript) throws IOException {
100: this ();
101: if (aScript == null)
102: throw new IllegalArgumentException("Script may not be null");
103: this .setScript(aScript);
104: }
105:
106: /**
107: * Define the source file for the script using the default encoding.
108: * @see #setFile(File, String)
109: * @see workbench.util.EncodingUtil#getDefaultEncoding()
110: */
111: public void setFile(File f) throws IOException {
112: this .setFile(f, EncodingUtil.getDefaultEncoding());
113: }
114:
115: /**
116: * Define the source file to be used and the encoding of the file.
117: * If the encoding is null, the default encoding will be used.
118: * @see #setFile(File, String)
119: * @see workbench.util.EncodingUtil#getDefaultEncoding()
120: */
121: public final void setFile(File f, String enc) throws IOException {
122: this .cleanup();
123: // Make sure we have an encoding (otherwise FileMappedSequence will not work!
124: if (enc == null)
125: enc = EncodingUtil.getDefaultEncoding();
126: this .script = new FileMappedSequence(f, enc);
127: this .scriptLength = (int) f.length();
128: this .checkEscapedQuotes = false;
129: this .storeSqlInCommands = true;
130: this .reset();
131: }
132:
133: /**
134: * Should the parser check for MySQL hash comments?
135: */
136: public void setAlternateLineComment(String comment) {
137: this .alternateLineComment = (comment == null ? null : comment
138: .trim());
139: }
140:
141: public void setCheckForSingleLineCommands(boolean flag) {
142: this .checkSingleLineCommands = flag;
143: }
144:
145: public void setReturnStartingWhitespace(boolean flag) {
146: this .returnStartingWhitespace = flag;
147: }
148:
149: /**
150: * Support Oracle style @ includes
151: */
152: public void setSupportOracleInclude(boolean flag) {
153: this .supportOracleInclude = flag;
154: }
155:
156: public void allowEmptyLineAsSeparator(boolean flag) {
157: this .emptyLineIsSeparator = flag;
158: }
159:
160: private void cleanup() {
161: if (this .script != null)
162: this .script.done();
163: }
164:
165: /**
166: * Define the script to be parsed
167: */
168: public final void setScript(String aScript) {
169: this .cleanup();
170: this .storeSqlInCommands = false;
171: this .script = new StringSequence(aScript);
172: this .scriptLength = aScript.length();
173: this .checkEscapedQuotes = false;
174: this .reset();
175: }
176:
177: public void reset() {
178: lastCommandEnd = 0;
179: lastPos = 0;
180: quoteOn = false;
181: commentOn = false;
182: blockComment = false;
183: singleLineComment = false;
184: startOfLine = true;
185: lastNewLineStart = 0;
186: lastQuote = 0;
187: }
188:
189: public void setDelimiter(DelimiterDefinition delim) {
190: if (delim == null) {
191: this .delimiter = DelimiterDefinition.STANDARD_DELIMITER;
192: } else {
193: this .delimiter = delim;
194: }
195: this .delimiterLength = this .delimiter.getDelimiter().length();
196: }
197:
198: public int getScriptLength() {
199: return this .scriptLength;
200: }
201:
202: public int findNextLineStart(int pos) {
203: if (pos < 0)
204: return pos;
205:
206: if (pos >= this .scriptLength)
207: return pos;
208: char c = this .script.charAt(pos);
209: while (pos < this .scriptLength && (c == '\n' || c == '\r')) {
210: pos++;
211: c = script.charAt(pos);
212: }
213: return pos;
214: }
215:
216: public boolean hasMoreCommands() {
217: if (lastPos < this .scriptLength) {
218: int nextPos = findNextNonWhiteSpace(lastPos);
219: return nextPos < scriptLength;
220: }
221: return false;
222: }
223:
224: private int findNextNonWhiteSpace(int start) {
225: char ch = this .script.charAt(start);
226: while (start < this .scriptLength && Character.isWhitespace(ch)) {
227: start++;
228: if (start < this .scriptLength)
229: ch = this .script.charAt(start);
230: }
231: return start;
232: }
233:
234: private boolean isLineComment(int pos) {
235: return StringUtil.lineStartsWith(this .script, pos, "--")
236: || StringUtil.lineStartsWith(this .script, pos,
237: alternateLineComment);
238: }
239:
240: /**
241: * Parse the given SQL Script into a List of single SQL statements.
242: * Returns the index of the statement indicated by the currentCursorPos
243: */
244: public ScriptCommandDefinition getNextCommand() {
245: int pos;
246: String currChar;
247: boolean delimiterOnOwnLine = this .delimiter.isSingleLine();
248: String delim = this .delimiter.getDelimiter();
249:
250: for (pos = this .lastPos; pos < this .scriptLength; pos++) {
251: currChar = this .script.subSequence(pos, pos + 1).toString()
252: .toUpperCase();
253: char firstChar = currChar.charAt(0);
254:
255: // skip CR characters
256: if (firstChar == '\r')
257: continue;
258:
259: char nextChar = (pos < scriptLength - 1 ? this .script
260: .charAt(pos + 1) : 0);
261:
262: // ignore quotes in comments
263: if (!commentOn && (firstChar == '\'' || firstChar == '"')) {
264: if (!quoteOn) {
265: lastQuote = firstChar;
266: quoteOn = true;
267: } else if (firstChar == lastQuote) {
268: if (pos > 1) {
269: // check if the current quote char was escaped
270: if (!this .checkEscapedQuotes
271: || this .script.charAt(pos - 1) != '\\') {
272: lastQuote = 0;
273: quoteOn = false;
274: }
275: } else {
276: lastQuote = 0;
277: quoteOn = false;
278: }
279: }
280: }
281:
282: if (quoteOn)
283: continue;
284:
285: // now check for comment start
286: if (!quoteOn && pos < scriptLength - 1) {
287: if (!commentOn) {
288: if (firstChar == '/' && nextChar == '*') {
289: blockComment = true;
290: singleLineComment = false;
291: commentOn = true;
292: //pos ++; // ignore the next character
293: } else if (startOfLine && isLineComment(pos)) {
294: singleLineComment = true;
295: blockComment = false;
296: commentOn = true;
297: }
298: } else {
299: if (singleLineComment) {
300: if (firstChar == '\n') {
301: singleLineComment = false;
302: blockComment = false;
303: commentOn = false;
304: startOfLine = true;
305: lastNewLineStart = pos;
306: continue;
307: }
308: } else if (blockComment) {
309: char last = this .script.charAt(pos - 1);
310: if (firstChar == '/' && last == '*') {
311: blockComment = false;
312: singleLineComment = false;
313: commentOn = false;
314: continue;
315: }
316: }
317: }
318: }
319:
320: if (!quoteOn && !commentOn) {
321: if (this .delimiterLength > 1
322: && pos + this .delimiterLength < scriptLength) {
323: currChar = this .script.subSequence(pos,
324: pos + this .delimiterLength).toString()
325: .toUpperCase();
326: }
327:
328: if (!delimiterOnOwnLine
329: && (currChar.equals(delim) || (pos == scriptLength))) {
330: if (lastPos >= pos && pos < scriptLength - 1) {
331: lastPos++;
332: continue;
333: }
334: startOfLine = true;
335: this .lastNewLineStart = pos + 1;
336: this .lastPos = pos + this .delimiterLength;
337: int start = lastCommandEnd;
338: this .lastCommandEnd = lastPos;
339: ScriptCommandDefinition c = this .createCommand(
340: start, pos);
341: if (c == null)
342: continue;
343: return c;
344: } else {
345: if (firstChar == '\n') {
346: String line = this .script.subSequence(
347: lastNewLineStart, pos).toString()
348: .trim();
349: String clean = SqlUtil.makeCleanSql(line,
350: false, false, '\'');
351:
352: if ((this .emptyLineIsSeparator && clean
353: .length() == 0)
354: || (delimiterOnOwnLine && line
355: .equalsIgnoreCase(delim))) {
356: int end = pos;
357:
358: if (clean.length() > 0) {
359: // a single line delimiter was found, we have to make
360: // sure this is not added to the created command
361: end = lastNewLineStart;
362: }
363: int start = lastCommandEnd;
364: ScriptCommandDefinition c = this
365: .createCommand(start, end);
366: if (c != null) {
367: startOfLine = true;
368: this .lastNewLineStart = pos + 1;
369: this .lastPos = pos
370: + this .delimiterLength;
371: this .lastCommandEnd = lastPos;
372: return c;
373: }
374: }
375:
376: if (this .checkSingleLineCommands) {
377: boolean slcFound = false;
378:
379: int commandStart = lastNewLineStart;
380: int commandEnd = pos;
381:
382: lastNewLineStart = pos;
383: startOfLine = true;
384:
385: if (clean.length() > 0) {
386: for (int pi = 0; pi < SLC_PATTERNS.length; pi++) {
387: Matcher m = SLC_PATTERNS[pi]
388: .matcher(clean);
389:
390: if (m.matches()) {
391: slcFound = true;
392: break;
393: }
394: }
395: if (!slcFound
396: && this .supportOracleInclude) {
397: Matcher m = ORA_INCLUDE_PATTERN
398: .matcher(clean);
399: if (m.matches()) {
400: slcFound = true;
401: }
402: }
403: }
404:
405: if (slcFound) {
406: lastPos = pos;
407: this .lastCommandEnd = commandEnd + 1;
408: return createCommand(commandStart,
409: commandEnd);
410: }
411: continue;
412: }
413: lastNewLineStart = pos + 1;
414: } else {
415: startOfLine = false;
416: }
417: }
418: }
419:
420: } // end loop for next statement
421:
422: ScriptCommandDefinition c = null;
423: if (lastPos < pos && !blockComment && !quoteOn) {
424: String value = this .script.subSequence(lastCommandEnd,
425: scriptLength).toString().trim();
426: if (!this .delimiter.equals(value.trim())) {
427: int endpos = scriptLength;
428: if (value.endsWith(delim)) {
429: endpos = endpos - this .delimiterLength;
430: }
431: c = createCommand(lastCommandEnd, endpos);
432: }
433: }
434: this .lastPos = scriptLength;
435: return c;
436: }
437:
438: private ScriptCommandDefinition createCommand(int startPos,
439: int endPos) {
440: String value = null;
441:
442: if (startPos >= scriptLength)
443: return null;
444:
445: if (endPos == -1) {
446: endPos = scriptLength;
447: }
448:
449: int realStart = startPos;
450:
451: // remove whitespaces at the start
452: if (!returnStartingWhitespace) {
453: char ch = this .script.charAt(startPos);
454: while (startPos < endPos && Character.isWhitespace(ch)) {
455: startPos++;
456: if (startPos < endPos)
457: ch = this .script.charAt(startPos);
458: }
459: }
460:
461: if (startPos >= endPos)
462: return null;
463: if (storeSqlInCommands) {
464: value = this .script.subSequence(startPos, endPos)
465: .toString();
466: }
467: ScriptCommandDefinition c = new ScriptCommandDefinition(value,
468: startPos, endPos);
469: c.setWhitespaceStart(realStart);
470:
471: return c;
472: }
473:
474: public void setCheckEscapedQuotes(boolean flag) {
475: this .checkEscapedQuotes = flag;
476: }
477:
478: public void done() {
479: this.script.done();
480: }
481:
482: }
|