001: /*
002: * WbStringTokenizer.java
003: *
004: * This file is part of SQL Workbench/J, http://www.sql-workbench.net
005: *
006: * Copyright 2002-2008, Thomas Kellerer
007: * No part of this code maybe reused without the permission of the author
008: *
009: * To contact the author please send an email to: support@sql-workbench.net
010: *
011: */
012: package workbench.util;
013:
014: import java.io.BufferedReader;
015: import java.io.FileNotFoundException;
016: import java.io.FileReader;
017: import java.io.IOException;
018: import java.io.Reader;
019: import java.io.StringReader;
020: import java.util.LinkedList;
021: import java.util.List;
022:
/**
 * Splits character input (a String or a file) into tokens.
 * <p>
 * The delimiter can either be treated as a set of alternative delimiter
 * characters, or - in "single word" mode - as one multi-character delimiter
 * that has to appear completely. Delimiters inside quoted sections are never
 * treated as delimiters. Optionally, delimiters inside round brackets are
 * ignored as well (see {@link #setCheckBrackets(boolean)}).
 * <p>
 * All parsing state is kept per instance. A single instance is not
 * synchronized and should not be shared between threads.
 *
 * @author support@sql-workbench.net
 */
public class WbStringTokenizer {
    private String delimit;
    private boolean singleWordDelimiter;
    private String quoteChars;
    private boolean keepQuotes;
    private Reader input;
    // No source is available until setSourceString()/setSourceFile() is called
    private boolean endOfInput = true;
    private boolean delimNeedWhitespace = false;
    private boolean checkBrackets = false;

    /**
     * Create an unconfigured tokenizer.
     * A delimiter and a source have to be supplied through the setters
     * before tokens can be retrieved.
     */
    public WbStringTokenizer() {
    }

    /**
     * Create a tokenizer for the given source where each character of
     * <tt>delimiter</tt> is a delimiter, the double quote is the quote
     * character and quotes are removed from the returned tokens.
     *
     * @param aSource the string to be tokenized
     * @param delimiter the delimiter characters
     */
    public WbStringTokenizer(String aSource, String delimiter) {
        this(delimiter, false, "\"", false);
        this.setSourceString(aSource);
    }

    /**
     * Create a tokenizer with a single delimiter character.
     * The string to be tokenized has to be set with setSourceString().
     *
     * @param aDelim the delimiter character
     * @param quoteChars the characters that start and end a quoted section (may be null)
     * @param keepQuotes if true, the quote characters are part of the returned tokens
     */
    public WbStringTokenizer(char aDelim, String quoteChars,
            boolean keepQuotes) {
        this(String.valueOf(aDelim), false, quoteChars, keepQuotes);
    }

    /**
     * Create a new tokenizer.
     * If aDelim contains more than one character, the parameter isSingleDelimiter indicates
     * whether the given delimiter string should be considered as one delimiter or a sequence
     * of possible delimiter characters.
     *
     * Once the Tokenizer is created, the string to be tokenized can be set with
     * setSourceString()
     *
     * @param aDelim the delimiter (word or set of characters)
     * @param isSingleDelimiter true if aDelim is one multi-character delimiter
     * @param quoteChars the characters that start and end a quoted section (may be null)
     * @param keepQuotes if true, the quote characters are part of the returned tokens
     */
    public WbStringTokenizer(String aDelim, boolean isSingleDelimiter,
            String quoteChars, boolean keepQuotes) {
        this.delimit = aDelim;
        this.singleWordDelimiter = isSingleDelimiter;
        this.quoteChars = quoteChars;
        this.keepQuotes = keepQuotes;
        this.endOfInput = true;
    }

    /**
     * Create a new tokenizer for the given input string.
     *
     * @param input the string to be tokenized
     * @param aDelim the delimiter (word or set of characters)
     * @param isSingleDelimiter true if aDelim is one multi-character delimiter
     * @param quoteChars the characters that start and end a quoted section (may be null)
     * @param keepQuotes if true, the quote characters are part of the returned tokens
     */
    public WbStringTokenizer(String input, String aDelim,
            boolean isSingleDelimiter, String quoteChars,
            boolean keepQuotes) {
        this(aDelim, isSingleDelimiter, quoteChars, keepQuotes);
        this.setSourceString(input);
    }

    /**
     * Controls whether delimiters inside round brackets are ignored.
     */
    public void setCheckBrackets(boolean flag) {
        this.checkBrackets = flag;
    }

    /**
     * Change the delimiter.
     *
     * @param aDelimiter the new delimiter
     * @param isSingleWord if true, aDelimiter is one multi-character delimiter,
     *        otherwise each of its characters is a delimiter
     */
    public void setDelimiter(String aDelimiter, boolean isSingleWord) {
        this.delimit = aDelimiter;
        this.singleWordDelimiter = isSingleWord;
    }

    /**
     * If enabled, a delimiter character is only recognized when the
     * preceding character is a whitespace (or when it is the first character
     * processed by a nextToken() call). This only applies when the delimiter
     * is not a single word delimiter.
     */
    public void setDelimiterNeedsWhitspace(boolean flag) {
        this.delimNeedWhitespace = flag;
    }

    /**
     * Define the characters that start and end a quoted section.
     * Pass null to disable quote handling.
     */
    public void setQuoteChars(String chars) {
        this.quoteChars = chars;
    }

    /**
     * Controls whether the quote characters surrounding a quoted section
     * are included in the returned tokens.
     */
    public void setKeepQuotes(boolean aFlag) {
        this.keepQuotes = aFlag;
    }

    /**
     * Use the contents of the given file as the input.
     * The file is read using the platform's default encoding. The reader
     * is closed once the end of the file has been reached, when a read error
     * occurs or when a different source is set.
     *
     * @param aFilename the name of the file to read
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException declared for backward compatibility
     */
    public void setSourceFile(String aFilename) throws IOException,
            FileNotFoundException {
        this.setReader(new BufferedReader(new FileReader(aFilename)));
    }

    /**
     * Use the given string as the input.
     * A null value is treated as "no input": hasMoreTokens() returns false.
     *
     * @param aString the string to be tokenized
     */
    public final void setSourceString(String aString) {
        if (aString == null) {
            this.closeInput();
            this.endOfInput = true;
            return;
        }
        this.setReader(new StringReader(aString));
    }

    /**
     * Return all remaining tokens.
     * The null value that nextToken() returns when the input is exhausted
     * without finding another token is never added to the result.
     *
     * @return the remaining tokens, never null
     */
    public List<String> getAllTokens() {
        List<String> result = new LinkedList<String>();
        while (this.hasMoreTokens()) {
            String token = this.nextToken();
            if (token != null) {
                result.add(token);
            }
        }
        return result;
    }

    /**
     * Install a new input source, releasing the previous one.
     */
    private void setReader(Reader aReader) {
        if (this.input != aReader) {
            this.closeInput();
        }
        this.endOfInput = false;
        this.input = aReader;
    }

    /**
     * Close and forget the current reader. Errors during close are ignored
     * because nothing more will be read from that reader anyway.
     */
    private void closeInput() {
        Reader old = this.input;
        this.input = null;
        if (old != null) {
            try {
                old.close();
            } catch (IOException ignored) {
                // best effort only, the reader is discarded in any case
            }
        }
    }

    /**
     * Check if the end of the input has been detected.
     * The end of the input is only detected while reading, so this can
     * return true even if the following call to nextToken() returns null.
     *
     * @return false if the end of the input was reached or no input was set
     */
    public boolean hasMoreTokens() {
        return !this.endOfInput;
    }

    /**
     * Append a character to a lazily created buffer.
     */
    private static StringBuilder append(StringBuilder target, char c) {
        StringBuilder result = (target == null ? new StringBuilder() : target);
        result.append(c);
        return result;
    }

    /**
     * Move a partially matched delimiter that turned out not to be a
     * delimiter into the token buffer.
     */
    private static StringBuilder flushPending(StringBuilder target, StringBuilder pending) {
        if (pending.length() == 0) {
            return target;
        }
        StringBuilder result = (target == null ? new StringBuilder() : target);
        result.append(pending);
        pending.setLength(0);
        return result;
    }

    /**
     * Read the next token from the input.
     * <p>
     * If the delimiter is a set of characters, empty tokens (consecutive
     * delimiters) are skipped. If the delimiter is a single word, an empty
     * string is returned for an empty token.
     * <p>
     * If reading from the input fails, the input is considered finished and
     * whatever has been collected so far is returned.
     *
     * @return the next token, or null if the input ended before any
     *         character for a token was collected
     */
    public String nextToken() {
        if (this.input == null) {
            this.endOfInput = true;
            return null;
        }

        boolean inQuotes = false;
        StringBuilder current = null;
        // characters that currently match the start of the single word delimiter
        StringBuilder pending = new StringBuilder();
        char lastQuote = 0;
        // if the input string directly starts with a delimiter
        // and delimNeedWhitespace == true, setting lastToken to
        // a whitespace prevents returning the delimiter for the
        // first argument
        char lastToken = 9;
        int bracketCount = 0;
        boolean hasDelimiter = (this.delimit != null && this.delimit.length() > 0);

        try {
            // the loop will be exited if a complete "word" is built
            // or the Reader is at the end of the input
            while (true) {
                int read = this.input.read();
                if (read == -1) {
                    this.endOfInput = true;
                    this.closeInput();
                    break;
                }

                char token = (char) read;

                // Check for quote character
                if (quoteChars != null && quoteChars.indexOf(token) > -1) {
                    // a quote ends any partial delimiter match
                    current = flushPending(current, pending);
                    if (!inQuotes) {
                        // start quote mode
                        lastQuote = token;
                        inQuotes = true;
                        if (keepQuotes) {
                            current = append(current, token);
                        }
                    } else if (token == lastQuote) {
                        // the same quote character that started quoting ends it
                        inQuotes = false;
                        lastQuote = 0;
                        if (keepQuotes) {
                            current = append(current, token);
                        }
                    } else {
                        // quote character inside another quote character
                        // we need to add it
                        current = append(current, token);
                    }
                    continue;
                }

                if (inQuotes) {
                    // inside quotes, anything has to be added.
                    current = append(current, token);
                    continue;
                }

                if (this.checkBrackets) {
                    if (token == '(') {
                        bracketCount++;
                    } else if (token == ')') {
                        bracketCount--;
                    }
                    if (bracketCount > 0) {
                        current = flushPending(current, pending);
                        current = append(current, token);
                        continue;
                    }
                }

                if (!hasDelimiter) {
                    current = append(current, token);
                } else if (this.singleWordDelimiter) {
                    pending.append(token);
                    // Keep only the longest tail of the pending characters that is
                    // still a prefix of the delimiter. Everything in front of it
                    // is regular token content.
                    while (pending.length() > 0
                            && !this.delimit.startsWith(pending.toString())) {
                        current = append(current, pending.charAt(0));
                        pending.deleteCharAt(0);
                    }
                    if (pending.length() == this.delimit.length()) {
                        // complete delimiter found
                        return (current == null ? "" : current.toString());
                    }
                } else if (this.delimit.indexOf(token) > -1) {
                    if (!delimNeedWhitespace || Character.isWhitespace(lastToken)) {
                        // found a new string to be split, return the current buffer
                        if (current != null) {
                            return current.toString();
                        }
                    } else {
                        current = append(current, token);
                    }
                } else {
                    current = append(current, token);
                }
                lastToken = token;
            }
        } catch (IOException e) {
            // Deliberately not propagated: a read error ends the tokenizing,
            // the characters collected so far are still returned
            this.endOfInput = true;
            this.closeInput();
        }

        // an incomplete delimiter at the end of the input is regular content
        current = flushPending(current, pending);
        if (current == null) {
            return null;
        }
        return current.toString();
    }

}
|