001: /*
002: * The contents of this file are subject to the Mozilla Public License
003: * Version 1.1 (the "License"); you may not use this file except in
004: * compliance with the License. You may obtain a copy of the License at
005: * http://www.mozilla.org/MPL/
006: *
007: * Software distributed under the License is distributed on an "AS IS"
008: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
009: * License for the specific language governing rights and limitations
010: * under the License.
011: *
012: * The Original Code is iSQL-Viewer, A Mutli-Platform Database Tool.
013: *
014: * The Initial Developer of the Original Code is iSQL-Viewer, A Mutli-Platform Database Tool.
015: * Portions created by Mark A. Kobold are Copyright (C) 2000-2007. All Rights Reserved.
016: *
017: * Contributor(s):
018: * Mark A. Kobold [mkobold <at> isqlviewer <dot> com].
019: *
020: * If you didn't download this code from the following link, you should check
021: * if you aren't using an obsolete version: http://www.isqlviewer.com
022: */
023: package org.isqlviewer.util;
024:
025: import java.util.ArrayList;
026: import java.util.Enumeration;
027: import java.util.NoSuchElementException;
028:
/**
 * A variant of <code>java.util.StringTokenizer</code> with some extensions.
 * <p>
 * Things you can do with this version that the default JDK version does not offer:
 * <ul>
 * <li>Tokenize with quote awareness: when enabled via {@link #setQuotesEnabled(boolean)}, delimiter characters
 * that appear inside a single- or double-quoted literal do not end the token.
 * <li>Query the tokenizer's position within the original text via {@link #getTokenIndex()}.
 * <li>Collect every token of the string at once via {@link #toArray()}.
 * </ul>
 * <p>
 * Delimiters are matched one <code>char</code> at a time; supplementary (surrogate pair) delimiters are not
 * treated as a single unit.
 *
 * @author Mark A. Kobold &lt;mkobold at isqlviewer dot com&gt;
 * @version 1.0
 */
public class StringTokenizer implements Enumeration<String> {

    /** Index in {@link #str} from which the next scan starts. */
    private int currentPosition;
    /** Start of the next token as pre-computed by {@link #hasMoreTokens()}, or -1 when nothing is cached. */
    private int newPosition;
    /** Length of {@link #str}; scanning never goes past this index. */
    private int maxPosition;
    /** The text being tokenized. */
    private String str;
    /** The set of delimiter characters currently in effect. */
    private String delimiters;
    /** Whether delimiter characters are themselves returned as one-character tokens. */
    private boolean retDelims;
    /** Set by {@link #nextToken(String)} so that a position cached under the old delimiters is discarded. */
    private boolean delimsChanged;
    /**
     * The highest <code>char</code> value found in the delimiter set. Any character greater than this value
     * cannot be a delimiter, which lets the scanner skip the <code>indexOf</code> lookup for it.
     */
    private int maxDelimCodePoint;
    /** Whether quoted literals protect the delimiters they contain. */
    private boolean quotesEnabled;

    /**
     * Recomputes {@link #maxDelimCodePoint} as the highest char value in the delimiter set, or 0 when the
     * delimiter set is <code>null</code>.
     */
    private void setMaxDelimCodePoint() {

        if (delimiters == null) {
            maxDelimCodePoint = 0;
            return;
        }

        int highest = 0;
        for (int i = 0; i < delimiters.length(); i++) {
            char c = delimiters.charAt(i);
            if (highest < c) {
                highest = c;
            }
        }
        maxDelimCodePoint = highest;
    }

    /**
     * Constructs a string tokenizer for the specified string. All characters in the <code>delim</code> argument
     * are the delimiters for separating tokens.
     * <p>
     * If the <code>returnDelims</code> flag is <code>true</code>, then the delimiter characters are also returned
     * as tokens. Each delimiter is returned as a string of length one. If the flag is <code>false</code>, the
     * delimiter characters are skipped and only serve as separators between tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor does not throw an exception. However,
     * trying to invoke other methods on the resulting tokenizer may result in a
     * <code>NullPointerException</code>.
     *
     * @param str a string to be parsed.
     * @param delim the delimiters.
     * @param returnDelims flag indicating whether to return the delimiters as tokens.
     * @exception NullPointerException if str is <code>null</code>
     */
    public StringTokenizer(String str, String delim, boolean returnDelims) {

        currentPosition = 0;
        newPosition = -1;
        delimsChanged = false;
        this.str = str;
        maxPosition = str.length();
        delimiters = delim;
        retDelims = returnDelims;
        setMaxDelimCodePoint();
    }

    /**
     * Constructs a string tokenizer for the specified string. The characters in the <code>delim</code> argument
     * are the delimiters for separating tokens. Delimiter characters themselves will not be treated as tokens.
     * <p>
     * Note that if <code>delim</code> is <code>null</code>, this constructor does not throw an exception. However,
     * trying to invoke other methods on the resulting tokenizer may result in a
     * <code>NullPointerException</code>.
     *
     * @param str a string to be parsed.
     * @param delim the delimiters.
     * @exception NullPointerException if str is <code>null</code>
     */
    public StringTokenizer(String str, String delim) {

        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The tokenizer uses the default delimiter set, which
     * is <code>" \t\n\r\f"</code>: the space character, the tab character, the newline character, the
     * carriage-return character, and the form-feed character. Delimiter characters themselves will not be treated
     * as tokens.
     *
     * @param str a string to be parsed.
     * @exception NullPointerException if str is <code>null</code>
     */
    public StringTokenizer(String str) {

        this(str, " \t\n\r\f", false);
    }

    /**
     * Tells whether the given character belongs to the current delimiter set. The comparison against
     * {@link #maxDelimCodePoint} is a cheap pre-check that avoids the <code>indexOf</code> lookup for characters
     * that cannot be delimiters.
     */
    private boolean isDelimiter(char c) {

        return (c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0);
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims is false, returns the index of the first
     * non-delimiter character at or after startPos. If retDelims is true, startPos is returned.
     *
     * @exception NullPointerException if the delimiter set is <code>null</code>
     */
    private int skipDelimiters(int startPos) {

        if (delimiters == null) {
            throw new NullPointerException();
        }

        int position = startPos;
        while (!retDelims && position < maxPosition && isDelimiter(str.charAt(position))) {
            position++;
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter character encountered, or maxPosition
     * if no such delimiter is found.
     * <p>
     * When quotes are enabled, a delimiter inside a quoted literal does not end the token. A literal is opened by
     * an unescaped <code>'</code> or <code>"</code> and closed only by the next unescaped occurrence of that same
     * quote character; a quote character is considered escaped when it is directly preceded by a backslash.
     * <p>
     * When delimiters are returned as tokens and startPos itself is a delimiter, the index just past that single
     * delimiter is returned.
     */
    private int scanToken(int startPos) {

        int position = startPos;
        // The quote character that opened the literal being scanned; 0 while outside of any literal.
        char openQuote = 0;
        while (position < maxPosition) {
            char c = str.charAt(position);
            if (quotesEnabled && isQuotable(c)) {
                // A quote at index 0 has no preceding character and therefore can never be escaped.
                boolean escaped = position > 0 && str.charAt(position - 1) == '\\';
                if (!escaped) {
                    if (openQuote == 0) {
                        openQuote = c;
                    } else if (openQuote == c) {
                        openQuote = 0;
                    }
                    // A different quote character inside an open literal is ordinary text.
                }
            }
            if (openQuote == 0 && isDelimiter(c)) {
                break;
            }
            position++;
        }
        if (retDelims && (startPos == position)) {
            // The scan stopped immediately: the token is the delimiter itself.
            if (isDelimiter(str.charAt(position))) {
                position++;
            }
        }
        return position;
    }

    /**
     * Tells whether the given character is one of the recognized quote characters (single or double quote).
     */
    private boolean isQuotable(char character) {

        return character == '\'' || character == '\"';
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string. If this method returns
     * <code>true</code>, then a subsequent call to <code>nextToken</code> with no argument will successfully
     * return a token.
     *
     * @return <code>true</code> if and only if there is at least one token in the string after the current
     *         position; <code>false</code> otherwise.
     */
    public boolean hasMoreTokens() {

        /*
         * Temporarily store this position and use it in the following nextToken() method only if the delimiters
         * haven't been changed in that nextToken() invocation.
         */
        newPosition = skipDelimiters(currentPosition);
        return (newPosition < maxPosition);
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return the next token from this string tokenizer.
     * @exception NoSuchElementException if there are no more tokens in this tokenizer's string.
     */
    public String nextToken() {

        /*
         * If the next position was already computed in hasMoreTokens() and the delimiters have NOT changed between
         * that computation and this invocation, reuse the computed value; otherwise skip delimiters again.
         */
        if (newPosition >= 0 && !delimsChanged) {
            currentPosition = newPosition;
        } else {
            currentPosition = skipDelimiters(currentPosition);
        }

        /* Reset these anyway */
        delimsChanged = false;
        newPosition = -1;

        if (currentPosition >= maxPosition) {
            throw new NoSuchElementException();
        }
        int start = currentPosition;
        currentPosition = scanToken(currentPosition);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First, the set of characters considered to be
     * delimiters by this tokenizer is changed to be the characters in the string <code>delim</code>. Then the next
     * token in the string after the current position is returned. The current position is advanced beyond the
     * recognized token. The new delimiter set remains the default after this call.
     *
     * @param delim the new delimiters.
     * @return the next token, after switching to the new delimiter set.
     * @exception NoSuchElementException if there are no more tokens in this tokenizer's string.
     * @exception NullPointerException if delim is <code>null</code>
     */
    public String nextToken(String delim) {

        delimiters = delim;

        /* delimiter string specified, so set the appropriate flag. */
        delimsChanged = true;

        setMaxDelimCodePoint();
        return nextToken();
    }

    /**
     * Returns the same value as the <code>hasMoreTokens</code> method. It exists so that this class can implement
     * the <code>Enumeration</code> interface.
     *
     * @return <code>true</code> if there are more tokens; <code>false</code> otherwise.
     * @see java.util.Enumeration
     * @see #hasMoreTokens()
     */
    public boolean hasMoreElements() {

        return hasMoreTokens();
    }

    /**
     * Returns the same value as the <code>nextToken</code> method. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return the next token in the string.
     * @exception NoSuchElementException if there are no more tokens in this tokenizer's string.
     * @see java.util.Enumeration
     * @see #nextToken()
     */
    public String nextElement() {

        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's <code>nextToken</code> method can be called before it
     * generates an exception. The current position is not advanced.
     *
     * @return the number of tokens remaining in the string using the current delimiter set.
     * @see #nextToken()
     */
    public int countTokens() {

        int count = 0;
        int position = currentPosition;
        while (position < maxPosition) {
            position = skipDelimiters(position);
            if (position >= maxPosition) {
                break;
            }
            position = scanToken(position);
            count++;
        }
        return count;
    }

    /**
     * Returns the tokenizer's current scan position within the original text.
     * <p>
     * Directly after a call to <code>nextToken</code> this is the index just past the last character of the
     * returned token, so the token's start index is <code>getTokenIndex() - token.length()</code>. Before the
     * first token is read (and after {@link #toArray()}) the value is 0.
     *
     * @return the index in the original text at which the next scan will start.
     */
    public int getTokenIndex() {

        return currentPosition;
    }

    /**
     * Sets quotes enabled for determining tokenization.
     *
     * @param quotesEnabled flag to enable/disable quote sensitivity during tokenization.
     */
    public void setQuotesEnabled(boolean quotesEnabled) {

        this.quotesEnabled = quotesEnabled;
    }

    /**
     * Determines if this tokenizer is sensitive to quoted strings.
     *
     * @return <code>true</code> if this instance is sensitive to quotes for each token.
     */
    public boolean isQuotesEnabled() {

        return quotesEnabled;
    }

    /**
     * Returns every token of the whole string, using the current delimiter set and quote setting.
     * <p>
     * The tokenizer is rewound to the start of the string before the tokens are collected and again afterwards,
     * so any iteration that was in progress restarts from the first token.
     *
     * @return all tokens of the string in order of appearance; never <code>null</code>.
     * @exception NullPointerException if the delimiter set is <code>null</code>
     */
    public String[] toArray() {

        ArrayList<String> tokens = new ArrayList<String>();
        // Rewind so that all tokens are collected, not only the remaining ones.
        rewind();

        while (hasMoreTokens()) {
            tokens.add(nextToken());
        }

        // Rewind again so the tokenizer can still be used normally, starting from the first token.
        rewind();
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Moves the tokenizer back to the beginning of the string and discards any cached look-ahead state.
     */
    private void rewind() {

        currentPosition = 0;
        newPosition = -1;
        delimsChanged = false;
    }
}
|