001: /*
002: * @(#)StringTokenizer.java 1.32 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package java.util;
029:
030: import java.lang.*;
031:
/**
 * The string tokenizer class allows an application to break a
 * string into tokens. The tokenization method is much simpler than
 * the one used by the <code>StreamTokenizer</code> class. The
 * <code>StringTokenizer</code> methods do not distinguish among
 * identifiers, numbers, and quoted strings, nor do they recognize
 * and skip comments.
 * <p>
 * The set of delimiters (the characters that separate tokens) may
 * be specified either at creation time or on a per-token basis.
 * <p>
 * An instance of <code>StringTokenizer</code> behaves in one of two
 * ways, depending on whether it was created with the
 * <code>returnDelims</code> flag having the value <code>true</code>
 * or <code>false</code>:
 * <ul>
 * <li>If the flag is <code>false</code>, delimiter characters serve to
 *     separate tokens. A token is a maximal sequence of consecutive
 *     characters that are not delimiters.
 * <li>If the flag is <code>true</code>, delimiter characters are themselves
 *     considered to be tokens. A token is thus either one delimiter
 *     character, or a maximal sequence of consecutive characters that are
 *     not delimiters.
 * </ul><p>
 * A <tt>StringTokenizer</tt> object internally maintains a current
 * position within the string to be tokenized. Some operations advance this
 * current position past the characters processed.<p>
 * A token is returned by taking a substring of the string that was used to
 * create the <tt>StringTokenizer</tt> object.
 * <p>
 * The following is one example of the use of the tokenizer. The code:
 * <blockquote><pre>
 *     StringTokenizer st = new StringTokenizer("this is a test");
 *     while (st.hasMoreTokens()) {
 *         System.out.println(st.nextToken());
 *     }
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
 *
 * <p>
 * <tt>StringTokenizer</tt> is a legacy class that is retained for
 * compatibility reasons although its use is discouraged in new code. It is
 * recommended that anyone seeking this functionality use the <tt>split</tt>
 * method of <tt>String</tt> or the java.util.regex package instead.
 * <p>
 * The following example illustrates how the <tt>String.split</tt>
 * method can be used to break up a string into its basic tokens:
 * <blockquote><pre>
 *     String[] result = "this is a test".split("\\s");
 *     for (int x=0; x&lt;result.length; x++)
 *         System.out.println(result[x]);
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
 *
 * @author  unascribed
 * @version 1.25, 02/02/00
 * @see     java.io.StreamTokenizer
 * @since   JDK1.0
 */
public class StringTokenizer implements Enumeration {
    /** Index in <code>str</code> from which the next scan starts. */
    private int currentPosition;

    /**
     * Start of the next token as computed by the most recent
     * <code>hasMoreTokens()</code> call, or <code>-1</code> when no such
     * cached value is available.
     */
    private int newPosition;

    /** Length of <code>str</code>; scanning never goes past this index. */
    private final int maxPosition;

    /** The string being tokenized. */
    private final String str;

    /** The current delimiter set; may be <code>null</code>. */
    private String delimiters;

    /** Whether delimiter characters are themselves returned as tokens. */
    private final boolean retDelims;

    /**
     * Set by <code>nextToken(String)</code> so that the following
     * <code>nextToken()</code> ignores any position cached in
     * <code>newPosition</code> under the previous delimiter set.
     */
    private boolean delimsChanged;

    /**
     * maxDelimChar stores the value of the delimiter character with the
     * highest value. It is used to optimize the detection of delimiter
     * characters: any character greater than it cannot be a delimiter.
     */
    private char maxDelimChar;

    /**
     * Set maxDelimChar to the highest char in the delimiter set, or to
     * <code>0</code> when the delimiter set is <code>null</code> or empty.
     */
    private void setMaxDelimChar() {
        if (delimiters == null) {
            maxDelimChar = 0;
            return;
        }

        char highest = 0;
        for (int i = 0; i < delimiters.length(); i++) {
            char c = delimiters.charAt(i);
            if (c > highest) {
                highest = c;
            }
        }
        maxDelimChar = highest;
    }

    /**
     * Tells whether <code>c</code> belongs to the current delimiter set.
     * The comparison against maxDelimChar is a cheap pre-filter that avoids
     * the <code>indexOf</code> search for characters above every delimiter.
     * Must only be called when <code>delimiters</code> is known to be
     * non-null (callers go through <code>skipDelimiters</code> first).
     */
    private boolean isDelimiter(char c) {
        return (c <= maxDelimChar) && (delimiters.indexOf(c) >= 0);
    }

    /**
     * Constructs a string tokenizer for the specified string. All
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens.
     * <p>
     * If the <code>returnDelims</code> flag is <code>true</code>, then
     * the delimiter characters are also returned as tokens. Each
     * delimiter is returned as a string of length one. If the flag is
     * <code>false</code>, the delimiter characters are skipped and only
     * serve as separators between tokens.
     * <p>
     * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
     * not throw an exception. However, trying to invoke other methods on the
     * resulting <tt>StringTokenizer</tt> may result in a
     * <tt>NullPointerException</tt>.
     *
     * @param   str            a string to be parsed.
     * @param   delim          the delimiters.
     * @param   returnDelims   flag indicating whether to return the delimiters
     *                         as tokens.
     * @exception NullPointerException if <code>str</code> is <code>null</code>.
     */
    public StringTokenizer(String str, String delim, boolean returnDelims) {
        this.str = str;
        this.maxPosition = str.length();
        this.delimiters = delim;
        this.retDelims = returnDelims;
        this.currentPosition = 0;
        this.newPosition = -1;
        this.delimsChanged = false;
        setMaxDelimChar();
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * characters in the <code>delim</code> argument are the delimiters
     * for separating tokens. Delimiter characters themselves will not
     * be treated as tokens.
     *
     * @param   str     a string to be parsed.
     * @param   delim   the delimiters.
     * @exception NullPointerException if <code>str</code> is <code>null</code>.
     */
    public StringTokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The
     * tokenizer uses the default delimiter set, which is
     * <code>"&nbsp;&#92;t&#92;n&#92;r&#92;f"</code>: the space character,
     * the tab character, the newline character, the carriage-return character,
     * and the form-feed character. Delimiter characters themselves will
     * not be treated as tokens.
     *
     * @param   str   a string to be parsed.
     * @exception NullPointerException if <code>str</code> is <code>null</code>.
     */
    public StringTokenizer(String str) {
        this(str, " \t\n\r\f", false);
    }

    /**
     * Skips delimiters starting from the specified position. If retDelims
     * is false, returns the index of the first non-delimiter character at or
     * after startPos. If retDelims is true, startPos is returned.
     *
     * @exception NullPointerException if the delimiter set is
     *            <code>null</code>.
     */
    private int skipDelimiters(int startPos) {
        if (delimiters == null) {
            throw new NullPointerException();
        }
        if (retDelims) {
            // Delimiters are tokens in their own right: nothing to skip.
            return startPos;
        }

        int position = startPos;
        while (position < maxPosition && isDelimiter(str.charAt(position))) {
            position++;
        }
        return position;
    }

    /**
     * Skips ahead from startPos and returns the index of the next delimiter
     * character encountered, or maxPosition if no such delimiter is found.
     * When retDelims is true and the character at startPos is itself a
     * delimiter, the index just past that single delimiter is returned so
     * that the delimiter forms a one-character token.
     */
    private int scanToken(int startPos) {
        int position = startPos;
        while (position < maxPosition && !isDelimiter(str.charAt(position))) {
            position++;
        }
        // If the scan did not move and we are still inside the string, the
        // loop stopped on a delimiter at startPos; in retDelims mode that
        // delimiter is the token. The bounds guard keeps this safe even if
        // startPos is already at the end of the string.
        if (retDelims && position == startPos && position < maxPosition) {
            position++;
        }
        return position;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns <tt>true</tt>, then a subsequent call to
     * <tt>nextToken</tt> with no argument will successfully return a token.
     *
     * @return  <code>true</code> if and only if there is at least one token
     *          in the string after the current position; <code>false</code>
     *          otherwise.
     * @exception NullPointerException if the delimiter set is
     *            <code>null</code>.
     */
    public boolean hasMoreTokens() {
        /*
         * Temporarily store this position and use it in the following
         * nextToken() method only if the delimiters haven't been changed in
         * that nextToken() invocation.
         */
        newPosition = skipDelimiters(currentPosition);
        return newPosition < maxPosition;
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return     the next token from this string tokenizer.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @exception  NullPointerException if the delimiter set is
     *               <code>null</code>.
     */
    public String nextToken() {
        /*
         * If the next position was already computed by hasMoreTokens() and
         * the delimiters have NOT changed between that computation and this
         * invocation, reuse the computed value; otherwise skip delimiters
         * again using the current delimiter set.
         */
        if (newPosition >= 0 && !delimsChanged) {
            currentPosition = newPosition;
        } else {
            currentPosition = skipDelimiters(currentPosition);
        }

        /* Reset these anyway */
        delimsChanged = false;
        newPosition = -1;

        if (currentPosition >= maxPosition) {
            throw new NoSuchElementException();
        }
        int start = currentPosition;
        currentPosition = scanToken(currentPosition);
        return str.substring(start, currentPosition);
    }

    /**
     * Returns the next token in this string tokenizer's string. First,
     * the set of characters considered to be delimiters by this
     * <tt>StringTokenizer</tt> object is changed to be the characters in
     * the string <tt>delim</tt>. Then the next token in the string
     * after the current position is returned. The current position is
     * advanced beyond the recognized token.  The new delimiter set
     * remains the default after this call.
     *
     * @param      delim   the new delimiters.
     * @return     the next token, after switching to the new delimiter set.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @exception  NullPointerException if <code>delim</code> is
     *               <code>null</code>.
     */
    public String nextToken(String delim) {
        delimiters = delim;

        /* delimiter string specified, so set the appropriate flag. */
        delimsChanged = true;

        setMaxDelimChar();
        return nextToken();
    }

    /**
     * Returns the same value as the <code>hasMoreTokens</code>
     * method. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return  <code>true</code> if there are more tokens;
     *          <code>false</code> otherwise.
     * @see     java.util.Enumeration
     * @see     java.util.StringTokenizer#hasMoreTokens()
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the <code>nextToken</code> method,
     * except that its declared return value is <code>Object</code> rather than
     * <code>String</code>. It exists so that this class can implement the
     * <code>Enumeration</code> interface.
     *
     * @return     the next token in the string.
     * @exception  NoSuchElementException  if there are no more tokens in this
     *               tokenizer's string.
     * @see        java.util.Enumeration
     * @see        java.util.StringTokenizer#nextToken()
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's
     * <code>nextToken</code> method can be called before it generates an
     * exception. The current position is not advanced.
     *
     * @return  the number of tokens remaining in the string using the current
     *          delimiter set.
     * @exception NullPointerException if the delimiter set is
     *            <code>null</code> and unscanned characters remain.
     * @see     java.util.StringTokenizer#nextToken()
     */
    public int countTokens() {
        int count = 0;
        // Walk a local cursor so the tokenizer's own position is untouched.
        int cursor = currentPosition;
        while (cursor < maxPosition) {
            cursor = skipDelimiters(cursor);
            if (cursor >= maxPosition) {
                break;
            }
            cursor = scanToken(cursor);
            count++;
        }
        return count;
    }
}
|