001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.util;
018:
019: import java.util.Enumeration;
020: import java.util.NoSuchElementException;
021:
/**
 * Replacement for StringTokenizer in java.util, because of a bug in
 * Sun's implementation.
 *
 * <p>Unlike <code>java.util.StringTokenizer</code>, this tokenizer never
 * collapses adjacent delimiters: every delimiter separates two tokens, so
 * empty tokens are reported for leading, trailing and consecutive
 * delimiters. A string containing <i>n</i> delimiter characters therefore
 * yields <i>n</i> + 1 tokens (or 2<i>n</i> + 1 when the delimiters
 * themselves are returned as tokens). The empty string yields a single
 * empty token.</p>
 *
 * <p>Instances keep a mutable cursor and perform no synchronization.</p>
 *
 * @deprecated Use commons lang. This class will be removed in 2.2.
 * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A>
 * @version CVS $Id: Tokenizer.java 433543 2006-08-22 06:22:54Z crossley $
 */
public class Tokenizer implements Enumeration {

    /**
     * Default delimiters " \t\n\r\f":
     * the space character, the tab character, the newline character,
     * the carriage-return character, and the form-feed character.
     */
    public static final String DEFAULT_DELIMITERS = " \t\n\r\f";

    /**
     * String to tokenize.
     */
    private final String str;

    /**
     * Current delimiter set; may be replaced by {@link #nextToken(String)}.
     */
    private String delim;

    /**
     * Flag indicating whether to return the delimiters as tokens.
     */
    private final boolean returnTokens;

    /**
     * Start position of the previously examined token, or -1 if no token
     * has been requested yet.
     */
    private int previous = -1;

    /**
     * Current position in the str string.
     */
    private int current = 0;

    /**
     * Length of the str string, i.e. the first position past its end.
     */
    private final int max;

    /**
     * Constructs a string tokenizer for the specified string. All characters
     * in the delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are
     * also returned as tokens. Each delimiter is returned as a string of
     * length one. If the flag is false, the delimiter characters are skipped
     * and only serve as separators between tokens.
     *
     * @param str a string to be parsed
     * @param delim the delimiters
     * @param returnTokens flag indicating whether to return the delimiters
     * as tokens
     */
    public Tokenizer(String str, String delim, boolean returnTokens) {
        this.str = str;
        this.delim = delim;
        this.returnTokens = returnTokens;
        this.max = str.length();
    }

    /**
     * Constructs a string tokenizer for the specified string. The characters
     * in the delim argument are the delimiters for separating tokens.
     * Delimiter characters themselves will not be treated as tokens.
     *
     * @param str a string to be parsed
     * @param delim the delimiters
     */
    public Tokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The character
     * in the delim argument is the delimiter for separating tokens.
     * The delimiter character itself will not be treated as a token.
     *
     * @param str a string to be parsed
     * @param delim the delimiter
     */
    public Tokenizer(String str, char delim) {
        this(str, String.valueOf(delim), false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The tokenizer
     * uses the default delimiter set, which is " \t\n\r\f": the space
     * character, the tab character, the newline character, the carriage-return
     * character, and the form-feed character. Delimiter characters themselves
     * will not be treated as tokens.
     *
     * @param str a string to be parsed
     */
    public Tokenizer(String str) {
        this(str, DEFAULT_DELIMITERS, false);
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns true, then a subsequent call to nextToken with
     * no argument will successfully return a token.
     *
     * @return true if and only if there is at least one token in the string
     * after the current position; false otherwise.
     */
    public boolean hasMoreTokens() {
        return moreAvailable();
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return the next token from this string tokenizer
     *
     * @exception NoSuchElementException if there are no more tokens in this
     * tokenizer's string
     */
    public String nextToken() throws NoSuchElementException {
        return advance();
    }

    /**
     * Returns the next token in this string tokenizer's string. First, the
     * set of characters considered to be delimiters by this Tokenizer
     * object is changed to be the characters in the string delim.
     * Then the next token in the string after the current position is
     * returned. The current position is advanced beyond the recognized token.
     * The new delimiter set remains the default after this call.
     *
     * @param delim the new delimiters
     *
     * @return the next token, after switching to the new delimiter set
     *
     * @exception NoSuchElementException if there are no more tokens in this
     * tokenizer's string.
     */
    public String nextToken(String delim) throws NoSuchElementException {
        this.delim = delim;
        return nextToken();
    }

    /**
     * Returns the same value as the hasMoreTokens method. It exists so that
     * this class can implement the Enumeration interface.
     *
     * @return true if there are more tokens; false otherwise.
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the nextToken method, except that its
     * declared return value is Object rather than String. It exists so that
     * this class can implement the Enumeration interface.
     *
     * @return the next token in the string
     *
     * @exception NoSuchElementException if there are no more tokens in this
     * tokenizer's string
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's nextToken method
     * can be called before it generates an exception. The current position
     * is not advanced.
     *
     * @return the number of tokens remaining in the string using the
     * current delimiter set
     */
    public int countTokens() {
        final int savedPrevious = previous;
        final int savedCurrent = current;
        int count = 0;

        // Dry-run the tokenizer from the current cursor so that the answer
        // reflects the tokens already consumed, then put the cursor back.
        try {
            while (moreAvailable()) {
                advance();
                count++;
            }
        } finally {
            previous = savedPrevious;
            current = savedCurrent;
        }

        return count;
    }

    /**
     * Resets this tokenizer's state so that tokenizing starts again from the
     * beginning of the string. The current delimiter set is kept.
     */
    public void reset() {
        previous = -1;
        current = 0;
    }

    /**
     * Tokenizes the specified string in one step. All characters
     * in the delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are
     * also returned as tokens. Each delimiter is returned as a string of
     * length one. If the flag is false, the delimiter characters are skipped
     * and only serve as separators between tokens.
     *
     * @param str a string to be parsed
     * @param delim the delimiters
     * @param returnTokens flag indicating whether to return the delimiters
     * as tokens
     *
     * @return array with all tokens of str, in order of appearance
     */
    public static String[] tokenize(String str, String delim,
            boolean returnTokens) {

        Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens);
        String[] tokens = new String[tokenizer.countTokens()];

        for (int i = 0; i < tokens.length; i++) {
            tokens[i] = tokenizer.nextToken();
        }

        return tokens;
    }

    /**
     * Tells whether the character at the given position of the string is one
     * of the current delimiters.
     */
    private boolean isDelimiterAt(int position) {
        return delim.indexOf(str.charAt(position)) >= 0;
    }

    /**
     * Tells whether another token can be produced from the current cursor.
     */
    private boolean moreAvailable() {
        if (current < max) {
            return true;
        }

        // Exactly at the end of the string one last empty token is still
        // owed when the string is empty, or when delimiters are returned
        // as tokens and the string ended with one.
        return current == max
                && (max == 0 || (returnTokens && isDelimiterAt(previous)));
    }

    /**
     * Produces the next token and moves the cursor past it.
     */
    private String advance() {
        if (current == max
                && (max == 0 || (returnTokens && isDelimiterAt(previous)))) {
            // Final empty token; step past max so it is reported only once.
            current++;
            return "";
        }

        if (current >= max) {
            throw new NoSuchElementException();
        }

        final int start = current;

        if (isDelimiterAt(start)) {
            if (previous == -1
                    || (returnTokens && previous != current
                            && isDelimiterAt(previous))) {
                // An empty token sits in front of this delimiter. The
                // delimiter is deliberately left unconsumed so that the next
                // call handles it; consuming it here would swallow the token
                // that follows a leading delimiter.
                previous = start;
                return "";
            }

            if (returnTokens) {
                // The delimiter itself is the token.
                previous = start;
                current++;
                return str.substring(start, current);
            }

            // Delimiters are not reported: step over this one and read the
            // token that follows it.
            current++;
        }

        previous = start;

        final int tokenStart = current;
        while (current < max && !isDelimiterAt(current)) {
            current++;
        }

        return str.substring(tokenStart, current);
    }
}
|