0001: /*
0002: * A replacement for java.util.StringTokenizer
0003: * Copyright (C) 2001 Stephen Ostermiller
0004: * http://ostermiller.org/contact.pl?regarding=Java+Utilities
0005: *
0006: * This program is free software; you can redistribute it and/or modify
0007: * it under the terms of the GNU General Public License as published by
0008: * the Free Software Foundation; either version 2 of the License, or
0009: * (at your option) any later version.
0010: *
0011: * This program is distributed in the hope that it will be useful,
0012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0014: * GNU General Public License for more details.
0015: *
0016: * See COPYING.TXT for details.
0017: */
0018: package com.Ostermiller.util;
0019:
0020: import java.util.NoSuchElementException;
0021:
0022: /**
0023: * The string tokenizer class allows an application to break a string into
0024: * tokens.
0025: * More information about this class is available from <a target="_top" href=
0026: * "http://ostermiller.org/utils/StringTokenizer.html">ostermiller.org</a>.
0027: * <p>
0028: * The tokenization method is much simpler than the one used by the
0029: * <code>StreamTokenizer</code> class. The <code>StringTokenizer</code> methods
0030: * do not distinguish among identifiers, numbers, and quoted strings, nor do
0031: * they recognize and skip comments.
0032: * <p>
0033: * The set of delimiters (the characters that separate tokens) may be specified
0034: * either at creation time or on a per-token basis.
0035: * <p>
0036: * There are two kinds of delimiters: token delimiters and non-token delimiters.
0037: * A token is either one token delimiter character, or a maximal sequence of
0038: * consecutive characters that are not delimiters.
0039: * <p>
0040: * A <code>StringTokenizer</code> object internally maintains a current
0041: * position within the string to be tokenized. Some operations advance this
0042: * current position past the characters processed.
0043: * <p>
0044: * The implementation is not thread safe; if a <code>StringTokenizer</code>
0045: * object is intended to be used in multiple threads, an appropriate wrapper
0046: * must be provided.
0047: * <p>
0048: * The following is one example of the use of the tokenizer. It also
0049: * demonstrates the usefulness of having both token and non-token delimiters in
0050: * one <code>StringTokenizer</code>.
0051: * <p>
0052: * The code:
0053: * <blockquote><code>
0054: * String s = " ( aaa \t * (b+c1 ))";<br>
0055: * StringTokenizer tokenizer = new StringTokenizer(s, " \t\n\r\f", "()+*");<br>
0056: * while (tokenizer.hasMoreTokens()) {<br>
0057: * System.out.println(tokenizer.nextToken());<br>
0058: * };
0059: * </code></blockquote>
0060: * <p>
0061: * prints the following output:
0062: * <blockquote>
0063: * (<br>
0064: * aaa<br>
0065: * *<br>
0066: * (<br>
0067: * b<br>
0068: * +<br>
0069: * c1<br>
0070: * )<br>
0071: * )
0072: * </blockquote>
0073: * <p>
 * <b>Compatibility with <code>java.util.StringTokenizer</code></b>
0075: * <p>
0076: * In the original version of <code>java.util.StringTokenizer</code>, the method
0077: * <code>nextToken()</code> left the current position after the returned token,
0078: * and the method <code>hasMoreTokens()</code> moved (as a side effect) the
0079: * current position before the beginning of the next token. Thus, the code:
0080: * <blockquote><code>
0081: * String s = "x=a,b,c";<br>
0082: * java.util.StringTokenizer tokenizer = new java.util.StringTokenizer(s,"=");<br>
0083: * System.out.println(tokenizer.nextToken());<br>
0084: * while (tokenizer.hasMoreTokens()) {<br>
0085: * System.out.println(tokenizer.nextToken(","));<br>
0086: * };
0087: * </code></blockquote>
0088: * <p>
0089: * prints the following output:
0090: * <blockquote>
0091: * x<br>
0092: * a<br>
0093: * b<br>
0094: * c
0095: * </blockquote>
0096: * <p>
0097: * The Java SDK 1.3 implementation removed the undesired side effect of
0098: * <code>hasMoreTokens</code> method: now, it does not advance current position.
0099: * However, after these changes the output of the above code was:
0100: * <blockquote>
0101: * x<br>
0102: * =a<br>
0103: * b<br>
0104: * c
0105: * </blockquote>
0106: * <p>
0107: * and there was no good way to produce a second token without "=".
0108: * <p>
0109: * To solve the problem, this implementation introduces a new method
0110: * <code>skipDelimiters()</code>. To produce the original output, the above code
0111: * should be modified as:
0112: * <blockquote><code>
0113: * String s = "x=a,b,c";<br>
0114: * StringTokenizer tokenizer = new StringTokenizer(s,"=");<br>
0115: * System.out.println(tokenizer.nextToken());<br>
0116: * tokenizer.skipDelimiters();<br>
0117: * while (tokenizer.hasMoreTokens()) {<br>
0118: * System.out.println(tokenizer.nextToken(","));<br>
0119: * };
0120: * </code></blockquote>
0121: *
0122: * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
0123: * @since ostermillerutils 1.00.00
0124: */
0125: public class StringTokenizer implements java.util.Enumeration<String>,
0126: java.util.Iterator<String> {
0127:
/**
 * The string to be tokenized.
 * The code relies on this to never be null.
 *
 * @since ostermillerutils 1.00.00
 */
protected String text;

/**
 * The length of the text.
 * Cached for performance. This should be set whenever the
 * string we are working with is changed.
 *
 * @since ostermillerutils 1.00.00
 */
protected int strLength;

/**
 * The set of non-token delimiters. May be null, which is treated
 * as an empty set.
 *
 * @since ostermillerutils 1.00.00
 */
protected String nontokenDelims;

/**
 * The set of token delimiters. May be null, which is treated
 * as an empty set.
 *
 * @since ostermillerutils 1.00.00
 */
protected String tokenDelims;

/**
 * One of two variables used to maintain state through
 * the tokenizing process.
 * <p>
 * Represents the position at which we should start looking for
 * the next token (the position of the character immediately
 * following the end of the last token, or 0 to start), or
 * -1 if the entire string has been examined.
 *
 * @since ostermillerutils 1.00.00
 */
protected int position;

/**
 * One of two variables used to maintain state through
 * the tokenizing process.
 * <p>
 * true if and only if it is found that an empty token should
 * be returned or if an empty token was the last thing returned.
 * <p>
 * If returnEmptyTokens is false, then this variable will
 * always be false.
 *
 * @since ostermillerutils 1.00.00
 */
protected boolean emptyReturned;

/**
 * Stores the value of the delimiter character with the
 * highest value. It is used to optimize the detection of delimiter
 * characters. The common case will be that the int values of delimiters
 * will be less than that of most characters in the string (a comma or a
 * space is less than any letter, for example). Given this, we can easily
 * check whether a character is not a delimiter by comparing it to the max
 * delimiter. If it is greater than the max delimiter, then it is not
 * a delimiter; otherwise we have to do some more in-depth analysis (for
 * example, search the delimiter strings). This keeps the running time of
 * the algorithm from depending on the length of the delimiter strings
 * in the common case.
 *
 * @since ostermillerutils 1.00.00
 */
protected char maxDelimChar;

/**
 * Whether empty tokens should be returned.
 * For example, whether "" should be returned when the text starts with
 * a delimiter, has two delimiters next to each other, or
 * ends with a delimiter.
 *
 * @since ostermillerutils 1.00.00
 */
protected boolean returnEmptyTokens;

/**
 * Indicates at which position the delimiters last changed. This
 * will affect how empty tokens are returned. Any
 * time that delimiters are changed, the string will be treated as if
 * it is being parsed from position zero; for example, empty tokens are
 * possible at the very beginning.
 *
 * @since ostermillerutils 1.00.00
 */
protected int delimsChangedPosition;

/**
 * A cache of the token count. This variable should be -1 if the tokens
 * have not yet been counted. It should be greater than or equal to zero
 * if the tokens have been counted.
 *
 * @since ostermillerutils 1.00.00
 */
protected int tokenCount;
0232:
/**
 * Creates a tokenizer over <code>text</code> with both a non-token and a
 * token delimiter set. Empty tokens are not returned.
 * <p>
 * Tokenizing starts at the beginning of the string.
 *
 * @param text a string to be parsed.
 * @param nontokenDelims the non-token delimiters, i.e. the delimiters that only
 *     separate tokens and are not returned as separate tokens.
 * @param tokenDelims the token delimiters, i.e. delimiters that both separate
 *     tokens and are themselves returned as tokens.
 * @throws NullPointerException if text is null.
 *
 * @since ostermillerutils 1.00.00
 */
public StringTokenizer(String text, String nontokenDelims,
        String tokenDelims) {
    // Same as the primary constructor with empty-token reporting switched off.
    this(text, nontokenDelims, tokenDelims, false);
}
0252:
/**
 * Creates a tokenizer over <code>text</code>, specifying both delimiter sets
 * and whether empty tokens are returned.
 * <p>
 * Empty tokens are tokens that are between consecutive delimiters.
 * <p>
 * This is the primary constructor: the other constructors are defined in
 * terms of it.
 * <p>
 * Tokenizing starts at the beginning of the string.
 *
 * @param text a string to be parsed.
 * @param nontokenDelims the non-token delimiters, i.e. the delimiters that only
 *     separate tokens and are not returned as separate tokens.
 * @param tokenDelims the token delimiters, i.e. delimiters that both separate
 *     tokens and are themselves returned as tokens.
 * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
 * @throws NullPointerException if text is null.
 *
 * @since ostermillerutils 1.00.00
 */
public StringTokenizer(String text, String nontokenDelims,
        String tokenDelims, boolean returnEmptyTokens) {
    // The delimiters are installed first, while position and strLength still
    // hold their default value of zero; setText then (re)initialises the
    // scanning state, including delimsChangedPosition and tokenCount.
    this.setDelims(nontokenDelims, tokenDelims);
    this.setText(text);
    this.setReturnEmptyTokens(returnEmptyTokens);
}
0281:
/**
 * Creates a tokenizer over <code>text</code> with a single delimiter set
 * that is used either as the token or as the non-token delimiters.
 * <p>
 * Is equivalent to:
 * <ul>
 * <li> If the third parameter is <code>false</code> --
 *      <code>StringTokenizer(text, delims, null)</code>
 * <li> If the third parameter is <code>true</code> --
 *      <code>StringTokenizer(text, null, delims)</code>
 * </ul>
 *
 * @param text a string to be parsed.
 * @param delims the delimiters.
 * @param delimsAreTokens
 *     flag indicating whether the second parameter specifies token or
 *     non-token delimiters: <code>false</code> -- the second parameter
 *     specifies non-token delimiters, the set of token delimiters is
 *     empty; <code>true</code> -- the second parameter specifies token
 *     delimiters, the set of non-token delimiters is empty.
 * @throws NullPointerException if text is null.
 *
 * @since ostermillerutils 1.00.00
 */
public StringTokenizer(String text, String delims,
        boolean delimsAreTokens) {
    // Route the single set into whichever slot the flag selects and leave the
    // other slot null (empty); empty tokens are not returned.
    this(text,
            delimsAreTokens ? null : delims,
            delimsAreTokens ? delims : null,
            false);
}
0311:
/**
 * Creates a tokenizer over <code>text</code> whose tokens are separated by
 * the characters in <code>nontokenDelims</code>. The delimiter characters
 * themselves are never returned as tokens.
 * <p>
 * Is equivalent to <code>StringTokenizer(text, nontokenDelims, null)</code>.
 *
 * @param text a string to be parsed.
 * @param nontokenDelims the non-token delimiters.
 * @throws NullPointerException if text is null.
 *
 * @since ostermillerutils 1.00.00
 */
public StringTokenizer(String text, String nontokenDelims) {
    // No token delimiters, and empty tokens are not returned.
    this(text, nontokenDelims, null, false);
}
0328:
/**
 * Creates a tokenizer over <code>text</code> that splits on whitespace: the
 * non-token delimiter set is <code>" \t\n\r\f"</code> and the token
 * delimiter set is empty.
 * <p>
 * Is equivalent to <code>StringTokenizer(text, " \t\n\r\f", null)</code>.
 *
 * @param text a string to be parsed.
 * @throws NullPointerException if text is null.
 *
 * @since ostermillerutils 1.00.00
 */
public StringTokenizer(String text) {
    // Whitespace separators only; empty tokens are not returned.
    this(text, " \t\n\r\f", null, false);
}
0344:
0345: /**
0346: * Set the text to be tokenized in this StringTokenizer.
0347: * <p>
0348: * This is useful when for StringTokenizer re-use so that new string tokenizers do not
0349: * have to be created for each string you want to tokenizer.
0350: * <p>
0351: * The string will be tokenized from the beginning of the string.
0352: *
0353: * @param text a string to be parsed.
0354: * @throws NullPointerException if text is null.
0355: *
0356: * @since ostermillerutils 1.00.00
0357: */
0358: public void setText(String text) {
0359: if (text == null) {
0360: throw new NullPointerException();
0361: }
0362: this .text = text;
0363: strLength = text.length();
0364: emptyReturned = false;
0365: // set the position to start evaluation to zero
0366: // unless the string has no length, in which case
0367: // the entire string has already been examined.
0368: position = (strLength > 0 ? 0 : -1);
0369: // because the text was changed since the last time the delimiters
0370: // were changed we need to set the delimiter changed position
0371: delimsChangedPosition = 0;
0372: // The token count changes when the text changes
0373: tokenCount = -1;
0374: }
0375:
0376: /**
0377: * Set the delimiters for this StringTokenizer.
0378: * The position must be initialized before this method is used.
0379: * (setText does this and it is called from the constructor)
0380: *
0381: * @param nontokenDelims delimiters that should not be returned as tokens.
0382: * @param tokenDelims delimiters that should be returned as tokens.
0383: *
0384: * @since ostermillerutils 1.00.00
0385: */
0386: private void setDelims(String nontokenDelims, String tokenDelims) {
0387: this .nontokenDelims = nontokenDelims;
0388: this .tokenDelims = tokenDelims;
0389: // If we change delimiters, we do not want to start fresh,
0390: // without returning empty tokens.
0391: // the delimiter changed position can never be less than
0392: // zero, unlike position.
0393: delimsChangedPosition = (position != -1 ? position : strLength);
0394: // set the max delimiter
0395: maxDelimChar = 0;
0396: for (int i = 0; nontokenDelims != null
0397: && i < nontokenDelims.length(); i++) {
0398: if (maxDelimChar < nontokenDelims.charAt(i)) {
0399: maxDelimChar = nontokenDelims.charAt(i);
0400: }
0401: }
0402: for (int i = 0; tokenDelims != null && i < tokenDelims.length(); i++) {
0403: if (maxDelimChar < tokenDelims.charAt(i)) {
0404: maxDelimChar = tokenDelims.charAt(i);
0405: }
0406: }
0407: // Changing the delimiters may change the number of tokens
0408: tokenCount = -1;
0409: }
0410:
0411: /**
0412: * Tests if there are more tokens available from this tokenizer's string.
0413: * If this method returns <tt>true</tt>, then a subsequent call to
0414: * <tt>nextToken</tt> with no argument will successfully return a token.
0415: * <p>
0416: * The current position is not changed.
0417: *
0418: * @return <code>true</code> if and only if there is at least one token in the
0419: * string after the current position; <code>false</code> otherwise.
0420: *
0421: * @since ostermillerutils 1.00.00
0422: */
0423: public boolean hasMoreTokens() {
0424:
0425: // handle the easy case in which the number
0426: // of tokens has been counted.
0427: if (tokenCount == 0) {
0428: return false;
0429: } else if (tokenCount > 0) {
0430: return true;
0431: }
0432:
0433: // copy over state variables from the class to local
0434: // variables so that the state of this object can be
0435: // restored to the state that it was in before this
0436: // method was called.
0437: int savedPosition = position;
0438: boolean savedEmptyReturned = emptyReturned;
0439:
0440: int workingPosition = position;
0441: boolean workingEmptyReturned = emptyReturned;
0442: boolean onToken = advancePosition();
0443: while (position != workingPosition
0444: || emptyReturned != workingEmptyReturned) {
0445: if (onToken) {
0446: // restore object state
0447: position = savedPosition;
0448: emptyReturned = savedEmptyReturned;
0449: return true;
0450: }
0451: workingPosition = position;
0452: workingEmptyReturned = emptyReturned;
0453: onToken = advancePosition();
0454: }
0455:
0456: // restore object state
0457: position = savedPosition;
0458: emptyReturned = savedEmptyReturned;
0459: return false;
0460: }
0461:
0462: /**
0463: * Returns the next token from this string tokenizer.
0464: * <p>
0465: * The current position is set after the token returned.
0466: *
0467: * @return the next token from this string tokenizer.
0468: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0469: *
0470: * @since ostermillerutils 1.00.00
0471: */
0472: public String nextToken() {
0473: int workingPosition = position;
0474: boolean workingEmptyReturned = emptyReturned;
0475: boolean onToken = advancePosition();
0476: while (position != workingPosition
0477: || emptyReturned != workingEmptyReturned) {
0478: if (onToken) {
0479: // returning a token decreases the token count
0480: tokenCount--;
0481: return (emptyReturned ? "" : text.substring(
0482: workingPosition, (position != -1) ? position
0483: : strLength));
0484: }
0485: workingPosition = position;
0486: workingEmptyReturned = emptyReturned;
0487: onToken = advancePosition();
0488: }
0489: throw new NoSuchElementException();
0490: }
0491:
0492: /**
0493: * Advances the current position so it is before the next token.
0494: * <p>
0495: * This method skips non-token delimiters but does not skip
0496: * token delimiters.
0497: * <p>
0498: * This method is useful when switching to the new delimiter sets (see the
0499: * second example in the class comment.)
0500: *
0501: * @return <code>true</code> if there are more tokens, <code>false</code> otherwise.
0502: *
0503: * @since ostermillerutils 1.00.00
0504: */
0505: public boolean skipDelimiters() {
0506: int workingPosition = position;
0507: boolean workingEmptyReturned = emptyReturned;
0508: boolean onToken = advancePosition();
0509:
0510: // skipping delimiters may cause the number of tokens to change
0511: tokenCount = -1;
0512:
0513: while (position != workingPosition
0514: || emptyReturned != workingEmptyReturned) {
0515: if (onToken) {
0516: // restore the state to just as it was before we found
0517: // this token and return
0518: position = workingPosition;
0519: emptyReturned = workingEmptyReturned;
0520: return true;
0521: }
0522: workingPosition = position;
0523: workingEmptyReturned = emptyReturned;
0524: onToken = advancePosition();
0525: }
0526:
0527: // the end of the string was reached
0528: // without finding any tokens
0529: return false;
0530: }
0531:
0532: /**
0533: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0534: * method can be called before it generates an exception. The current position
0535: * is not advanced.
0536: *
0537: * @return the number of tokens remaining in the string using the current
0538: * delimiter set.
0539: *
0540: * @see #nextToken()
0541: * @since ostermillerutils 1.00.00
0542: */
0543: public int countTokens() {
0544:
0545: // return the cached token count if a cache
0546: // is available.
0547: if (this .tokenCount >= 0) {
0548: return this .tokenCount;
0549: }
0550:
0551: int tokenCount = 0;
0552:
0553: // copy over state variables from the class to local
0554: // variables so that the state of this object can be
0555: // restored to the state that it was in before this
0556: // method was called.
0557: int savedPosition = position;
0558: boolean savedEmptyReturned = emptyReturned;
0559:
0560: int workingPosition = position;
0561: boolean workingEmptyReturned = emptyReturned;
0562: boolean onToken = advancePosition();
0563: while (position != workingPosition
0564: || emptyReturned != workingEmptyReturned) {
0565: if (onToken) {
0566: tokenCount++;
0567: }
0568: workingPosition = position;
0569: workingEmptyReturned = emptyReturned;
0570: onToken = advancePosition();
0571: }
0572:
0573: // restore object state
0574: position = savedPosition;
0575: emptyReturned = savedEmptyReturned;
0576:
0577: // Save the token count in case this is called again
0578: // so we wouldn't have to do so much work.
0579: this .tokenCount = tokenCount;
0580:
0581: return tokenCount;
0582: }
0583:
0584: /**
0585: * Set the delimiters used to this set of (non-token) delimiters.
0586: *
0587: * @param delims the new set of non-token delimiters (the set of token delimiters will be empty).
0588: *
0589: * @since ostermillerutils 1.00.00
0590: */
0591: public void setDelimiters(String delims) {
0592: setDelims(delims, null);
0593: }
0594:
0595: /**
0596: * Set the delimiters used to this set of delimiters.
0597: *
0598: * @param delims the new set of delimiters.
0599: * @param delimsAreTokens flag indicating whether the first parameter specifies
0600: * token or non-token delimiters: false -- the first parameter specifies non-token
0601: * delimiters, the set of token delimiters is empty; true -- the first parameter
0602: * specifies token delimiters, the set of non-token delimiters is empty.
0603: *
0604: * @since ostermillerutils 1.00.00
0605: */
0606: public void setDelimiters(String delims, boolean delimsAreTokens) {
0607: setDelims((delimsAreTokens ? null : delims),
0608: (delimsAreTokens ? delims : null));
0609: }
0610:
0611: /**
0612: * Set the delimiters used to this set of delimiters.
0613: *
0614: * @param nontokenDelims the new set of non-token delimiters.
0615: * @param tokenDelims the new set of token delimiters.
0616: *
0617: * @since ostermillerutils 1.00.00
0618: */
0619: public void setDelimiters(String nontokenDelims, String tokenDelims) {
0620: setDelims(nontokenDelims, tokenDelims);
0621: }
0622:
0623: /**
0624: * Set the delimiters used to this set of delimiters.
0625: *
0626: * @param nontokenDelims the new set of non-token delimiters.
0627: * @param tokenDelims the new set of token delimiters.
0628: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0629: *
0630: * @since ostermillerutils 1.00.00
0631: */
0632: public void setDelimiters(String nontokenDelims,
0633: String tokenDelims, boolean returnEmptyTokens) {
0634: setDelims(nontokenDelims, tokenDelims);
0635: setReturnEmptyTokens(returnEmptyTokens);
0636: }
0637:
0638: /**
0639: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0640: * method can be called before it generates an exception using the given set of
0641: * (non-token) delimiters. The delimiters given will be used for future calls to
0642: * nextToken() unless new delimiters are given. The current position
0643: * is not advanced.
0644: *
0645: * @param delims the new set of non-token delimiters (the set of token delimiters will be empty).
0646: * @return the number of tokens remaining in the string using the new
0647: * delimiter set.
0648: *
0649: * @see #countTokens()
0650: * @since ostermillerutils 1.00.00
0651: */
0652: public int countTokens(String delims) {
0653: setDelims(delims, null);
0654: return countTokens();
0655: }
0656:
0657: /**
0658: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0659: * method can be called before it generates an exception using the given set of
0660: * delimiters. The delimiters given will be used for future calls to
0661: * nextToken() unless new delimiters are given. The current position
0662: * is not advanced.
0663: *
0664: * @param delims the new set of delimiters.
0665: * @param delimsAreTokens flag indicating whether the first parameter specifies
0666: * token or non-token delimiters: false -- the first parameter specifies non-token
0667: * delimiters, the set of token delimiters is empty; true -- the first parameter
0668: * specifies token delimiters, the set of non-token delimiters is empty.
0669: * @return the number of tokens remaining in the string using the new
0670: * delimiter set.
0671: *
0672: * @see #countTokens()
0673: * @since ostermillerutils 1.00.00
0674: */
0675: public int countTokens(String delims, boolean delimsAreTokens) {
0676: setDelims((delimsAreTokens ? null : delims),
0677: (delimsAreTokens ? delims : null));
0678: return countTokens();
0679: }
0680:
0681: /**
0682: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0683: * method can be called before it generates an exception using the given set of
0684: * delimiters. The delimiters given will be used for future calls to
0685: * nextToken() unless new delimiters are given. The current position
0686: * is not advanced.
0687: *
0688: * @param nontokenDelims the new set of non-token delimiters.
0689: * @param tokenDelims the new set of token delimiters.
0690: * @return the number of tokens remaining in the string using the new
0691: * delimiter set.
0692: *
0693: * @see #countTokens()
0694: * @since ostermillerutils 1.00.00
0695: */
0696: public int countTokens(String nontokenDelims, String tokenDelims) {
0697: setDelims(nontokenDelims, tokenDelims);
0698: return countTokens();
0699: }
0700:
0701: /**
0702: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0703: * method can be called before it generates an exception using the given set of
0704: * delimiters. The delimiters given will be used for future calls to
0705: * nextToken() unless new delimiters are given. The current position
0706: * is not advanced.
0707: *
0708: * @param nontokenDelims the new set of non-token delimiters.
0709: * @param tokenDelims the new set of token delimiters.
0710: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0711: * @return the number of tokens remaining in the string using the new
0712: * delimiter set.
0713: *
0714: * @see #countTokens()
0715: * @since ostermillerutils 1.00.00
0716: */
0717: public int countTokens(String nontokenDelims, String tokenDelims,
0718: boolean returnEmptyTokens) {
0719: setDelims(nontokenDelims, tokenDelims);
0720: setReturnEmptyTokens(returnEmptyTokens);
0721: return countTokens();
0722: }
0723:
0724: /**
0725: * Advances the state of the tokenizer to the next token or delimiter. This method only
0726: * modifies the class variables position, and emptyReturned. The type of token that
0727: * should be emitted can be deduced by examining the changes to these two variables.
0728: * If there are no more tokens, the state of these variables does not change at all.
0729: *
0730: * @return true if we are at a juncture at which a token may be emitted, false otherwise.
0731: *
0732: * @since ostermillerutils 1.00.00
0733: */
0734: private boolean advancePosition() {
0735: // if we are returning empty tokens, we are just starting to tokenizer
0736: // and there is a delimiter at the beginning of the string or the string
0737: // is empty we need to indicate that there is an empty token at the beginning.
0738: // The beginning is defined as where the delimiters were last changed.
0739: if (returnEmptyTokens
0740: && !emptyReturned
0741: && (delimsChangedPosition == position || (position == -1 && strLength == delimsChangedPosition))) {
0742: if (strLength == delimsChangedPosition) {
0743: // Case in which the string (since delimiter change)
0744: // is empty, but because we are returning empty
0745: // tokens, a single empty token should be returned.
0746: emptyReturned = true;
0747: return true;
0748: }
0749: char c = text.charAt(position);
0750: if (c <= maxDelimChar
0751: && (nontokenDelims != null && nontokenDelims
0752: .indexOf(c) != -1)
0753: || (tokenDelims != null && tokenDelims.indexOf(c) != -1)) {
0754: // There is delimiter at the very start of the string
0755: // so we must return an empty token at the beginning.
0756: emptyReturned = true;
0757: return true;
0758: }
0759: }
0760: // The main loop
0761: // Do this as long as parts of the string have yet to be examined
0762: while (position != -1) {
0763: char c = text.charAt(position);
0764: if (returnEmptyTokens && !emptyReturned
0765: && position > delimsChangedPosition) {
0766: char c1 = text.charAt(position - 1);
0767: // Examine the current character and the one before it.
0768: // If both of them are delimiters, then we need to return
0769: // an empty delimiter. Note that characters that were examined
0770: // before the delimiters changed should not be reexamined.
0771: if (c <= maxDelimChar
0772: && c1 <= maxDelimChar
0773: && ((nontokenDelims != null && nontokenDelims
0774: .indexOf(c) != -1) || (tokenDelims != null && tokenDelims
0775: .indexOf(c) != -1))
0776: && ((nontokenDelims != null && nontokenDelims
0777: .indexOf(c1) != -1) || (tokenDelims != null && tokenDelims
0778: .indexOf(c1) != -1))) {
0779: emptyReturned = true;
0780: /*System.out.println("Empty token.");*/
0781: return true;
0782: }
0783: }
0784:
0785: int nextDelimiter = (position < strLength - 1 ? indexOfNextDelimiter(position + 1)
0786: : -1);
0787: if (c > maxDelimChar
0788: || ((nontokenDelims == null || nontokenDelims
0789: .indexOf(c) == -1) && (tokenDelims == null || tokenDelims
0790: .indexOf(c) == -1))) {
0791: // token found
0792: /*System.out.println("Token: '" +
0793: text.substring(position, (nextDelimiter == -1 ? strLength : nextDelimiter)) +
0794: "' at " + position + ".");*/
0795: position = nextDelimiter;
0796: emptyReturned = false;
0797: return true;
0798: } else if (tokenDelims != null
0799: && tokenDelims.indexOf(c) != -1) {
0800: // delimiter that can be returned as a token found
0801: emptyReturned = false;
0802: /*System.out.println("Delimiter: '" + c + "' at " + position + ".");*/
0803: position = (position < strLength - 1 ? position + 1
0804: : -1);
0805: return true;
0806: } else {
0807: // delimiter that is not a token found.
0808: emptyReturned = false;
0809: position = (position < strLength - 1 ? position + 1
0810: : -1);
0811: return false;
0812: }
0813: }
0814: // handle the case that a token is at the end of the string and we should
0815: // return empty tokens.
0816: if (returnEmptyTokens && !emptyReturned && strLength > 0) {
0817: char c = text.charAt(strLength - 1);
0818: if (c <= maxDelimChar
0819: && (nontokenDelims != null && nontokenDelims
0820: .indexOf(c) != -1)
0821: || (tokenDelims != null && tokenDelims.indexOf(c) != -1)) {
0822: // empty token at the end of the string found.
0823: emptyReturned = true;
0824: /*System.out.println("Empty token at end.");*/
0825: return true;
0826: }
0827: }
0828: return false;
0829: }
0830:
0831: /**
0832: * Returns the next token in this string tokenizer's string.
0833: * <p>
0834: * First, the sets of token and non-token delimiters are changed to be the
0835: * <code>tokenDelims</code> and <code>nontokenDelims</code>, respectively.
0836: * Then the next token (with respect to new delimiters) in the string after the
0837: * current position is returned.
0838: * <p>
0839: * The current position is set after the token returned.
0840: * <p>
0841: * The new delimiter sets remains the used ones after this call.
0842: *
0843: * @param nontokenDelims the new set of non-token delimiters.
0844: * @param tokenDelims the new set of token delimiters.
0845: * @return the next token, after switching to the new delimiter set.
0846: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0847: * @see #nextToken()
0848: *
0849: * @since ostermillerutils 1.00.00
0850: */
0851: public String nextToken(String nontokenDelims, String tokenDelims) {
0852: setDelims(nontokenDelims, tokenDelims);
0853: return nextToken();
0854: }
0855:
0856: /**
0857: * Returns the next token in this string tokenizer's string.
0858: * <p>
0859: * First, the sets of token and non-token delimiters are changed to be the
0860: * <code>tokenDelims</code> and <code>nontokenDelims</code>, respectively;
0861: * and whether or not to return empty tokens is set.
0862: * Then the next token (with respect to new delimiters) in the string after the
0863: * current position is returned.
0864: * <p>
0865: * The current position is set after the token returned.
0866: * <p>
0867: * The new delimiter set remains the one used for this call and empty tokens are
0868: * returned in the future as they are in this call.
0869: *
0870: * @param nontokenDelims the new set of non-token delimiters.
0871: * @param tokenDelims the new set of token delimiters.
0872: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0873: * @return the next token, after switching to the new delimiter set.
0874: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0875: * @see #nextToken()
0876: *
0877: * @since ostermillerutils 1.00.00
0878: */
0879: public String nextToken(String nontokenDelims, String tokenDelims,
0880: boolean returnEmptyTokens) {
0881: setDelims(nontokenDelims, tokenDelims);
0882: setReturnEmptyTokens(returnEmptyTokens);
0883: return nextToken();
0884: }
0885:
0886: /**
0887: * Returns the next token in this string tokenizer's string.
0888: * <p>
0889: * Is equivalent to:
0890: * <ul>
0891: * <li> If the second parameter is <code>false</code> --
0892: * <code>nextToken(delimiters, null)</code>
0893: * <li> If the second parameter is <code>true</code> --
0894: * <code>nextToken(null, delimiters)</code>
0895: * </ul>
0896: * <p>
0897: * @param delims the new set of token or non-token delimiters.
0898: * @param delimsAreTokens
0899: * flag indicating whether the first parameter specifies token or
0900: * non-token delimiters: <code>false</code> -- the first parameter
0901: * specifies non-token delimiters, the set of token delimiters is
0902: * empty; <code>true</code> -- the first parameter specifies token
0903: * delimiters, the set of non-token delimiters is empty.
0904: * @return the next token, after switching to the new delimiter set.
0905: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0906: *
0907: * @see #nextToken(String,String)
0908: * @since ostermillerutils 1.00.00
0909: */
0910: public String nextToken(String delims, boolean delimsAreTokens) {
0911: return (delimsAreTokens ? nextToken(null, delims) : nextToken(
0912: delims, null));
0913: }
0914:
0915: /**
0916: * Returns the next token in this string tokenizer's string.
0917: * <p>
0918: * Is equivalent to <code>nextToken(delimiters, null)</code>.
0919: *
0920: * @param nontokenDelims the new set of non-token delimiters (the set of
0921: * token delimiters will be empty).
0922: * @return the next token, after switching to the new delimiter set.
0923: * @throws NoSuchElementException if there are no more tokens in this
0924: * tokenizer's string.
0925: *
0926: * @see #nextToken(String,String)
0927: * @since ostermillerutils 1.00.00
0928: */
0929: public String nextToken(String nontokenDelims) {
0930: return nextToken(nontokenDelims, null);
0931: }
0932:
0933: /**
0934: * Similar to String.indexOf(int, String) but will look for
0935: * any character from string rather than the entire string.
0936: *
0937: * @param start index in text at which to begin the search
0938: * @return index of the first delimiter from the start index (inclusive), or -1
0939: * if there are no more delimiters in the string
0940: *
0941: * @since ostermillerutils 1.00.00
0942: */
0943: private int indexOfNextDelimiter(int start) {
0944: char c;
0945: int next;
0946: for (next = start; (c = text.charAt(next)) > maxDelimChar
0947: || ((nontokenDelims == null || nontokenDelims
0948: .indexOf(c) == -1) && (tokenDelims == null || tokenDelims
0949: .indexOf(c) == -1)); next++) {
0950: if (next == strLength - 1) {
0951: // we have reached the end of the string without
0952: // finding a delimiter
0953: return (-1);
0954: }
0955: }
0956: return next;
0957: }
0958:
0959: /**
0960: * Returns the same value as the <code>hasMoreTokens()</code> method. It exists
0961: * so that this class can implement the <code>Enumeration</code> interface.
0962: *
0963: * @return <code>true</code> if there are more tokens;
0964: * <code>false</code> otherwise.
0965: *
0966: * @see java.util.Enumeration
0967: * @see #hasMoreTokens()
0968: * @since ostermillerutils 1.00.00
0969: */
0970: public boolean hasMoreElements() {
0971: return hasMoreTokens();
0972: }
0973:
0974: /**
0975: * Returns the same value as the <code>nextToken()</code> method, except that
0976: * its declared return value is <code>Object</code> rather than
0977: * <code>String</code>. It exists so that this class can implement the
0978: * <code>Enumeration</code> interface.
0979: *
0980: * @return the next token in the string.
0981: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0982: *
0983: * @see java.util.Enumeration
0984: * @see #nextToken()
0985: * @since ostermillerutils 1.00.00
0986: */
0987: public String nextElement() {
0988: return nextToken();
0989: }
0990:
0991: /**
0992: * Returns the same value as the <code>hasMoreTokens()</code> method. It exists
0993: * so that this class can implement the <code>Iterator</code> interface.
0994: *
0995: * @return <code>true</code> if there are more tokens;
0996: * <code>false</code> otherwise.
0997: *
0998: * @see java.util.Iterator
0999: * @see #hasMoreTokens()
1000: * @since ostermillerutils 1.00.00
1001: */
1002: public boolean hasNext() {
1003: return hasMoreTokens();
1004: }
1005:
1006: /**
1007: * Returns the same value as the <code>nextToken()</code> method, except that
1008: * its declared return value is <code>Object</code> rather than
1009: * <code>String</code>. It exists so that this class can implement the
1010: * <code>Iterator</code> interface.
1011: *
1012: * @return the next token in the string.
1013: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
1014: *
1015: * @see java.util.Iterator
1016: * @see #nextToken()
1017: * @since ostermillerutils 1.00.00
1018: */
1019: public String next() {
1020: return nextToken();
1021: }
1022:
1023: /**
1024: * This implementation always throws <code>UnsupportedOperationException</code>.
1025: * It exists so that this class can implement the <code>Iterator</code> interface.
1026: *
1027: * @throws UnsupportedOperationException always is thrown.
1028: *
1029: * @see java.util.Iterator
1030: * @since ostermillerutils 1.00.00
1031: */
1032: public void remove() {
1033: throw new UnsupportedOperationException();
1034: }
1035:
1036: /**
1037: * Set whether empty tokens should be returned from this point in
1038: * in the tokenizing process onward.
1039: * <P>
1040: * Empty tokens occur when two delimiters are next to each other
1041: * or a delimiter occurs at the beginning or end of a string. If
1042: * empty tokens are set to be returned, and a comma is the non token
1043: * delimiter, the following table shows how many tokens are in each
1044: * string.<br>
1045: * <table><tr><th>String<th><th>Number of tokens<th></tr>
1046: * <tr><td align=right>"one,two"<td><td>2 - normal case with no empty tokens.<td></tr>
1047: * <tr><td align=right>"one,,three"<td><td>3 including the empty token in the middle.<td></tr>
1048: * <tr><td align=right>"one,"<td><td>2 including the empty token at the end.<td></tr>
1049: * <tr><td align=right>",two"<td><td>2 including the empty token at the beginning.<td></tr>
1050: * <tr><td align=right>","<td><td>2 including the empty tokens at the beginning and the ends.<td></tr>
1051: * <tr><td align=right>""<td><td>1 - all strings will have at least one token if empty tokens are returned.<td></tr></table>
1052: *
1053: * @param returnEmptyTokens true iff empty tokens should be returned.
1054: *
1055: * @since ostermillerutils 1.00.00
1056: */
1057: public void setReturnEmptyTokens(boolean returnEmptyTokens) {
1058: // this could effect the number of tokens
1059: tokenCount = -1;
1060: this .returnEmptyTokens = returnEmptyTokens;
1061: }
1062:
1063: /**
1064: * Get the the index of the character immediately
1065: * following the end of the last token. This is the position at which this tokenizer will begin looking
1066: * for the next token when a <code>nextToken()</code> method is invoked.
1067: *
1068: * @return the current position or -1 if the entire string has been tokenized.
1069: *
1070: * @since ostermillerutils 1.00.00
1071: */
1072: public int getCurrentPosition() {
1073: return this .position;
1074: }
1075:
1076: /**
1077: * Retrieve all of the remaining tokens in a String array.
1078: * This method uses the options that are currently set for
1079: * the tokenizer and will advance the state of the tokenizer
1080: * such that <code>hasMoreTokens()</code> will return false.
1081: *
1082: * @return an array of tokens from this tokenizer.
1083: *
1084: * @since ostermillerutils 1.00.00
1085: */
1086: public String[] toArray() {
1087: String[] tokenArray = new String[countTokens()];
1088: for (int i = 0; hasMoreTokens(); i++) {
1089: tokenArray[i] = nextToken();
1090: }
1091: return tokenArray;
1092: }
1093:
1094: /**
1095: * Retrieves the rest of the text as a single token.
1096: * After calling this method hasMoreTokens() will always return false.
1097: *
1098: * @return any part of the text that has not yet been tokenized.
1099: *
1100: * @since ostermillerutils 1.00.00
1101: */
1102: public String restOfText() {
1103: return nextToken(null, null);
1104: }
1105:
1106: /**
1107: * Returns the same value as nextToken() but does not alter
1108: * the internal state of the Tokenizer. Subsequent calls
1109: * to peek() or a call to nextToken() will return the same
1110: * token again.
1111: *
1112: * @return the next token from this string tokenizer.
1113: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
1114: *
1115: * @since ostermillerutils 1.00.00
1116: */
1117: public String peek() {
1118: // copy over state variables from the class to local
1119: // variables so that the state of this object can be
1120: // restored to the state that it was in before this
1121: // method was called.
1122: int savedPosition = position;
1123: boolean savedEmptyReturned = emptyReturned;
1124: int savedtokenCount = tokenCount;
1125:
1126: // get the next token
1127: String retval = nextToken();
1128:
1129: // restore the state
1130: position = savedPosition;
1131: emptyReturned = savedEmptyReturned;
1132: tokenCount = savedtokenCount;
1133:
1134: // return the nextToken;
1135: return (retval);
1136: }
1137: }
|