0001: /*
0002: * A replacement for java.util.StringTokenizer
0003: * Copyright (C) 2001 Stephen Ostermiller
0004: * http://ostermiller.org/contact.pl?regarding=Java+Utilities
0005: *
0006: * This program is free software; you can redistribute it and/or modify
0007: * it under the terms of the GNU General Public License as published by
0008: * the Free Software Foundation; either version 2 of the License, or
0009: * (at your option) any later version.
0010: *
0011: * This program is distributed in the hope that it will be useful,
0012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0014: * GNU General Public License for more details.
0015: *
0016: * See COPYING.TXT for details.
0017: */
0018: package com.Ostermiller.util;
0019:
0020: /**
0021: * The string tokenizer class allows an application to break a string into
0022: * tokens.
0023: * More information about this class is available from <a target="_top" href=
0024: * "http://ostermiller.org/utils/StringTokenizer.html">ostermiller.org</a>.
0025: * <p>
0026: * The tokenization method is much simpler than the one used by the
0027: * <code>StreamTokenizer</code> class. The <code>StringTokenizer</code> methods
0028: * do not distinguish among identifiers, numbers, and quoted strings, nor do
0029: * they recognize and skip comments.
0030: * <p>
0031: * The set of delimiters (the characters that separate tokens) may be specified
0032: * either at creation time or on a per-token basis.
0033: * <p>
0034: * There are two kinds of delimiters: token delimiters and nontoken delimiters.
0035: * A token is either one token delimiter character, or a maximal sequence of
0036: * consecutive characters that are not delimiters.
0037: * <p>
0038: * A <code>StringTokenizer</code> object internally maintains a current
0039: * position within the string to be tokenized. Some operations advance this
0040: * current position past the characters processed.
0041: * <p>
0042: * The implementation is not thread safe; if a <code>StringTokenizer</code>
0043: * object is intended to be used in multiple threads, an appropriate wrapper
0044: * must be provided.
0045: * <p>
0046: * The following is one example of the use of the tokenizer. It also
0047: * demonstrates the usefulness of having both token and nontoken delimiters in
0048: * one <code>StringTokenizer</code>.
0049: * <p>
0050: * The code:
0051: * <blockquote><code>
0052: * String s = " ( aaa \t * (b+c1 ))";<br>
0053: * StringTokenizer st = new StringTokenizer(s, " \t\n\r\f", "()+*");<br>
0054: * while (st.hasMoreTokens()) {<br>
0055: * System.out.println(st.nextToken());<br>
0056: * };
0057: * </code></blockquote>
0058: * <p>
0059: * prints the following output:
0060: * <blockquote>
0061: * (<br>
0062: * aaa<br>
0063: * *<br>
0064: * (<br>
0065: * b<br>
0066: * +<br>
0067: * c1<br>
0068: * )<br>
0069: * )
0070: * </blockquote>
0071: * <p>
0072: * <b>Compatibility with <code>java.util.StringTokenizer</code></b>
0073: * <p>
0074: * In the original version of <code>java.util.StringTokenizer</code>, the method
0075: * <code>nextToken()</code> left the current position after the returned token,
0076: * and the method <code>hasMoreTokens()</code> moved (as a side effect) the
0077: * current position before the beginning of the next token. Thus, the code:
0078: * <blockquote><code>
0079: * String s = "x=a,b,c";<br>
0080: * java.util.StringTokenizer st = new java.util.StringTokenizer(s,"=");<br>
0081: * System.out.println(st.nextToken());<br>
0082: * while (st.hasMoreTokens()) {<br>
0083: * System.out.println(st.nextToken(","));<br>
0084: * };
0085: * </code></blockquote>
0086: * <p>
0087: * prints the following output:
0088: * <blockquote>
0089: * x<br>
0090: * a<br>
0091: * b<br>
0092: * c
0093: * </blockquote>
0094: * <p>
0095: * The Java SDK 1.3 implementation removed the undesired side effect of
0096: * <code>hasMoreTokens</code> method: now, it does not advance current position.
0097: * However, after these changes the output of the above code was:
0098: * <blockquote>
0099: * x<br>
0100: * =a<br>
0101: * b<br>
0102: * c
0103: * </blockquote>
0104: * <p>
0105: * and there was no good way to produce a second token without "=".
0106: * <p>
0107: * To solve the problem, this implementation introduces a new method
0108: * <code>skipDelimiters()</code>. To produce the original output, the above code
0109: * should be modified as:
0110: * <blockquote><code>
0111: * String s = "x=a,b,c";<br>
0112: * StringTokenizer st = new StringTokenizer(s,"=");<br>
0113: * System.out.println(st.nextToken());<br>
0114: * st.skipDelimiters();<br>
0115: * while (st.hasMoreTokens()) {<br>
0116: * System.out.println(st.nextToken(","));<br>
0117: * };
0118: * </code></blockquote>
0119: *
0120: * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
0121: * @since ostermillerutils 1.00.00
0122: */
0123: public class StringTokenizer implements java.util.Enumeration,
0124: java.util.Iterator {
0125: /**
0126: * The string to be tokenized.
0127: * The code relies on this to never be null.
0128: *
0129: * @since ostermillerutils 1.00.00
0130: */
0131: protected String text;
0132:
0133: /**
0134: * The length of the text.
0135: * Cached for performance. This should be set whenever the
0136: * string we are working with is changed.
0137: *
0138: * @since ostermillerutils 1.00.00
0139: */
0140: protected int strLength;
0141:
0142: /**
0143: * The set of nontoken delimiters.
0144: *
0145: * @since ostermillerutils 1.00.00
0146: */
0147: protected String nontokenDelims;
0148:
0149: /**
0150: * The set of token delimiters.
0151: *
0152: * @since ostermillerutils 1.00.00
0153: */
0154: protected String tokenDelims;
0155:
0156: /**
0157: * One of two variables used to maintain state through
0158: * the tokenizing process.
0159: * <P>
0160: * Represents the position at which we should start looking for
0161: * the next token (the position of the character immediately
0162: * following the end of the last token, or 0 to start), or
0163: * -1 if the entire string has been examined.
0164: *
0165: * @since ostermillerutils 1.00.00
0166: */
0167: protected int position;
0168:
0169: /**
0170: * One of two variables used to maintain state through
0171: * the tokenizing process.
0172: * <p>
0173: * true if and only if it is found that an empty token should
0174: * be returned or if an empty token was the last thing returned.
0175: * <p>
0176: * If returnEmptyTokens is false, then this variable will
0177: * always be false.
0178: *
0179: * @since ostermillerutils 1.00.00
0180: */
0181: protected boolean emptyReturned;
0182:
0183: /**
0184: * Stores the value of the delimiter character with the
0185: * highest value. It is used to optimize the detection of delimiter
0186: * characters. The common case will be that the int values of delimiters
0187: * will be less than that of most characters in the string (a comma or space is
0188: * less than any letter, for example). Given this, we can easily check
0189: * to see if a character is not a delimiter by comparing it to the max
0190: * delimiter. If it is greater than the max delimiter, then it is not
0191: * a delimiter; otherwise we have to do some more in-depth analysis (i.e.
0192: * search the delimiter string). This keeps the running time of
0193: * the algorithm from depending on the length of the delimiter string
0194: * in the common case.
0195: *
0196: * @since ostermillerutils 1.00.00
0197: */
0198: protected char maxDelimChar;
0199:
0200: /**
0201: * Whether empty tokens should be returned.
0202: * ie if "" should be returned when text starts with
0203: * a delim, has two delims next to each other, or
0204: * ends with a delim.
0205: *
0206: * @since ostermillerutils 1.00.00
0207: */
0208: protected boolean returnEmptyTokens;
0209:
0210: /**
0211: * Indicates at which position the delimiters last changed. This
0212: * will affect how empty tokens are returned. Any
0213: * time that delimiters are changed, the string will be treated as if
0214: * it is being parsed from position zero, i.e., empty tokens are possible
0215: * at the very beginning.
0216: *
0217: * @since ostermillerutils 1.00.00
0218: */
0219: protected int delimsChangedPosition;
0220:
0221: /**
0222: * A cache of the token count. This variable should be -1 if the tokens
0223: * have not yet been counted. It should be greater than or equal to zero
0224: * if the tokens have been counted.
0225: *
0226: * @since ostermillerutils 1.00.00
0227: */
0228: protected int tokenCount;
0229:
0230: /**
0231: * Constructs a string tokenizer for the specified string. Both token and
0232: * nontoken delimiters are specified.
0233: * <p>
0234: * The current position is set at the beginning of the string.
0235: *
0236: * @param text a string to be parsed.
0237: * @param nontokenDelims the nontoken delimiters, i.e. the delimiters that only separate
0238: * tokens and are not returned as separate tokens.
0239: * @param tokenDelims the token delimiters, i.e. delimiters that both separate tokens,
0240: * and are themselves returned as tokens.
0241: * @throws NullPointerException if text is null.
0242: *
0243: * @since ostermillerutils 1.00.00
0244: */
public StringTokenizer(String text, String nontokenDelims, String tokenDelims) {
    // Delegate to the four-argument constructor with empty tokens disabled.
    this(text, nontokenDelims, tokenDelims, false);
}
0249:
0250: /**
0251: * Constructs a string tokenizer for the specified string. Both token and
0252: * nontoken delimiters are specified and whether or not empty tokens are returned
0253: * is specified.
0254: * <p>
0255: * Empty tokens are tokens that are between consecutive delimiters.
0256: * <p>
0257: * It is a primary constructor (i.e. all other constructors are defined in terms
0258: * of it.)
0259: * <p>
0260: * The current position is set at the beginning of the string.
0261: *
0262: * @param text a string to be parsed.
0263: * @param nontokenDelims the nontoken delimiters, i.e. the delimiters that only separate
0264: * tokens and are not returned as separate tokens.
0265: * @param tokenDelims the token delimiters, i.e. delimiters that both separate tokens,
0266: * and are themselves returned as tokens.
0267: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0268: * @throws NullPointerException if text is null.
0269: *
0270: * @since ostermillerutils 1.00.00
0271: */
public StringTokenizer(String text, String nontokenDelims, String tokenDelims,
        boolean returnEmptyTokens) {
    // Install the text first. setText() rejects a null text before any other
    // state is touched, and it initializes position and strLength, which
    // setDelims() reads when it records where the delimiters last changed.
    // (The previous order only worked because those fields default to 0.)
    setText(text);
    setDelims(nontokenDelims, tokenDelims);
    setReturnEmptyTokens(returnEmptyTokens);
}
0278:
0279: /**
0280: * Constructs a string tokenizer for the specified string. Either token or
0281: * nontoken delimiters are specified.
0282: * <p>
0283: * Is equivalent to:
0284: * <ul>
0285: * <li> If the third parameter is <code>false</code> --
0286: * <code>StringTokenizer(text,delims, null)</code>
0287: * <li> If the third parameter is <code>true</code> --
0288: * <code>StringTokenizer(text, null ,delims)</code>
0289: * </ul>
0290: *
0291: * @param text a string to be parsed.
0292: * @param delims the delimiters.
0293: * @param delimsAreTokens
0294: * flag indicating whether the second parameter specifies token or
0295: * nontoken delimiters: <code>false</code> -- the second parameter
0296: * specifies nontoken delimiters, the set of token delimiters is
0297: * empty; <code>true</code> -- the second parameter specifies token
0298: * delimiters, the set of nontoken delimiters is empty.
0299: * @throws NullPointerException if text is null.
0300: *
0301: * @since ostermillerutils 1.00.00
0302: */
public StringTokenizer(String text, String delims, boolean delimsAreTokens) {
    // Route the single delimiter set into either the token slot or the
    // nontoken slot; the other slot is passed as null.
    this(text, delimsAreTokens ? null : delims, delimsAreTokens ? delims : null);
}
0308:
0309: /**
0310: * Constructs a string tokenizer for the specified string. The characters in the
0311: * <code>nontokenDelims</code> argument are the delimiters for separating
0312: * tokens. Delimiter characters themselves will not be treated as tokens.
0313: * <p>
0314: * Is equivalent to <code>StringTokenizer(text,nontokenDelims, null)</code>.
0315: *
0316: * @param text a string to be parsed.
0317: * @param nontokenDelims the nontoken delimiters.
0318: * @throws NullPointerException if text is null.
0319: *
0320: * @since ostermillerutils 1.00.00
0321: */
public StringTokenizer(String text, String nontokenDelims) {
    // Nontoken delimiters only: no token delimiter set is supplied.
    this(text, nontokenDelims, null);
}
0325:
0326: /**
0327: * Constructs a string tokenizer for the specified string. The tokenizer uses
0328: * " \t\n\r\f" as a delimiter set of nontoken delimiters, and an empty token
0329: * delimiter set.
0330: * <p>
0331: * Is equivalent to <code>StringTokenizer(text, " \t\n\r\f", null);</code>
0332: *
0333: * @param text a string to be parsed.
0334: * @throws NullPointerException if text is null.
0335: *
0336: * @since ostermillerutils 1.00.00
0337: */
public StringTokenizer(String text) {
    // Default nontoken delimiters: space, tab, newline, carriage return and
    // form feed. No token delimiter set is supplied.
    this(text, " \t\n\r\f", null);
}
0341:
0342: /**
0343: * Set the text to be tokenized in this StringTokenizer.
0344: * <p>
0345: * This is useful for StringTokenizer re-use, so that new string tokenizers do not
0346: * have to be created for each string you want to tokenize.
0347: * <p>
0348: * The string will be tokenized from the beginning of the string.
0349: *
0350: * @param text a string to be parsed.
0351: * @throws NullPointerException if text is null.
0352: *
0353: * @since ostermillerutils 1.00.00
0354: */
0355: public void setText(String text) {
0356: if (text == null) {
0357: throw new NullPointerException();
0358: }
0359:
0360: this .text = text;
0361: strLength = text.length();
0362: emptyReturned = false;
0363:
0364: // set the position to start evaluation to zero
0365: // unless the string has no length, in which case
0366: // the entire string has already been examined.
0367: position = ((strLength > 0) ? 0 : (-1));
0368:
0369: // because the text was changed since the last time the delimiters
0370: // were changed we need to set the delimiter changed position
0371: delimsChangedPosition = 0;
0372:
0373: // The token count changes when the text changes
0374: tokenCount = -1;
0375: }
0376:
0377: /**
0378: * Set the delimiters for this StringTokenizer.
0379: * The position must be initialized before this method is used.
0380: * (setText does this and it is called from the constructor)
0381: *
0382: * @param nontokenDelims delimiters that should not be returned as tokens.
0383: * @param tokenDelims delimiters that should be returned as tokens.
0384: *
0385: * @since ostermillerutils 1.00.00
0386: */
0387: private void setDelims(String nontokenDelims, String tokenDelims) {
0388: this .nontokenDelims = nontokenDelims;
0389: this .tokenDelims = tokenDelims;
0390:
0391: // If we change delimiters, we do not want to start fresh,
0392: // without returning empty tokens.
0393: // the delimiter changed position can never be less than
0394: // zero, unlike position.
0395: delimsChangedPosition = ((position != -1) ? position
0396: : strLength);
0397:
0398: // set the max delimiter
0399: maxDelimChar = 0;
0400:
0401: for (int i = 0; (nontokenDelims != null)
0402: && (i < nontokenDelims.length()); i++) {
0403: if (maxDelimChar < nontokenDelims.charAt(i)) {
0404: maxDelimChar = nontokenDelims.charAt(i);
0405: }
0406: }
0407:
0408: for (int i = 0; (tokenDelims != null)
0409: && (i < tokenDelims.length()); i++) {
0410: if (maxDelimChar < tokenDelims.charAt(i)) {
0411: maxDelimChar = tokenDelims.charAt(i);
0412: }
0413: }
0414:
0415: // Changing the delimiters may change the number of tokens
0416: tokenCount = -1;
0417: }
0418:
0419: /**
0420: * Tests if there are more tokens available from this tokenizer's string.
0421: * If this method returns <tt>true</tt>, then a subsequent call to
0422: * <tt>nextToken</tt> with no argument will successfully return a token.
0423: * <p>
0424: * The current position is not changed.
0425: *
0426: * @return <code>true</code> if and only if there is at least one token in the
0427: * string after the current position; <code>false</code> otherwise.
0428: *
0429: * @since ostermillerutils 1.00.00
0430: */
0431: public boolean hasMoreTokens() {
0432: // handle the easy case in which the number
0433: // of tokens has been counted.
0434: if (tokenCount == 0) {
0435: return false;
0436: } else if (tokenCount > 0) {
0437: return true;
0438: }
0439:
0440: // copy over state variables from the class to local
0441: // variables so that the state of this object can be
0442: // restored to the state that it was in before this
0443: // method was called.
0444: int savedPosition = position;
0445: boolean savedEmptyReturned = emptyReturned;
0446:
0447: int workingPosition = position;
0448: boolean workingEmptyReturned = emptyReturned;
0449: boolean onToken = advancePosition();
0450:
0451: while ((position != workingPosition)
0452: || (emptyReturned != workingEmptyReturned)) {
0453: if (onToken) {
0454: // restore object state
0455: position = savedPosition;
0456: emptyReturned = savedEmptyReturned;
0457:
0458: return true;
0459: }
0460:
0461: workingPosition = position;
0462: workingEmptyReturned = emptyReturned;
0463: onToken = advancePosition();
0464: }
0465:
0466: // restore object state
0467: position = savedPosition;
0468: emptyReturned = savedEmptyReturned;
0469:
0470: return false;
0471: }
0472:
0473: /**
0474: * Returns the next token from this string tokenizer.
0475: * <p>
0476: * The current position is set after the token returned.
0477: *
0478: * @return the next token from this string tokenizer.
0479: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0480: *
0481: * @since ostermillerutils 1.00.00
0482: */
0483: public String nextToken() {
0484: int workingPosition = position;
0485: boolean workingEmptyReturned = emptyReturned;
0486: boolean onToken = advancePosition();
0487:
0488: while ((position != workingPosition)
0489: || (emptyReturned != workingEmptyReturned)) {
0490: if (onToken) {
0491: // returning a token decreases the token count
0492: tokenCount--;
0493:
0494: return (emptyReturned ? "" : text.substring(
0495: workingPosition, (position != -1) ? position
0496: : strLength));
0497: }
0498:
0499: workingPosition = position;
0500: workingEmptyReturned = emptyReturned;
0501: onToken = advancePosition();
0502: }
0503:
0504: throw new java.util.NoSuchElementException();
0505: }
0506:
0507: /**
0508: * Advances the current position so it is before the next token.
0509: * <p>
0510: * This method skips nontoken delimiters but does not skip
0511: * token delimiters.
0512: * <p>
0513: * This method is useful when switching to the new delimiter sets (see the
0514: * second example in the class comment.)
0515: *
0516: * @return <code>true</code> if there are more tokens, <code>false</code> otherwise.
0517: *
0518: * @since ostermillerutils 1.00.00
0519: */
0520: public boolean skipDelimiters() {
0521: int workingPosition = position;
0522: boolean workingEmptyReturned = emptyReturned;
0523: boolean onToken = advancePosition();
0524:
0525: // skipping delimiters may cause the number of tokens to change
0526: tokenCount = -1;
0527:
0528: while ((position != workingPosition)
0529: || (emptyReturned != workingEmptyReturned)) {
0530: if (onToken) {
0531: // restore the state to just as it was before we found
0532: // this token and return
0533: position = workingPosition;
0534: emptyReturned = workingEmptyReturned;
0535:
0536: return true;
0537: }
0538:
0539: workingPosition = position;
0540: workingEmptyReturned = emptyReturned;
0541: onToken = advancePosition();
0542: }
0543:
0544: // the end of the string was reached
0545: // without finding any tokens
0546: return false;
0547: }
0548:
0549: /**
0550: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0551: * method can be called before it generates an exception. The current position
0552: * is not advanced.
0553: *
0554: * @return the number of tokens remaining in the string using the current
0555: * delimiter set.
0556: *
0557: * @see #nextToken()
0558: * @since ostermillerutils 1.00.00
0559: */
0560: public int countTokens() {
0561: // return the cached token count if a cache
0562: // is available.
0563: if (this .tokenCount >= 0) {
0564: return this .tokenCount;
0565: }
0566:
0567: int tokenCount = 0;
0568:
0569: // copy over state variables from the class to local
0570: // variables so that the state of this object can be
0571: // restored to the state that it was in before this
0572: // method was called.
0573: int savedPosition = position;
0574: boolean savedEmptyReturned = emptyReturned;
0575:
0576: int workingPosition = position;
0577: boolean workingEmptyReturned = emptyReturned;
0578: boolean onToken = advancePosition();
0579:
0580: while ((position != workingPosition)
0581: || (emptyReturned != workingEmptyReturned)) {
0582: if (onToken) {
0583: tokenCount++;
0584: }
0585:
0586: workingPosition = position;
0587: workingEmptyReturned = emptyReturned;
0588: onToken = advancePosition();
0589: }
0590:
0591: // restore object state
0592: position = savedPosition;
0593: emptyReturned = savedEmptyReturned;
0594:
0595: // Save the token count in case this is called again
0596: // so we wouldn't have to do so much work.
0597: this .tokenCount = tokenCount;
0598:
0599: return tokenCount;
0600: }
0601:
0602: /**
0603: * Set the delimiters used to this set of (nontoken) delimiters.
0604: *
0605: * @param delims the new set of nontoken delimiters (the set of token delimiters will be empty).
0606: *
0607: * @since ostermillerutils 1.00.00
0608: */
0609: public void setDelimiters(String delims) {
0610: setDelims(delims, null);
0611: }
0612:
0613: /**
0614: * Set the delimiters used to this set of delimiters.
0615: *
0616: * @param delims the new set of delimiters.
0617: * @param delimsAreTokens flag indicating whether the first parameter specifies
0618: * token or nontoken delimiters: false -- the first parameter specifies nontoken
0619: * delimiters, the set of token delimiters is empty; true -- the first parameter
0620: * specifies token delimiters, the set of nontoken delimiters is empty.
0621: *
0622: * @since ostermillerutils 1.00.00
0623: */
0624: public void setDelimiters(String delims, boolean delimsAreTokens) {
0625: setDelims((delimsAreTokens ? null : delims),
0626: (delimsAreTokens ? delims : null));
0627: }
0628:
0629: /**
0630: * Set the delimiters used to this set of delimiters.
0631: *
0632: * @param nontokenDelims the new set of nontoken delimiters.
0633: * @param tokenDelims the new set of token delimiters.
0634: *
0635: * @since ostermillerutils 1.00.00
0636: */
0637: public void setDelimiters(String nontokenDelims, String tokenDelims) {
0638: setDelims(nontokenDelims, tokenDelims);
0639: }
0640:
0641: /**
0642: * Set the delimiters used to this set of delimiters.
0643: *
0644: * @param nontokenDelims the new set of nontoken delimiters.
0645: * @param tokenDelims the new set of token delimiters.
0646: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0647: *
0648: * @since ostermillerutils 1.00.00
0649: */
0650: public void setDelimiters(String nontokenDelims,
0651: String tokenDelims, boolean returnEmptyTokens) {
0652: setDelims(nontokenDelims, tokenDelims);
0653: setReturnEmptyTokens(returnEmptyTokens);
0654: }
0655:
0656: /**
0657: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0658: * method can be called before it generates an exception using the given set of
0659: * (nontoken) delimiters. The delimiters given will be used for future calls to
0660: * nextToken() unless new delimiters are given. The current position
0661: * is not advanced.
0662: *
0663: * @param delims the new set of nontoken delimiters (the set of token delimiters will be empty).
0664: * @return the number of tokens remaining in the string using the new
0665: * delimiter set.
0666: *
0667: * @see #countTokens()
0668: * @since ostermillerutils 1.00.00
0669: */
0670: public int countTokens(String delims) {
0671: setDelims(delims, null);
0672:
0673: return countTokens();
0674: }
0675:
0676: /**
0677: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0678: * method can be called before it generates an exception using the given set of
0679: * delimiters. The delimiters given will be used for future calls to
0680: * nextToken() unless new delimiters are given. The current position
0681: * is not advanced.
0682: *
0683: * @param delims the new set of delimiters.
0684: * @param delimsAreTokens flag indicating whether the first parameter specifies
0685: * token or nontoken delimiters: false -- the first parameter specifies nontoken
0686: * delimiters, the set of token delimiters is empty; true -- the first parameter
0687: * specifies token delimiters, the set of nontoken delimiters is empty.
0688: * @return the number of tokens remaining in the string using the new
0689: * delimiter set.
0690: *
0691: * @see #countTokens()
0692: * @since ostermillerutils 1.00.00
0693: */
0694: public int countTokens(String delims, boolean delimsAreTokens) {
0695: setDelims((delimsAreTokens ? null : delims),
0696: (delimsAreTokens ? delims : null));
0697:
0698: return countTokens();
0699: }
0700:
0701: /**
0702: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0703: * method can be called before it generates an exception using the given set of
0704: * delimiters. The delimiters given will be used for future calls to
0705: * nextToken() unless new delimiters are given. The current position
0706: * is not advanced.
0707: *
0708: * @param nontokenDelims the new set of nontoken delimiters.
0709: * @param tokenDelims the new set of token delimiters.
0710: * @return the number of tokens remaining in the string using the new
0711: * delimiter set.
0712: *
0713: * @see #countTokens()
0714: * @since ostermillerutils 1.00.00
0715: */
0716: public int countTokens(String nontokenDelims, String tokenDelims) {
0717: setDelims(nontokenDelims, tokenDelims);
0718:
0719: return countTokens();
0720: }
0721:
0722: /**
0723: * Calculates the number of times that this tokenizer's <code>nextToken</code>
0724: * method can be called before it generates an exception using the given set of
0725: * delimiters. The delimiters given will be used for future calls to
0726: * nextToken() unless new delimiters are given. The current position
0727: * is not advanced.
0728: *
0729: * @param nontokenDelims the new set of nontoken delimiters.
0730: * @param tokenDelims the new set of token delimiters.
0731: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0732: * @return the number of tokens remaining in the string using the new
0733: * delimiter set.
0734: *
0735: * @see #countTokens()
0736: * @since ostermillerutils 1.00.00
0737: */
0738: public int countTokens(String nontokenDelims, String tokenDelims,
0739: boolean returnEmptyTokens) {
0740: setDelims(nontokenDelims, tokenDelims);
0741: setReturnEmptyTokens(returnEmptyTokens);
0742:
0743: return countTokens();
0744: }
0745:
0746: /**
0747: * Advances the state of the tokenizer to the next token or delimiter. This method only
0748: * modifies the class variables position, and emptyReturned. The type of token that
0749: * should be emitted can be deduced by examining the changes to these two variables.
0750: * If there are no more tokens, the state of these variables does not change at all.
0751: *
0752: * @return true if we are at a juncture at which a token may be emitted, false otherwise.
0753: *
0754: * @since ostermillerutils 1.00.00
0755: */
0756: private boolean advancePosition() {
0757: // if we are returning empty tokens, we are just starting to tokenizer
0758: // and there is a delimiter at the beginning of the string or the string
0759: // is empty we need to indicate that there is an empty token at the beginning.
0760: // The beginning is defined as where the delimiters were last changed.
0761: if (returnEmptyTokens
0762: && !emptyReturned
0763: && ((delimsChangedPosition == position) || ((position == -1) && (strLength == delimsChangedPosition)))) {
0764: if (strLength == delimsChangedPosition) {
0765: // Case in which the string (since delim change)
0766: // is empty, but because we are returning empty
0767: // tokens, a single empty token should be returned.
0768: emptyReturned = true;
0769:
0770: /*System.out.println("Empty token for empty string.");*/
0771: return true;
0772: }
0773:
0774: char c = text.charAt(position);
0775:
0776: if (((c <= maxDelimChar) && ((nontokenDelims != null) && (nontokenDelims
0777: .indexOf(c) != -1)))
0778: || ((tokenDelims != null) && (tokenDelims
0779: .indexOf(c) != -1))) {
0780: // There is delimiter at the very start of the string
0781: // so we must return an empty token at the beginning.
0782: emptyReturned = true;
0783:
0784: /*System.out.println("Empty token at beginning.");*/
0785: return true;
0786: }
0787: }
0788:
0789: // The main loop
0790: // Do this as long as parts of the string have yet to be examined
0791: while (position != -1) {
0792: char c = text.charAt(position);
0793:
0794: if (returnEmptyTokens && !emptyReturned
0795: && (position > delimsChangedPosition)) {
0796: char c1 = text.charAt(position - 1);
0797:
0798: // Examine the current character and the one before it.
0799: // If both of them are delimiters, then we need to return
0800: // an empty delimiter. Note that characters that were examined
0801: // before the delimiters changed should not be reexamined.
0802: if ((c <= maxDelimChar)
0803: && (c1 <= maxDelimChar)
0804: && (((nontokenDelims != null) && (nontokenDelims
0805: .indexOf(c) != -1)) || ((tokenDelims != null) && (tokenDelims
0806: .indexOf(c) != -1)))
0807: && (((nontokenDelims != null) && (nontokenDelims
0808: .indexOf(c1) != -1)) || ((tokenDelims != null) && (tokenDelims
0809: .indexOf(c1) != -1)))) {
0810: emptyReturned = true;
0811:
0812: /*System.out.println("Empty token.");*/
0813: return true;
0814: }
0815: }
0816:
0817: int nextDelimiter = ((position < (strLength - 1)) ? indexOfNextDelimiter(position + 1)
0818: : (-1));
0819:
0820: if ((c > maxDelimChar)
0821: || (((nontokenDelims == null) || (nontokenDelims
0822: .indexOf(c) == -1)) && ((tokenDelims == null) || (tokenDelims
0823: .indexOf(c) == -1)))) {
0824: // token found
0825:
0826: /*System.out.println("Token: '" +
0827: text.substring(position, (nextDelimiter == -1 ? strLength : nextDelimiter)) +
0828: "' at " + position + ".");*/
0829: position = nextDelimiter;
0830: emptyReturned = false;
0831:
0832: return true;
0833: } else if ((tokenDelims != null)
0834: && (tokenDelims.indexOf(c) != -1)) {
0835: // delimiter that can be returned as a token found
0836: emptyReturned = false;
0837:
0838: /*System.out.println("Delimiter: '" + c + "' at " + position + ".");*/
0839: position = ((position < (strLength - 1)) ? (position + 1)
0840: : (-1));
0841:
0842: return true;
0843: } else {
0844: // delimiter that is not a token found.
0845: emptyReturned = false;
0846: position = ((position < (strLength - 1)) ? (position + 1)
0847: : (-1));
0848:
0849: return false;
0850: }
0851: }
0852:
0853: // handle the case that a token is at the end of the string and we should
0854: // return empty tokens.
0855: if (returnEmptyTokens && !emptyReturned && (strLength > 0)) {
0856: char c = text.charAt(strLength - 1);
0857:
0858: if (((c <= maxDelimChar) && ((nontokenDelims != null) && (nontokenDelims
0859: .indexOf(c) != -1)))
0860: || ((tokenDelims != null) && (tokenDelims
0861: .indexOf(c) != -1))) {
0862: // empty token at the end of the string found.
0863: emptyReturned = true;
0864:
0865: /*System.out.println("Empty token at end.");*/
0866: return true;
0867: }
0868: }
0869:
0870: return false;
0871: }
0872:
0873: /**
0874: * Returns the next token in this string tokenizer's string.
0875: * <p>
0876: * First, the sets of token and nontoken delimiters are changed to be the
0877: * <code>tokenDelims</code> and <code>nontokenDelims</code>, respectively.
0878: * Then the next token (with respect to new delimiters) in the string after the
0879: * current position is returned.
0880: * <p>
0881: * The current position is set after the token returned.
0882: * <p>
0883: * The new delimiter sets remain in use after this call.
0884: *
0885: * @param nontokenDelims the new set of nontoken delimiters.
0886: * @param tokenDelims the new set of token delimiters.
0887: * @return the next token, after switching to the new delimiter set.
0888: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0889: * @see #nextToken()
0890: *
0891: * @since ostermillerutils 1.00.00
0892: */
0893: public String nextToken(String nontokenDelims, String tokenDelims) {
0894: setDelims(nontokenDelims, tokenDelims);
0895:
0896: return nextToken();
0897: }
0898:
0899: /**
0900: * Returns the next token in this string tokenizer's string.
0901: * <p>
0902: * First, the sets of token and nontoken delimiters are changed to be the
0903: * <code>tokenDelims</code> and <code>nontokenDelims</code>, respectively;
0904: * and whether or not to return empty tokens is set.
0905: * Then the next token (with respect to new delimiters) in the string after the
0906: * current position is returned.
0907: * <p>
0908: * The current position is set after the token returned.
0909: * <p>
0910: * The new delimiter sets and the new empty-token setting remain in
0911: * effect after this call.
0912: *
0913: * @param nontokenDelims the new set of nontoken delimiters.
0914: * @param tokenDelims the new set of token delimiters.
0915: * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
0916: * @return the next token, after switching to the new delimiter set.
0917: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0918: * @see #nextToken()
0919: *
0920: * @since ostermillerutils 1.00.00
0921: */
0922: public String nextToken(String nontokenDelims, String tokenDelims,
0923: boolean returnEmptyTokens) {
0924: setDelims(nontokenDelims, tokenDelims);
0925: setReturnEmptyTokens(returnEmptyTokens);
0926:
0927: return nextToken();
0928: }
0929:
0930: /**
0931: * Returns the next token in this string tokenizer's string.
0932: * <p>
0933: * Is equivalent to:
0934: * <ul>
0935: * <li> If the second parameter is <code>false</code> --
0936: * <code>nextToken(delims, null)</code>
0937: * <li> If the second parameter is <code>true</code> --
0938: * <code>nextToken(null ,delims)</code>
0939: * </ul>
0940: * <p>
0941: * @param delims the new set of token or nontoken delimiters.
0942: * @param delimsAreTokens
0943: * flag indicating whether the first parameter specifies token or
0944: * nontoken delimiters: <code>false</code> -- the first parameter
0945: * specifies nontoken delimiters, the set of token delimiters is
0946: * empty; <code>true</code> -- the first parameter specifies token
0947: * delimiters, the set of nontoken delimiters is empty.
0948: * @return the next token, after switching to the new delimiter set.
0949: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
0950: *
0951: * @see #nextToken(String,String)
0952: * @since ostermillerutils 1.00.00
0953: */
0954: public String nextToken(String delims, boolean delimsAreTokens) {
0955: return (delimsAreTokens ? nextToken(null, delims) : nextToken(
0956: delims, null));
0957: }
0958:
0959: /**
0960: * Returns the next token in this string tokenizer's string.
0961: * <p>
0962: * Is equivalent to <code>nextToken(nontokenDelims, null)</code>.
0963: *
0964: * @param nontokenDelims the new set of nontoken delimiters (the set of
0965: * token delimiters will be empty).
0966: * @return the next token, after switching to the new delimiter set.
0967: * @throws NoSuchElementException if there are no more tokens in this
0968: * tokenizer's string.
0969: *
0970: * @see #nextToken(String,String)
0971: * @since ostermillerutils 1.00.00
0972: */
0973: public String nextToken(String nontokenDelims) {
0974: return nextToken(nontokenDelims, null);
0975: }
0976:
0977: /**
0978: * Similar to <code>String.indexOf(String, int)</code>, but looks for any
0979: * single character from the current delimiter sets rather than an entire string.
0980: *
0981: * @param start index in text at which to begin the search
0982: * @return index of the first delimiter from the start index (inclusive), or -1
0983: * if there are no more delimiters in the string
0984: *
0985: * @since ostermillerutils 1.00.00
0986: */
0987: private int indexOfNextDelimiter(int start) {
0988: char c;
0989: int next;
0990:
0991: for (next = start; ((c = text.charAt(next)) > maxDelimChar)
0992: || (((nontokenDelims == null) || (nontokenDelims
0993: .indexOf(c) == -1)) && ((tokenDelims == null) || (tokenDelims
0994: .indexOf(c) == -1))); next++) {
0995: if (next == (strLength - 1)) {
0996: // we have reached the end of the string without
0997: // finding a delimiter
0998: return (-1);
0999: }
1000: }
1001:
1002: return next;
1003: }
1004:
1005: /**
1006: * Returns the same value as the <code>hasMoreTokens()</code> method. It exists
1007: * so that this class can implement the <code>Enumeration</code> interface.
1008: *
1009: * @return <code>true</code> if there are more tokens;
1010: * <code>false</code> otherwise.
1011: *
1012: * @see java.util.Enumeration
1013: * @see #hasMoreTokens()
1014: * @since ostermillerutils 1.00.00
1015: */
1016: public boolean hasMoreElements() {
1017: return hasMoreTokens();
1018: }
1019:
1020: /**
1021: * Returns the same value as the <code>nextToken()</code> method, except that
1022: * its declared return value is <code>Object</code> rather than
1023: * <code>String</code>. It exists so that this class can implement the
1024: * <code>Enumeration</code> interface.
1025: *
1026: * @return the next token in the string.
1027: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
1028: *
1029: * @see java.util.Enumeration
1030: * @see #nextToken()
1031: * @since ostermillerutils 1.00.00
1032: */
1033: public Object nextElement() {
1034: return nextToken();
1035: }
1036:
1037: /**
1038: * Returns the same value as the <code>hasMoreTokens()</code> method. It exists
1039: * so that this class can implement the <code>Iterator</code> interface.
1040: *
1041: * @return <code>true</code> if there are more tokens;
1042: * <code>false</code> otherwise.
1043: *
1044: * @see java.util.Iterator
1045: * @see #hasMoreTokens()
1046: * @since ostermillerutils 1.00.00
1047: */
1048: public boolean hasNext() {
1049: return hasMoreTokens();
1050: }
1051:
1052: /**
1053: * Returns the same value as the <code>nextToken()</code> method, except that
1054: * its declared return value is <code>Object</code> rather than
1055: * <code>String</code>. It exists so that this class can implement the
1056: * <code>Iterator</code> interface.
1057: *
1058: * @return the next token in the string.
1059: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
1060: *
1061: * @see java.util.Iterator
1062: * @see #nextToken()
1063: * @since ostermillerutils 1.00.00
1064: */
1065: public Object next() {
1066: return nextToken();
1067: }
1068:
1069: /**
1070: * This implementation always throws <code>UnsupportedOperationException</code>.
1071: * It exists so that this class can implement the <code>Iterator</code> interface.
1072: *
1073: * @throws UnsupportedOperationException always is thrown.
1074: *
1075: * @see java.util.Iterator
1076: * @since ostermillerutils 1.00.00
1077: */
1078: public void remove() {
1079: throw new UnsupportedOperationException();
1080: }
1081:
1082: /**
1083: * Set whether empty tokens should be returned from this point
1084: * in the tokenizing process onward.
1085: * <P>
1086: * Empty tokens occur when two delimiters are next to each other
1087: * or a delimiter occurs at the beginning or end of a string. If
1088: * empty tokens are set to be returned, and a comma is the non token
1089: * delimiter, the following table shows how many tokens are in each
1090: * string.<br>
1091: * <table><tr><th>String</th><th>Number of tokens</th></tr>
1092: * <tr><td align=right>"one,two"</td><td>2 - normal case with no empty tokens.</td></tr>
1093: * <tr><td align=right>"one,,three"</td><td>3 including the empty token in the middle.</td></tr>
1094: * <tr><td align=right>"one,"</td><td>2 including the empty token at the end.</td></tr>
1095: * <tr><td align=right>",two"</td><td>2 including the empty token at the beginning.</td></tr>
1096: * <tr><td align=right>","</td><td>2 including the empty tokens at the beginning and the end.</td></tr>
1097: * <tr><td align=right>""</td><td>1 - all strings will have at least one token if empty tokens are returned.</td></tr></table>
1098: *
1099: * @param returnEmptyTokens true iff empty tokens should be returned.
1100: *
1101: * @since ostermillerutils 1.00.00
1102: */
1103: public void setReturnEmptyTokens(boolean returnEmptyTokens) {
1104: // this could effect the number of tokens
1105: tokenCount = -1;
1106: this .returnEmptyTokens = returnEmptyTokens;
1107: }
1108:
1109: /**
1110: * Get the index of the character immediately
1111: * following the end of the last token. This is the position at which this tokenizer will begin looking
1112: * for the next token when a <code>nextToken()</code> method is invoked.
1113: *
1114: * @return the current position or -1 if the entire string has been tokenized.
1115: *
1116: * @since ostermillerutils 1.00.00
1117: */
1118: public int getCurrentPosition() {
1119: return this .position;
1120: }
1121:
1122: /**
1123: * Retrieve all of the remaining tokens in a String array.
1124: * This method uses the options that are currently set for
1125: * the tokenizer and will advance the state of the tokenizer
1126: * such that <code>hasMoreTokens()</code> will return false.
1127: *
1128: * @return an array of tokens from this tokenizer.
1129: *
1130: * @since ostermillerutils 1.00.00
1131: */
1132: public String[] toArray() {
1133: String[] tokenArray = new String[countTokens()];
1134:
1135: for (int i = 0; hasMoreTokens(); i++) {
1136: tokenArray[i] = nextToken();
1137: }
1138:
1139: return tokenArray;
1140: }
1141:
1142: /**
1143: * Retrieves the rest of the text as a single token.
1144: * After calling this method hasMoreTokens() will always return false.
1145: *
1146: * @return any part of the text that has not yet been tokenized.
1147: *
1148: * @since ostermillerutils 1.00.00
1149: */
1150: public String restOfText() {
1151: return nextToken(null, null);
1152: }
1153:
1154: /**
1155: * Returns the same value as nextToken() but does not alter
1156: * the internal state of the Tokenizer. Subsequent calls
1157: * to peek() or a call to nextToken() will return the same
1158: * token again.
1159: *
1160: * @return the next token from this string tokenizer.
1161: * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
1162: *
1163: * @since ostermillerutils 1.00.00
1164: */
1165: public String peek() {
1166: // copy over state variables from the class to local
1167: // variables so that the state of this object can be
1168: // restored to the state that it was in before this
1169: // method was called.
1170: int savedPosition = position;
1171: boolean savedEmptyReturned = emptyReturned;
1172: int savedtokenCount = tokenCount;
1173:
1174: // get the next token
1175: String retval = nextToken();
1176:
1177: // restore the state
1178: position = savedPosition;
1179: emptyReturned = savedEmptyReturned;
1180: tokenCount = savedtokenCount;
1181:
1182: // return the nextToken;
1183: return (retval);
1184: }
1185: }
|