001: /*
002: * This is free software, licensed under the Gnu Public License (GPL)
003: * get a copy from <http://www.gnu.org/licenses/gpl.html>
004: *
005: * author: Henner Zeller <H.Zeller@acm.org>
006: */
007: package henplus.commands;
008:
import java.util.Iterator;
import java.util.NoSuchElementException;
010:
/**
 * Utility class to split commands into tokens.
 *
 * <p>Tokens are separated by whitespace. A token that starts with one of the
 * configured opening separator characters runs up to and including the first
 * following occurrence of the matching closing character, so whitespace
 * inside such a token does not split it. Separators are not matched
 * recursively: the first closing character ends the token.
 *
 * <p>The tokenizer follows the {@link Iterator} protocol: {@link #hasNext()}
 * only looks ahead and may be called repeatedly, while {@link #next()} and
 * {@link #nextToken()} hand out each token exactly once.
 */
public class CommandTokenizer implements Iterator {
    private final char[] _toTokenize;
    private final char[] _separatorBegins;
    private final char[] _separatorEnds;

    private int _pos;
    private boolean _tokenFinished;
    private String _nextToken;
    /** true while {@link #_nextToken} has been parsed but not yet handed out. */
    private boolean _tokenPending;

    /**
     * Tokenizes a command string. Strings are separated at any whitespace
     * character unless the whitespace lies within an enclosed element.
     * Enclosed elements are described by the given separatorPairs; the
     * separators always have to be given in pairs, even if the opening and
     * closing element is the same.
     * Example could be <b><code>""()[]{}</code></b>. These separator
     * pairs handle strings and elements in all kinds of parentheses.
     *
     * @param cmd            the command string to split into tokens.
     * @param separatorPairs opening/closing characters, alternating; the
     *                       length must therefore be even. May be empty.
     * @throws IllegalArgumentException if separatorPairs has an odd length.
     */
    public CommandTokenizer(String cmd, String separatorPairs) {
        _toTokenize = cmd.toCharArray();
        final int sepLen = separatorPairs.length();
        if (sepLen % 2 != 0) {
            throw new IllegalArgumentException(
                    "invalid numbers of pairs");
        }
        final int pairCount = sepLen / 2;
        _separatorBegins = new char[pairCount];
        _separatorEnds = new char[pairCount];
        for (int pair = 0; pair < pairCount; ++pair) {
            _separatorBegins[pair] = separatorPairs.charAt(2 * pair);
            _separatorEnds[pair] = separatorPairs.charAt(2 * pair + 1);
        }
        _pos = 0;
    }

    //-- java.util.Iterator interface implementation

    /**
     * Checks whether another token is available. The token is parsed at most
     * once; calling this method repeatedly without fetching the token does
     * not skip any input.
     *
     * @return true, if {@link #next()} will deliver a token.
     */
    public boolean hasNext() {
        if (!_tokenPending) {
            _tokenPending = parseNextToken();
        }
        return _tokenPending;
    }

    /**
     * Skips leading whitespace and parses the token that starts at the
     * current position. On success the token text is stored in
     * {@link #_nextToken} and {@link #_tokenFinished} is updated; if only
     * whitespace is left, both are left untouched.
     *
     * @return true, if a token has been parsed; false at the end of input.
     */
    private boolean parseNextToken() {
        final int length = _toTokenize.length;
        while (_pos < length && Character.isWhitespace(_toTokenize[_pos])) {
            _pos++;
        }
        if (_pos >= length) {
            return false;
        }

        final int startToken = _pos;
        final int expectedEndToken = findEndToken(_toTokenize[_pos]);
        if (expectedEndToken < 0) {
            // plain token: ends at whitespace or right before an opening
            // separator, which then starts a token of its own.
            while (_pos < length
                    && !Character.isWhitespace(_toTokenize[_pos])
                    && !isSpecialSeparator(_toTokenize[_pos])) {
                _pos++;
            }
            // a plain token reaching the very end of the input could still
            // be continued, so it counts as unfinished.
            _tokenFinished = _pos < length;
        } else {
            final char endTok = (char) expectedEndToken;
            _pos++; // step over the opening character
            while (_pos < length && _toTokenize[_pos] != endTok) {
                _pos++;
            }
            // stopping before the end means the closing character was found.
            _tokenFinished = _pos < length;
            if (_tokenFinished) {
                _pos++; // include the close token character
            }
        }
        _nextToken = new String(_toTokenize, startToken, _pos - startToken);
        return true;
    }

    /**
     * Tells whether the given character opens an enclosed element.
     */
    private boolean isSpecialSeparator(char c) {
        // char values are never negative, so -1 reliably means 'no match'.
        return findEndToken(c) >= 0;
    }

    /**
     * tries to determine appropriate end token if this is a startToken;
     * returns -1 otherwise.
     */
    private int findEndToken(char tokenStart) {
        for (int i = 0; i < _separatorBegins.length; ++i) {
            if (_separatorBegins[i] == tokenStart) {
                return _separatorEnds[i];
            }
        }
        return -1;
    }

    /**
     * Returns the next token as String; typed convenience variant of
     * {@link #next()}.
     *
     * @return the next token, including any enclosing separator characters.
     * @throws NoSuchElementException if there are no more tokens.
     */
    public String nextToken() {
        return (String) next();
    }

    /**
     * Returns the next token and consumes it. A preceding call to
     * {@link #hasNext()} is not required.
     *
     * @return the next token as String.
     * @throws NoSuchElementException if there are no more tokens.
     */
    public Object next() {
        if (!hasNext()) {
            throw new NoSuchElementException("no more tokens");
        }
        _tokenPending = false;
        return _nextToken;
    }

    /**
     * Returns whether the most recently parsed token is finished; that is
     * the token found by the last successful {@link #hasNext()} or
     * {@link #next()}. A token is unfinished if
     *  - it is an enclosed token that has not seen its closing element till
     *    the end of the string.
     *  - it is a normal token that extends to the very end of the string,
     *    i.e. it is followed neither by whitespace nor by an opening
     *    separator.
     * Returns false as long as no token has been parsed.
     */
    public boolean isCurrentTokenFinished() {
        return _tokenFinished;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always.
     */
    public void remove() {
        throw new UnsupportedOperationException("no!");
    }

    /**
     * Small manual test driver: tokenizes the first argument using the
     * separator pairs given as second argument and prints every token
     * together with its completion state.
     */
    public static void main(String[] argv) {
        if (argv == null || argv.length < 2) {
            System.err.println(
                    "usage: CommandTokenizer <command> <separatorPairs>");
            return;
        }
        CommandTokenizer cmdTok = new CommandTokenizer(argv[0], argv[1]);
        while (cmdTok.hasNext()) {
            String tok = cmdTok.nextToken();
            System.out.println("token: '" + tok + "'; complete="
                    + cmdTok.isCurrentTokenFinished());
        }
    }
}
|