001: package org.ofbiz.rules.parse.tokens;
002:
003: import java.io.*;
004:
005: /**
006: * <p><b>Title:</b> Word State
007: * <p><b>Description:</b> None
008: * <p>Copyright (c) 1999 Steven J. Metsker.
009: * <p>Copyright (c) 2001 The Open For Business Project - www.ofbiz.org
010: *
011: * <p>Permission is hereby granted, free of charge, to any person obtaining a
012: * copy of this software and associated documentation files (the "Software"),
013: * to deal in the Software without restriction, including without limitation
014: * the rights to use, copy, modify, merge, publish, distribute, sublicense,
015: * and/or sell copies of the Software, and to permit persons to whom the
016: * Software is furnished to do so, subject to the following conditions:
017: *
018: * <p>The above copyright notice and this permission notice shall be included
019: * in all copies or substantial portions of the Software.
020: *
021: * <p>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
022: * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
023: * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
024: * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
025: * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
026: * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
027: * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
028: *
029: * <br>
030: * A WordState returns a word from a reader. Like other
031: * states, a tokenizer transfers the job of reading to this
032: * state, depending on an initial character. Thus, the
033: * tokenizer decides which characters may begin a word, and
034: * this state determines which characters may appear as a
035: * second or later character in a word. These are typically
036: * different sets of characters; in particular, it is typical
037: * for digits to appear as parts of a word, but not as the
038: * initial character of a word.
039: *
040: * <p>
041: * By default, the following characters may appear in a word.
042: * The method <code>setWordChars()</code> allows customizing
043: * this.
044: *
045: * <blockquote><pre>
046: * From To
047: * 'a', 'z'
048: * 'A', 'Z'
049: * '0', '9'
050: *
051: * as well as: minus sign, underscore, apostrophe, and the characters 0xc0 through 0xff.
052: *
053: * </pre></blockquote>
054: *
055: * @author Steven J. Metsker
056: * @version 1.0
057: */
058: public class WordState extends TokenizerState {
059: protected char charbuf[] = new char[16];
060: protected boolean wordChar[] = new boolean[256];
061:
062: /**
063: * Constructs a word state with a default idea of what
064: * characters are admissible inside a word (as described in
065: * the class comment).
066: *
067: * @return a state for recognizing a word
068: */
069: public WordState() {
070: setWordChars('a', 'z', true);
071: setWordChars('A', 'Z', true);
072: setWordChars('0', '9', true);
073: setWordChars('-', '-', true);
074: setWordChars('_', '_', true);
075: setWordChars('\'', '\'', true);
076: setWordChars(0xc0, 0xff, true);
077: }
078:
079: /**
080: * Fatten up charbuf as necessary.
081: */
082: protected void checkBufLength(int i) {
083: if (i >= charbuf.length) {
084: char nb[] = new char[charbuf.length * 2];
085:
086: System.arraycopy(charbuf, 0, nb, 0, charbuf.length);
087: charbuf = nb;
088: }
089: }
090:
091: /**
092: * Return a word token from a reader.
093: *
094: * @return a word token from a reader
095: */
096: public Token nextToken(PushbackReader r, int c, Tokenizer t)
097: throws IOException {
098:
099: int i = 0;
100:
101: do {
102: checkBufLength(i);
103: charbuf[i++] = (char) c;
104: c = r.read();
105: } while (wordChar(c));
106:
107: if (c >= 0) {
108: r.unread(c);
109: }
110: String sval = String.copyValueOf(charbuf, 0, i);
111:
112: return new Token(Token.TT_WORD, sval, 0);
113: }
114:
115: /**
116: * Establish characters in the given range as valid
117: * characters for part of a word after the first character.
118: * Note that the tokenizer must determine which characters
119: * are valid as the beginning character of a word.
120: *
121: * @param from char
122: *
123: * @param to char
124: *
125: * @param boolean true, if this state should allow
126: * characters in the given range as part
127: * of a word
128: */
129: public void setWordChars(int from, int to, boolean b) {
130: for (int i = from; i <= to; i++) {
131: if (i >= 0 && i < wordChar.length) {
132: wordChar[i] = b;
133: }
134: }
135: }
136:
137: /**
138: * Just a test of the wordChar array.
139: */
140: protected boolean wordChar(int c) {
141: if (c >= 0 && c < wordChar.length) {
142: return wordChar[c];
143: }
144: return false;
145: }
146: }
|