001: /*
002: * gnu/regexp/RETokenPOSIX.java
003: * Copyright (C) 1998-2001 Wes Biggs
004: *
005: * This library is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU Lesser General Public License as published
007: * by the Free Software Foundation; either version 2.1 of the License, or
008: * (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public License
016: * along with this program; if not, write to the Free Software
017: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
018: */
019:
020: package gnu.regexp;
021:
022: final class RETokenPOSIX extends REToken {
023: int type;
024: boolean insens;
025: boolean negated;
026:
027: static final int ALNUM = 0;
028: static final int ALPHA = 1;
029: static final int BLANK = 2;
030: static final int CNTRL = 3;
031: static final int DIGIT = 4;
032: static final int GRAPH = 5;
033: static final int LOWER = 6;
034: static final int PRINT = 7;
035: static final int PUNCT = 8;
036: static final int SPACE = 9;
037: static final int UPPER = 10;
038: static final int XDIGIT = 11;
039:
040: // Array indices correspond to constants defined above.
041: static final String[] s_nameTable = { "alnum", "alpha", "blank",
042: "cntrl", "digit", "graph", "lower", "print", "punct",
043: "space", "upper", "xdigit" };
044:
045: // The RE constructor uses this to look up the constant for a string
046: static int intValue(String key) {
047: for (int i = 0; i < s_nameTable.length; i++) {
048: if (s_nameTable[i].equals(key))
049: return i;
050: }
051: return -1;
052: }
053:
054: RETokenPOSIX(int subIndex, int type, boolean insens, boolean negated) {
055: super (subIndex);
056: this .type = type;
057: this .insens = insens;
058: this .negated = negated;
059: }
060:
061: int getMinimumLength() {
062: return 1;
063: }
064:
065: boolean match(CharIndexed input, REMatch mymatch) {
066: char ch = input.charAt(mymatch.index);
067: if (ch == CharIndexed.OUT_OF_BOUNDS)
068: return false;
069:
070: boolean retval = false;
071: switch (type) {
072: case ALNUM:
073: // Note that there is some debate over whether '_' should be included
074: retval = Character.isLetterOrDigit(ch) || (ch == '_');
075: break;
076: case ALPHA:
077: retval = Character.isLetter(ch);
078: break;
079: case BLANK:
080: retval = ((ch == ' ') || (ch == '\t'));
081: break;
082: case CNTRL:
083: retval = Character.isISOControl(ch);
084: break;
085: case DIGIT:
086: retval = Character.isDigit(ch);
087: break;
088: case GRAPH:
089: retval = (!(Character.isWhitespace(ch) || Character
090: .isISOControl(ch)));
091: break;
092: case LOWER:
093: retval = ((insens && Character.isLetter(ch)) || Character
094: .isLowerCase(ch));
095: break;
096: case PRINT:
097: retval = (!(Character.isWhitespace(ch) || Character
098: .isISOControl(ch)))
099: || (ch == ' ');
100: break;
101: case PUNCT:
102: // This feels sloppy, especially for non-U.S. locales.
103: retval = ("`~!@#$%^&*()-_=+[]{}\\|;:'\"/?,.<>".indexOf(ch) != -1);
104: break;
105: case SPACE:
106: retval = Character.isWhitespace(ch);
107: break;
108: case UPPER:
109: retval = ((insens && Character.isLetter(ch)) || Character
110: .isUpperCase(ch));
111: break;
112: case XDIGIT:
113: retval = (Character.isDigit(ch) || ("abcdefABCDEF"
114: .indexOf(ch) != -1));
115: break;
116: }
117:
118: if (negated)
119: retval = !retval;
120: if (retval) {
121: ++mymatch.index;
122: return next(input, mymatch);
123: } else
124: return false;
125: }
126:
127: void dump(StringBuffer os) {
128: if (negated)
129: os.append('^');
130: os.append("[:" + s_nameTable[type] + ":]");
131: }
132: }
|