01: /*
02: * gnu/regexp/RETokenWordBoundary.java
03: * Copyright (C) 2001 Wes Biggs
04: *
05: * This library is free software; you can redistribute it and/or modify
06: * it under the terms of the GNU Lesser General Public License as published
07: * by the Free Software Foundation; either version 2.1 of the License, or
08: * (at your option) any later version.
09: *
10: * This library is distributed in the hope that it will be useful,
11: * but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13: * GNU Lesser General Public License for more details.
14: *
15: * You should have received a copy of the GNU Lesser General Public License
16: * along with this program; if not, write to the Free Software
17: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18: */
19:
20: package gnu.regexp;
21:
22: /**
23: * Represents a combination lookahead/lookbehind for POSIX [:alnum:].
24: */
25: final class RETokenWordBoundary extends REToken {
26: private boolean negated;
27: private int where;
28: static final int BEGIN = 1;
29: static final int END = 2;
30:
31: RETokenWordBoundary(int subIndex, int where, boolean negated) {
32: super (subIndex);
33: this .where = where;
34: this .negated = negated;
35: }
36:
37: boolean match(CharIndexed input, REMatch mymatch) {
38: // Word boundary means input[index-1] was a word character
39: // and input[index] is not, or input[index] is a word character
40: // and input[index-1] was not
41: // In the string "one two three", these positions match:
42: // |o|n|e| |t|w|o| |t|h|r|e|e|
43: // ^ ^ ^ ^ ^ ^
44: boolean after = false; // is current character a letter or digit?
45: boolean before = false; // is previous character a letter or digit?
46: char ch;
47:
48: // TODO: Also check REG_ANCHORINDEX vs. anchor
49: if (((mymatch.eflags & RE.REG_ANCHORINDEX) != RE.REG_ANCHORINDEX)
50: || (mymatch.offset + mymatch.index > mymatch.anchor)) {
51: if ((ch = input.charAt(mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) {
52: before = Character.isLetterOrDigit(ch) || (ch == '_');
53: }
54: }
55:
56: if ((ch = input.charAt(mymatch.index)) != CharIndexed.OUT_OF_BOUNDS) {
57: after = Character.isLetterOrDigit(ch) || (ch == '_');
58: }
59:
60: // if (before) and (!after), we're at end (\>)
61: // if (after) and (!before), we're at beginning (\<)
62: boolean doNext = false;
63:
64: if ((where & BEGIN) == BEGIN) {
65: doNext = after && !before;
66: }
67: if ((where & END) == END) {
68: doNext ^= before && !after;
69: }
70:
71: if (negated)
72: doNext = !doNext;
73:
74: return (doNext ? next(input, mymatch) : false);
75: }
76:
77: void dump(StringBuffer os) {
78: if (where == (BEGIN | END)) {
79: os.append(negated ? "\\B" : "\\b");
80: } else if (where == BEGIN) {
81: os.append("\\<");
82: } else {
83: os.append("\\>");
84: }
85: }
86: }
|