001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: /**
019: * @author Nikolay A. Kuznetsov
020: * @version $Revision: 1.15.2.2 $
021: */package java.util.regex;
022:
023: /**
024: * This class represents nodes constructed with character sequences. For
025: * example, lets consider regular expression: ".*word.*". During regular
026: * expression compilation phase character sequence w-o-r-d, will be represented
027: * with single node for the entire word.
028: *
029: * During the match phase, Moyer-Moore algorithm will be used for fast
030: * searching.
031: *
032: * Please follow the next link for more details about mentioned algorithm:
033: * http://portal.acm.org/citation.cfm?id=359859
034: *
035: * @author Nikolay A. Kuznetsov
036: * @version $Revision: 1.15.2.2 $
037: */
038: class SequenceSet extends LeafSet {
039:
040: private String string = null;
041:
042: private IntHash leftToRight;
043:
044: private IntHash rightToLeft;
045:
046: SequenceSet(StringBuffer substring) {
047: this .string = substring.toString();
048: charCount = substring.length();
049:
050: leftToRight = new IntHash(charCount);
051: rightToLeft = new IntHash(charCount);
052: for (int j = 0; j < charCount - 1; j++) {
053: leftToRight.put(string.charAt(j), charCount - j - 1);
054: rightToLeft.put(string.charAt(charCount - j - 1), charCount
055: - j - 1);
056: }
057: }
058:
059: public int accepts(int strIndex, CharSequence testString) {
060: return startsWith(testString, strIndex) ? charCount : -1;
061: }
062:
063: public int find(int strIndex, CharSequence testString,
064: MatchResultImpl matchResult) {
065:
066: int strLength = matchResult.getRightBound();
067:
068: while (strIndex <= strLength) {
069: strIndex = indexOf(testString, strIndex, strLength);
070:
071: if (strIndex < 0)
072: return -1;
073: if (next.matches(strIndex + charCount, testString,
074: matchResult) >= 0)
075: return strIndex;
076:
077: strIndex++;
078: }
079:
080: return -1;
081: }
082:
083: public int findBack(int strIndex, int lastIndex,
084: CharSequence testString, MatchResultImpl matchResult) {
085: String testStr = testString.toString();
086:
087: while (lastIndex >= strIndex) {
088: lastIndex = lastIndexOf(testString, strIndex, lastIndex);
089:
090: if (lastIndex < 0)
091: return -1;
092: if (next.matches(lastIndex + charCount, testString,
093: matchResult) >= 0)
094: return lastIndex;
095:
096: lastIndex--;
097: }
098:
099: return -1;
100: }
101:
102: public String getName() {
103: return "sequence: " + string; //$NON-NLS-1$
104: }
105:
106: public boolean first(AbstractSet set) {
107: if (set instanceof CharSet) {
108: return ((CharSet) set).getChar() == string.charAt(0);
109: } else if (set instanceof RangeSet) {
110: return ((RangeSet) set).accepts(0, string.substring(0, 1)) > 0;
111: } else if (set instanceof SupplRangeSet) {
112: return ((SupplRangeSet) set).contains(string.charAt(0))
113: || ((string.length() > 1) && ((SupplRangeSet) set)
114: .contains(Character.toCodePoint(string
115: .charAt(0), string.charAt(1))));
116: } else if ((set instanceof SupplCharSet)) {
117: return (string.length() > 1) ? ((SupplCharSet) set)
118: .getCodePoint() == Character.toCodePoint(string
119: .charAt(0), string.charAt(1)) : false;
120: }
121:
122: return true;
123: }
124:
125: protected int indexOf(CharSequence str, int from, int to) {
126: int last = string.charAt(charCount - 1);
127: int i = from;
128:
129: while (i <= to - charCount) {
130: char ch = str.charAt(i + charCount - 1);
131: if (ch == last && startsWith(str, i)) {
132: return i;
133: }
134:
135: i += leftToRight.get(ch);
136: }
137: return -1;
138: }
139:
140: protected int lastIndexOf(CharSequence str, int to, int from) {
141: int first = string.charAt(0);
142: int size = str.length();
143: int delta;
144: int i = ((delta = size - from - charCount) > 0) ? from : from
145: + delta;
146:
147: while (i >= to) {
148: char ch = str.charAt(i);
149: if (ch == first && startsWith(str, i)) {
150: return i;
151: }
152:
153: i -= rightToLeft.get(ch);
154: }
155: return -1;
156: }
157:
158: protected boolean startsWith(CharSequence str, int from) {
159: for (int i = 0; i < charCount; i++) {
160: if (str.charAt(i + from) != string.charAt(i))
161: return false;
162: }
163: return true;
164: }
165:
166: class IntHash {
167: int[] table, values;
168:
169: int mask;
170:
171: int size; // <-maximum shift
172:
173: public IntHash(int size) {
174: while (size >= mask) {
175: mask = (mask << 1) | 1;
176: }
177: mask = (mask << 1) | 1;
178: table = new int[mask + 1];
179: values = new int[mask + 1];
180: this .size = size;
181: }
182:
183: public void put(int key, int value) {
184: int i = 0;
185: int hashCode = key & mask;
186:
187: for (;;) {
188: if (table[hashCode] == 0 // empty
189: || table[hashCode] == key) {// rewrite
190: table[hashCode] = key;
191: values[hashCode] = value;
192: return;
193: }
194: i++;
195: i &= mask;
196:
197: hashCode += i;
198: hashCode &= mask;
199: }
200: }
201:
202: public int get(int key) {
203:
204: int hashCode = key & mask;
205: int i = 0;
206: int storedKey;
207:
208: for (;;) {
209: storedKey = table[hashCode];
210:
211: if (storedKey == 0) { // empty
212: return size;
213: }
214:
215: if (storedKey == key) {
216: return values[hashCode];
217: }
218:
219: i++;
220: i &= mask;
221:
222: hashCode += i;
223: hashCode &= mask;
224: }
225: }
226: }
227: }
|