001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.xerces.impl.xpath.regex;
019:
020: import java.text.CharacterIterator;
021:
022: /**
023: * Boyer-Moore searcher.
024: *
025: * @xerces.internal
026: *
027: * @version $Id: BMPattern.java 572108 2007-09-02 18:48:31Z mrglavas $
028: */
029: public class BMPattern {
030: final char[] pattern;
031: final int[] shiftTable;
032: final boolean ignoreCase;
033:
034: public BMPattern(String pat, boolean ignoreCase) {
035: this (pat, 256, ignoreCase);
036: }
037:
038: public BMPattern(String pat, int tableSize, boolean ignoreCase) {
039: this .pattern = pat.toCharArray();
040: this .shiftTable = new int[tableSize];
041: this .ignoreCase = ignoreCase;
042:
043: int length = pattern.length;
044: for (int i = 0; i < this .shiftTable.length; i++)
045: this .shiftTable[i] = length;
046:
047: for (int i = 0; i < length; i++) {
048: char ch = this .pattern[i];
049: int diff = length - i - 1;
050: int index = ch % this .shiftTable.length;
051: if (diff < this .shiftTable[index])
052: this .shiftTable[index] = diff;
053: if (this .ignoreCase) {
054: ch = Character.toUpperCase(ch);
055: index = ch % this .shiftTable.length;
056: if (diff < this .shiftTable[index])
057: this .shiftTable[index] = diff;
058: ch = Character.toLowerCase(ch);
059: index = ch % this .shiftTable.length;
060: if (diff < this .shiftTable[index])
061: this .shiftTable[index] = diff;
062: }
063: }
064: }
065:
066: /**
067: *
068: * @return -1 if <var>iterator</var> does not contain this pattern.
069: */
070: public int matches(CharacterIterator iterator, int start, int limit) {
071: if (this .ignoreCase)
072: return this .matchesIgnoreCase(iterator, start, limit);
073: int plength = this .pattern.length;
074: if (plength == 0)
075: return start;
076: int index = start + plength;
077: while (index <= limit) {
078: int pindex = plength;
079: int nindex = index + 1;
080: char ch;
081: do {
082: if ((ch = iterator.setIndex(--index)) != this .pattern[--pindex])
083: break;
084: if (pindex == 0)
085: return index;
086: } while (pindex > 0);
087: index += this .shiftTable[ch % this .shiftTable.length] + 1;
088: if (index < nindex)
089: index = nindex;
090: }
091: return -1;
092: }
093:
094: /**
095: *
096: * @return -1 if <var>str</var> does not contain this pattern.
097: */
098: public int matches(String str, int start, int limit) {
099: if (this .ignoreCase)
100: return this .matchesIgnoreCase(str, start, limit);
101: int plength = this .pattern.length;
102: if (plength == 0)
103: return start;
104: int index = start + plength;
105: while (index <= limit) {
106: //System.err.println("Starts at "+index);
107: int pindex = plength;
108: int nindex = index + 1;
109: char ch;
110: do {
111: if ((ch = str.charAt(--index)) != this .pattern[--pindex])
112: break;
113: if (pindex == 0)
114: return index;
115: } while (pindex > 0);
116: index += this .shiftTable[ch % this .shiftTable.length] + 1;
117: if (index < nindex)
118: index = nindex;
119: }
120: return -1;
121: }
122:
123: /**
124: *
125: * @return -1 if <var>chars</char> does not contain this pattern.
126: */
127: public int matches(char[] chars, int start, int limit) {
128: if (this .ignoreCase)
129: return this .matchesIgnoreCase(chars, start, limit);
130: int plength = this .pattern.length;
131: if (plength == 0)
132: return start;
133: int index = start + plength;
134: while (index <= limit) {
135: //System.err.println("Starts at "+index);
136: int pindex = plength;
137: int nindex = index + 1;
138: char ch;
139: do {
140: if ((ch = chars[--index]) != this .pattern[--pindex])
141: break;
142: if (pindex == 0)
143: return index;
144: } while (pindex > 0);
145: index += this .shiftTable[ch % this .shiftTable.length] + 1;
146: if (index < nindex)
147: index = nindex;
148: }
149: return -1;
150: }
151:
152: int matchesIgnoreCase(CharacterIterator iterator, int start,
153: int limit) {
154: int plength = this .pattern.length;
155: if (plength == 0)
156: return start;
157: int index = start + plength;
158: while (index <= limit) {
159: int pindex = plength;
160: int nindex = index + 1;
161: char ch;
162: do {
163: char ch1 = ch = iterator.setIndex(--index);
164: char ch2 = this .pattern[--pindex];
165: if (ch1 != ch2) {
166: ch1 = Character.toUpperCase(ch1);
167: ch2 = Character.toUpperCase(ch2);
168: if (ch1 != ch2
169: && Character.toLowerCase(ch1) != Character
170: .toLowerCase(ch2))
171: break;
172: }
173: if (pindex == 0)
174: return index;
175: } while (pindex > 0);
176: index += this .shiftTable[ch % this .shiftTable.length] + 1;
177: if (index < nindex)
178: index = nindex;
179: }
180: return -1;
181: }
182:
183: int matchesIgnoreCase(String text, int start, int limit) {
184: int plength = this .pattern.length;
185: if (plength == 0)
186: return start;
187: int index = start + plength;
188: while (index <= limit) {
189: int pindex = plength;
190: int nindex = index + 1;
191: char ch;
192: do {
193: char ch1 = ch = text.charAt(--index);
194: char ch2 = this .pattern[--pindex];
195: if (ch1 != ch2) {
196: ch1 = Character.toUpperCase(ch1);
197: ch2 = Character.toUpperCase(ch2);
198: if (ch1 != ch2
199: && Character.toLowerCase(ch1) != Character
200: .toLowerCase(ch2))
201: break;
202: }
203: if (pindex == 0)
204: return index;
205: } while (pindex > 0);
206: index += this .shiftTable[ch % this .shiftTable.length] + 1;
207: if (index < nindex)
208: index = nindex;
209: }
210: return -1;
211: }
212:
213: int matchesIgnoreCase(char[] chars, int start, int limit) {
214: int plength = this .pattern.length;
215: if (plength == 0)
216: return start;
217: int index = start + plength;
218: while (index <= limit) {
219: int pindex = plength;
220: int nindex = index + 1;
221: char ch;
222: do {
223: char ch1 = ch = chars[--index];
224: char ch2 = this .pattern[--pindex];
225: if (ch1 != ch2) {
226: ch1 = Character.toUpperCase(ch1);
227: ch2 = Character.toUpperCase(ch2);
228: if (ch1 != ch2
229: && Character.toLowerCase(ch1) != Character
230: .toLowerCase(ch2))
231: break;
232: }
233: if (pindex == 0)
234: return index;
235: } while (pindex > 0);
236: index += this .shiftTable[ch % this .shiftTable.length] + 1;
237: if (index < nindex)
238: index = nindex;
239: }
240: return -1;
241: }
242:
243: /*
244: public static void main(String[] argv) {
245: try {
246: int[] shiftTable = new int[256];
247: initializeBoyerMoore(argv[0], shiftTable, true);
248: int o = -1;
249: CharacterIterator ite = new java.text.StringCharacterIterator(argv[1]);
250: long start = System.currentTimeMillis();
251: //for (int i = 0; i < 10000; i ++)
252: o = searchIgnoreCasesWithBoyerMoore(ite, 0, argv[0], shiftTable);
253: start = System.currentTimeMillis()-start;
254: System.out.println("Result: "+o+", Elapsed: "+start);
255: } catch (Exception ex) {
256: ex.printStackTrace();
257: }
258: }*/
259: }
|