001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.utils.regex;
059:
060: import java.text.CharacterIterator;
061:
062: /**
063: * Boyer-Moore searcher.
064: */
065: public class BMPattern {
066: char[] pattern;
067: int[] shiftTable;
068: boolean ignoreCase;
069:
070: public BMPattern(String pat, boolean ignoreCase) {
071: this (pat, 256, ignoreCase);
072: }
073:
074: public BMPattern(String pat, int tableSize, boolean ignoreCase) {
075: this .pattern = pat.toCharArray();
076: this .shiftTable = new int[tableSize];
077: this .ignoreCase = ignoreCase;
078:
079: int length = pattern.length;
080: for (int i = 0; i < this .shiftTable.length; i++)
081: this .shiftTable[i] = length;
082:
083: for (int i = 0; i < length; i++) {
084: char ch = this .pattern[i];
085: int diff = length - i - 1;
086: int index = ch % this .shiftTable.length;
087: if (diff < this .shiftTable[index])
088: this .shiftTable[index] = diff;
089: if (this .ignoreCase) {
090: ch = Character.toUpperCase(ch);
091: index = ch % this .shiftTable.length;
092: if (diff < this .shiftTable[index])
093: this .shiftTable[index] = diff;
094: ch = Character.toLowerCase(ch);
095: index = ch % this .shiftTable.length;
096: if (diff < this .shiftTable[index])
097: this .shiftTable[index] = diff;
098: }
099: }
100: }
101:
102: /**
103: *
104: * @return -1 if <var>iterator</var> does not contain this pattern.
105: */
106: public int matches(CharacterIterator iterator, int start, int limit) {
107: if (this .ignoreCase)
108: return this .matchesIgnoreCase(iterator, start, limit);
109: int plength = this .pattern.length;
110: if (plength == 0)
111: return start;
112: int index = start + plength;
113: while (index <= limit) {
114: int pindex = plength;
115: int nindex = index + 1;
116: char ch;
117: do {
118: if ((ch = iterator.setIndex(--index)) != this .pattern[--pindex])
119: break;
120: if (pindex == 0)
121: return index;
122: } while (pindex > 0);
123: index += this .shiftTable[ch % this .shiftTable.length] + 1;
124: if (index < nindex)
125: index = nindex;
126: }
127: return -1;
128: }
129:
130: /**
131: *
132: * @return -1 if <var>str</var> does not contain this pattern.
133: */
134: public int matches(String str, int start, int limit) {
135: if (this .ignoreCase)
136: return this .matchesIgnoreCase(str, start, limit);
137: int plength = this .pattern.length;
138: if (plength == 0)
139: return start;
140: int index = start + plength;
141: while (index <= limit) {
142: //System.err.println("Starts at "+index);
143: int pindex = plength;
144: int nindex = index + 1;
145: char ch;
146: do {
147: if ((ch = str.charAt(--index)) != this .pattern[--pindex])
148: break;
149: if (pindex == 0)
150: return index;
151: } while (pindex > 0);
152: index += this .shiftTable[ch % this .shiftTable.length] + 1;
153: if (index < nindex)
154: index = nindex;
155: }
156: return -1;
157: }
158:
159: /**
160: *
161: * @return -1 if <var>chars</char> does not contain this pattern.
162: */
163: public int matches(char[] chars, int start, int limit) {
164: if (this .ignoreCase)
165: return this .matchesIgnoreCase(chars, start, limit);
166: int plength = this .pattern.length;
167: if (plength == 0)
168: return start;
169: int index = start + plength;
170: while (index <= limit) {
171: //System.err.println("Starts at "+index);
172: int pindex = plength;
173: int nindex = index + 1;
174: char ch;
175: do {
176: if ((ch = chars[--index]) != this .pattern[--pindex])
177: break;
178: if (pindex == 0)
179: return index;
180: } while (pindex > 0);
181: index += this .shiftTable[ch % this .shiftTable.length] + 1;
182: if (index < nindex)
183: index = nindex;
184: }
185: return -1;
186: }
187:
188: int matchesIgnoreCase(CharacterIterator iterator, int start,
189: int limit) {
190: int plength = this .pattern.length;
191: if (plength == 0)
192: return start;
193: int index = start + plength;
194: while (index <= limit) {
195: int pindex = plength;
196: int nindex = index + 1;
197: char ch;
198: do {
199: char ch1 = ch = iterator.setIndex(--index);
200: char ch2 = this .pattern[--pindex];
201: if (ch1 != ch2) {
202: ch1 = Character.toUpperCase(ch1);
203: ch2 = Character.toUpperCase(ch2);
204: if (ch1 != ch2
205: && Character.toLowerCase(ch1) != Character
206: .toLowerCase(ch2))
207: break;
208: }
209: if (pindex == 0)
210: return index;
211: } while (pindex > 0);
212: index += this .shiftTable[ch % this .shiftTable.length] + 1;
213: if (index < nindex)
214: index = nindex;
215: }
216: return -1;
217: }
218:
219: int matchesIgnoreCase(String text, int start, int limit) {
220: int plength = this .pattern.length;
221: if (plength == 0)
222: return start;
223: int index = start + plength;
224: while (index <= limit) {
225: int pindex = plength;
226: int nindex = index + 1;
227: char ch;
228: do {
229: char ch1 = ch = text.charAt(--index);
230: char ch2 = this .pattern[--pindex];
231: if (ch1 != ch2) {
232: ch1 = Character.toUpperCase(ch1);
233: ch2 = Character.toUpperCase(ch2);
234: if (ch1 != ch2
235: && Character.toLowerCase(ch1) != Character
236: .toLowerCase(ch2))
237: break;
238: }
239: if (pindex == 0)
240: return index;
241: } while (pindex > 0);
242: index += this .shiftTable[ch % this .shiftTable.length] + 1;
243: if (index < nindex)
244: index = nindex;
245: }
246: return -1;
247: }
248:
249: int matchesIgnoreCase(char[] chars, int start, int limit) {
250: int plength = this .pattern.length;
251: if (plength == 0)
252: return start;
253: int index = start + plength;
254: while (index <= limit) {
255: int pindex = plength;
256: int nindex = index + 1;
257: char ch;
258: do {
259: char ch1 = ch = chars[--index];
260: char ch2 = this .pattern[--pindex];
261: if (ch1 != ch2) {
262: ch1 = Character.toUpperCase(ch1);
263: ch2 = Character.toUpperCase(ch2);
264: if (ch1 != ch2
265: && Character.toLowerCase(ch1) != Character
266: .toLowerCase(ch2))
267: break;
268: }
269: if (pindex == 0)
270: return index;
271: } while (pindex > 0);
272: index += this .shiftTable[ch % this .shiftTable.length] + 1;
273: if (index < nindex)
274: index = nindex;
275: }
276: return -1;
277: }
278:
279: /*
280: public static void main(String[] argv) {
281: try {
282: int[] shiftTable = new int[256];
283: initializeBoyerMoore(argv[0], shiftTable, true);
284: int o = -1;
285: CharacterIterator ite = new java.text.StringCharacterIterator(argv[1]);
286: long start = System.currentTimeMillis();
287: //for (int i = 0; i < 10000; i ++)
288: o = searchIgnoreCasesWithBoyerMoore(ite, 0, argv[0], shiftTable);
289: start = System.currentTimeMillis()-start;
290: System.out.println("Result: "+o+", Elapsed: "+start);
291: } catch (Exception ex) {
292: ex.printStackTrace();
293: }
294: }*/
295: }
|