001: /*
002: * BoyerMooreHorspoolRaita.java
003: *
004: * Created on 15.09.2003
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * An implementation of Raita's enhancement to the Boyer-Moore-Horspool String
032: * searching algorithm. See "Tuning the Boyer-Moore-Horspool string searching
033: * algorithm" (appeared in <em>Software - Practice & Experience,
034: * 22(10):879-884</em>).
035: * <br><br>
036: * This algorithm is slightly faster than the
037: * {@link com.eaio.stringsearch.BoyerMooreHorspool} algorithm for the
038: * <code>searchChars</code> and <code>searchString</code> methods. It's
039: * <code>searchBytes</code> methods are slightly slower.
040: * <pre>
041: * Preprocessing: O(m + ∑) time
042: *
043: * Processing : O(mn) worst case
044: * </pre>
045: *
046: * @see <a
047: * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol22/issue10/spe787tr.pdf">
048: * http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol22/issue10/spe787tr.pdf
049: * </a>
050: * @author <a href="mailto:jb@eaio.com">Johann Burkard</a>
051: * @version 1.2
052: */
053: public class BoyerMooreHorspoolRaita extends /* comment:start */
054: BoyerMooreHorspool /* comment:end *//* comment:remove StringSearch comment:remove */{
055:
056: /**
057: * Constructor for BoyerMooreHorspoolRaita. Note that it is not required to
058: * create multiple instances.
059: */
060: public BoyerMooreHorspoolRaita() {
061: }
062:
063: /**
064: * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int,
065: * byte[], java.lang.Object)
066: */
067: public int searchBytes(byte[] text, int textStart, int textEnd,
068: byte[] pattern, Object processed) {
069:
070: /* comment:start */
071:
072: return useNative ? nativeSearchBytes(text, textStart, textEnd,
073: pattern, processed) : javaSearchBytes(text, textStart,
074: textEnd, pattern, processed);
075:
076: }
077:
078: private int javaSearchBytes(byte[] text, int textStart,
079: int textEnd, byte[] pattern, Object processed) {
080:
081: /* comment:end */
082:
083: int[] b = (int[]) processed;
084:
085: int i, j, k, mMinusOne;
086: byte last, first;
087:
088: i = pattern.length - 1;
089: mMinusOne = pattern.length - 2;
090:
091: last = pattern[pattern.length - 1];
092: first = pattern[0];
093:
094: i += textStart;
095:
096: while (i < textEnd) {
097:
098: if (text[i] == last) {
099:
100: if (text[i - (pattern.length - 1)] == first) {
101:
102: k = i - 1;
103: j = mMinusOne;
104:
105: while (k > -1 && j > -1 && text[k] == pattern[j]) {
106: --k;
107: --j;
108: }
109: if (j == -1) {
110: return k + 1;
111: }
112: }
113: }
114:
115: i += b[index(text[i])];
116: }
117:
118: return -1;
119:
120: }
121:
122: /* comment:start */
123:
124: private native int nativeSearchBytes(byte[] text, int textStart,
125: int textEnd, byte[] pattern, Object processed);
126:
127: /* comment:end */
128:
129: /**
130: * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int,
131: * char[], Object)
132: */
133: public int searchChars(char[] text, int textStart, int textEnd,
134: char[] pattern, Object processed) {
135:
136: CharIntMap m = (CharIntMap) processed;
137:
138: int i, j, k, mMinusOne;
139: char last, first;
140:
141: i = pattern.length - 1;
142: mMinusOne = i - 1;
143:
144: last = pattern[i];
145: first = pattern[0];
146:
147: i += textStart;
148:
149: while (i < textEnd) {
150:
151: if (text[i] == last) {
152:
153: if (text[i - (pattern.length - 1)] == first) {
154:
155: k = i - 1;
156: j = mMinusOne;
157:
158: while (k > -1 && j > -1 && text[k] == pattern[j]) {
159: --k;
160: --j;
161: }
162: if (j == -1) {
163: return k + 1;
164: }
165: }
166: }
167: i += m.get(text[i]);
168: }
169:
170: return -1;
171:
172: }
173:
174: /* comment:remove
175:
176: public Object processBytes(byte[] pattern) {
177: int[] skip = new int[256];
178:
179: for (int i = 0; i < skip.length; ++i) {
180: skip[i] = pattern.length;
181: }
182:
183: for (int i = 0; i < pattern.length - 1; ++i) {
184: skip[index(pattern[i])] = pattern.length - i - 1;
185: }
186:
187: return skip;
188: }
189:
190: public Object processChars(char[] pattern) {
191: CharIntMap skip = createCharIntMap(pattern, pattern.length);
192:
193: for (int i = 0; i < pattern.length - 1; ++i) {
194: skip.set(pattern[i], pattern.length - i - 1);
195: }
196:
197: return skip;
198: }
199:
200: comment:remove */
201:
202: }
|