001: /*
002: * BoyerMooreHorspool.java
003: *
004: * Created on 12.09.2003
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * An implementation of Horspool's improved version of the Boyer-Moore String
032: * searching algorithm. See "Practical fast searching in strings" (appeared in
033: * <em>Software - Practice & Experience, 10(6):501-506</em>). Unfortunately,
034: * there seems to be no on-line version of his paper.
035: * <br><br>
036: * This is the second fastest algorithm in this library for the
037: * <code>searchChars</code> and <code>searchString</code>. Except for very short
038: * patterns (< 5 characters), it is always faster than any other algorithm
039: * except {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} and faster than
040: * {@link java.lang.String#indexOf(java.lang.String)} by more than 5 times for
041: * patterns longer than 24 characters. It's <code>searchBytes</code> methods are
042: * slightly faster than in the
043: * {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} algorithm.
044: * <br><br>
045: * This implementation is based on <a
046: * href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.c">Ricardo
047: * Baeza-Yates' implementation</a>.
048: * <pre>
049: * Preprocessing: O(m + ∑) time
050: *
051: * Processing : O(mn) worst case
052: * </pre>
053: *
054: * @author <a href="mailto:jb@eaio.com">Johann Burkard</a>
055: * @version 1.2
056: */
057: public class BoyerMooreHorspool extends StringSearch {
058:
059: /**
060: * Constructor for BoyerMooreHorspool. Note that it is not required to create
061: * multiple instances.
062: */
063: public BoyerMooreHorspool() {
064: }
065:
066: /**
067: * Returns a <code>int</code> array.
068: *
069: * @see com.eaio.stringsearch.StringSearch#processBytes(byte[])
070: */
071: public Object processBytes(byte[] pattern) {
072: int[] skip = new int[256];
073:
074: for (int i = 0; i < skip.length; ++i) {
075: skip[i] = pattern.length;
076: }
077:
078: for (int i = 0; i < pattern.length - 1; ++i) {
079: skip[index(pattern[i])] = pattern.length - i - 1;
080: }
081:
082: return skip;
083: }
084:
085: /**
086: * Returns a {@link CharIntMap}.
087: *
088: * @see com.eaio.stringsearch.StringSearch#processChars(char[])
089: */
090: public Object processChars(char[] pattern) {
091: CharIntMap skip = createCharIntMap(pattern, pattern.length);
092:
093: for (int i = 0; i < pattern.length - 1; ++i) {
094: skip.set(pattern[i], pattern.length - i - 1);
095: }
096:
097: return skip;
098: }
099:
100: /**
101: * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int,
102: * byte[], java.lang.Object)
103: */
104: public int searchBytes(byte[] text, int textStart, int textEnd,
105: byte[] pattern, Object processed) {
106:
107: return useNative ? nativeSearchBytes(text, textStart, textEnd,
108: pattern, processed) : javaSearchBytes(text, textStart,
109: textEnd, pattern, processed);
110:
111: }
112:
113: private int javaSearchBytes(byte[] text, int textStart,
114: int textEnd, byte[] pattern, Object processed) {
115:
116: int[] skip = (int[]) processed;
117:
118: int i, j, k;
119:
120: for (k = pattern.length - 1; k < textEnd; k += skip[index(text[k])]) {
121: for (j = pattern.length - 1, i = k; j >= 0
122: && text[i] == pattern[j] && i >= textStart; --j, --i)
123: ;
124: if (j == -1)
125: return ++i;
126: }
127:
128: return -1;
129:
130: }
131:
132: private native int nativeSearchBytes(byte[] text, int textStart,
133: int textEnd, byte[] pattern, Object processed);
134:
135: /**
136: * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int,
137: * char[], Object)
138: */
139: public int searchChars(char[] text, int textStart, int textEnd,
140: char[] pattern, Object processed) {
141:
142: CharIntMap skip = (CharIntMap) processed;
143:
144: int i, j, k;
145:
146: for (k = pattern.length - 1; k < textEnd; k += skip
147: .get(text[k])) {
148: for (j = pattern.length - 1, i = k; j >= 0
149: && text[i] == pattern[j] && i >= textStart; --j, --i)
150: ;
151: if (j == -1)
152: return ++i;
153: }
154:
155: return -1;
156:
157: }
158:
159: }
|