001: /*
002: * BoyerMooreSunday.java
003: *
004: * Created on 20.10.2003
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * An implementation of Sunday's simplified "Quick Finder" version of the
032: * Boyer-Moore algorithm. See "A very fast substring search algorithm" (appeared
033: * in <em>Communications of the ACM . 33 (8):132-142</em>).
034: * <pre>
035: * Preprocessing: O(m + ∑) time
036: *
037: * Processing : O(mn) worst case
038: * </pre>
039: *
040: * @author <a href="mailto:jb@eaio.com">Johann Burkard</a>
041: * @version 1.2
042: */
043: public class BoyerMooreSunday extends StringSearch {
044:
045: /**
046: * Constructor for BoyerMooreSunday. Note that it is not required to create
047: * multiple instances.
048: */
049: public BoyerMooreSunday() {
050: }
051:
052: /**
053: * Returns a <code>int</code> array.
054: *
055: * @see com.eaio.stringsearch.StringSearch#processBytes(byte[])
056: */
057: public Object processBytes(byte[] pattern) {
058: int[] td1 = new int[256];
059:
060: for (int i = 0; i < td1.length; ++i) {
061: td1[i] = pattern.length + 1;
062: }
063:
064: for (int i = 0; i < pattern.length; ++i) {
065: td1[index(pattern[i])] = pattern.length - i;
066: }
067:
068: return td1;
069: }
070:
071: /**
072: * Returns a {@link CharIntMap}.
073: *
074: * @see com.eaio.stringsearch.StringSearch#processChars(char[])
075: */
076: public Object processChars(char[] pattern) {
077: CharIntMap td1 = createCharIntMap(pattern, pattern.length + 1);
078:
079: for (int i = 0; i < pattern.length; ++i) {
080: td1.set(pattern[i], pattern.length - i);
081: }
082:
083: return td1;
084: }
085:
086: /**
087: * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int,
088: * byte[], java.lang.Object)
089: */
090: public int searchBytes(byte[] text, int textStart, int textEnd,
091: byte[] pattern, Object processed) {
092:
093: return useNative ? nativeSearchBytes(text, textStart, textEnd,
094: pattern, processed) : javaSearchBytes(text, textStart,
095: textEnd, pattern, processed);
096:
097: }
098:
099: private int javaSearchBytes(byte[] text, int textStart,
100: int textEnd, byte[] pattern, Object processed) {
101:
102: int[] td1 = (int[]) processed;
103:
104: int p;
105:
106: while (textStart + pattern.length <= textEnd) {
107:
108: p = 0;
109:
110: while (p < pattern.length
111: && pattern[p] == text[textStart + p]) {
112: ++p;
113: }
114:
115: if (p == pattern.length) {
116: return textStart;
117: }
118:
119: if (textStart + pattern.length >= textEnd) {
120: return -1;
121: }
122:
123: textStart += td1[index(text[textStart + pattern.length])];
124: }
125:
126: return -1;
127:
128: }
129:
130: private native int nativeSearchBytes(byte[] text, int textStart,
131: int textEnd, byte[] pattern, Object processed);
132:
133: /**
134: * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int,
135: * char[], Object)
136: */
137: public int searchChars(char[] text, int textStart, int textEnd,
138: char[] pattern, Object processed) {
139:
140: CharIntMap td1 = (CharIntMap) processed;
141:
142: int p;
143:
144: while (textStart + pattern.length <= textEnd) {
145:
146: p = 0;
147:
148: while (p < pattern.length
149: && pattern[p] == text[textStart + p]) {
150: ++p;
151: }
152:
153: if (p == pattern.length) {
154: return textStart;
155: }
156:
157: if (textStart + pattern.length >= textEnd) {
158: return -1;
159: }
160:
161: textStart += td1.get(text[textStart + pattern.length]);
162:
163: }
164:
165: return -1;
166:
167: }
168:
169: }
|