001: /*
002: * BNDM.java
003: *
004: * Created on 21.10.2003
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * An implementation of the Backwards Non-deterministic Dawg (Directed
032: * acyclic word graph) Matching algorithm by Gonzalo Navarro and
033: * Mathieu Raffinot. See "A Bit-Parallel Approach to Suffix Automata: Fast
034: * Extended String Matching" (appeared in <em>Proceedings of the 9th Annual
035: * Symposium on Combinatorial Pattern Matching, 1998</em>).
036: * <br><br>
037: * This is one of the fastest algorithms, but it does not beat the
038: * {@link com.eaio.stringsearch.BoyerMooreHorspoolRaita} and the
039: * {@link com.eaio.stringsearch.BoyerMooreHorspool} algorithms.
040: * <br><br>
041: * <pre>
042: * Preprocessing: O(m) time
043: *
044: * Searching : O(n/m) (best case)
045: * O(n log|∑| m / m) (average)
046: * O(mn) (worst case)
047: * </pre>
048: *
049: * @see <a href="http://www.dcc.uchile.cl/~gnavarro/ps/cpm98.ps.gz">
050: * http://www.dcc.uchile.cl/~gnavarro/ps/cpm98.ps.gz
051: * </a>
052: * @see <a href="http://www-igm.univ-mlv.fr/~raffinot/ftp/cpm98.ps.gz">
053: * http://www-igm.univ-mlv.fr/~raffinot/ftp/cpm98.ps.gz
054: * </a>
055: * @see <a href="http://citeseer.nj.nec.com/navarro98bitparallel.html">
056: * http://citeseer.nj.nec.com/navarro98bitparallel.html
057: * </a>
058: * @author <a href="mailto:jb@eaio.de">Johann Burkard</a>
059: * @version 1.2
060: */
061: public class BNDM extends StringSearch {
062:
063: /**
064: * Constructor for BNDM. Note that it is not required to create multiple
065: * instances.
066: */
067: public BNDM() {
068: }
069:
070: /**
071: * Pre-processing of the pattern. The pattern may not exceed 32 bytes in
072: * length. If it does, <b>only it's first 32 bytes</b> are processed which
073: * might lead to unexpected results. Returns an <code>int</code> array.
074: *
075: * @see com.eaio.stringsearch.StringSearch#processBytes(byte[])
076: */
077: public Object processBytes(byte[] pattern) {
078: int end = pattern.length < 32 ? pattern.length : 32;
079:
080: int[] b = new int[256];
081:
082: int j = 1;
083: for (int i = end - 1; i >= 0; --i, j <<= 1) {
084: b[index(pattern[i])] |= j;
085: }
086:
087: return b;
088: }
089:
090: /**
091: * Pre-processing of the pattern. The pattern may not exceed 32 bytes in
092: * length. If it does, <b>only it's first 32 bytes</b> are processed which
093: * might lead to unexpected results. Returns a {@link CharIntMap}.
094: *
095: * @see com.eaio.stringsearch.StringSearch#processChars(char[])
096: */
097: public Object processChars(char[] pattern) {
098: int end = pattern.length < 32 ? pattern.length : 32;
099:
100: CharIntMap b = createCharIntMap(pattern);
101:
102: int j = 1;
103: for (int i = end - 1; i >= 0; --i, j <<= 1) {
104: b.set(pattern[i], b.get(pattern[i]) | j);
105: }
106:
107: return b;
108: }
109:
110: /**
111: * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int,
112: * byte[], java.lang.Object)
113: */
114: public int searchBytes(byte[] text, int textStart, int textEnd,
115: byte[] pattern, Object processed) {
116:
117: /* comment:start */
118:
119: return useNative ? nativeSearchBytes(text, textStart, textEnd,
120: pattern, processed) : javaSearchBytes(text, textStart,
121: textEnd, pattern, processed);
122:
123: }
124:
125: private int javaSearchBytes(byte[] text, int textStart,
126: int textEnd, byte[] pattern, Object processed) {
127:
128: /* comment:end */
129:
130: int[] b = (int[]) processed;
131:
132: int d, j, pos, last;
133: pos = textStart;
134: while (pos <= textEnd - pattern.length) {
135: j = pattern.length - 1;
136: last = pattern.length;
137: d = -1;
138: while (d != 0) {
139: d &= b[index(text[pos + j])];
140: if (d != 0) {
141: if (j == 0) {
142: return pos;
143: }
144: last = j;
145: }
146: --j;
147: d <<= 1;
148: }
149: pos += last;
150: }
151:
152: return -1;
153:
154: }
155:
156: /* comment:start */
157:
158: private native int nativeSearchBytes(byte[] text, int textStart,
159: int textEnd, byte[] pattern, Object processed);
160:
161: /* comment:end */
162:
163: /**
164: * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int,
165: * char[], Object)
166: */
167: public int searchChars(char[] text, int textStart, int textEnd,
168: char[] pattern, Object processed) {
169:
170: CharIntMap b = (CharIntMap) processed;
171:
172: int d, j, pos, last;
173: pos = textStart;
174: while (pos <= textEnd - pattern.length) {
175: j = pattern.length - 1;
176: last = pattern.length;
177: d = -1;
178: while (d != 0) {
179: d &= b.get(text[pos + j]);
180: if (d != 0) {
181: if (j == 0) {
182: return pos;
183: }
184: last = j;
185: }
186: --j;
187: d <<= 1;
188: }
189: pos += last;
190: }
191:
192: return -1;
193:
194: }
195:
196: }
|