001: /*
002: * BNDMWildcards.java
003: *
004: * Created on 19.01.2004.
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * An implementation of the BNDM algorithm with wildcards ("don't care"
032: * symbols). The wildcard character is initially '?', but any character can be
033: * used through the {@link #processChars(char[], char)} and the
034: * {@link #processBytes (byte[], byte)} methods.
035: * <br><br>
036: * This algorithm is around five times faster than
037: * {@link com.eaio.stringsearch.ShiftOrWildcards}.
038: * <br><br>
039: * <pre>
040: * Preprocessing: O(2m + ∑) time
041: * </pre>
042: *
043: * @see #processBytes(byte[], byte)
044: * @see #processChars(char[], char)
045: * @see com.eaio.stringsearch.BNDM
046: * @author <a href="mailto:jb@eaio.com">Johann Burkard</a>
047: * @version 1.2
048: */
049: public class BNDMWildcards extends BNDM {
050:
051: /**
052: * The wildcard character (initially '?').
053: */
054: public static char wildcard = '?';
055:
056: /**
057: * Constructor for BNDMWildcards. Note that it is not required to create
058: * multiple instances.
059: */
060: public BNDMWildcards() {
061: }
062:
063: /**
064: * Pre-processing of the pattern. The pattern may not exceed 32 bytes in
065: * length. If it does, <b>only it's first 32 bytes</b> are processed which
066: * might lead to unexpected results. The wildcard character is obtained from
067: * the static {@link #wildcard} field.
068: *
069: * @see com.eaio.stringsearch.StringSearch#processBytes(byte[])
070: * @see #processBytes(byte[], byte)
071: */
072: public Object processBytes(byte[] pattern) {
073: return processBytes(pattern, (byte) wildcard);
074: }
075:
076: /**
077: * Pre-processing of the pattern. The pattern may not exceed 32 bytes in
078: * length. If it does, <b>only it's first 32 bytes</b> are processed which
079: * might lead to unexpected results. Returns an <code>int</code> array.
080: *
081: * @param pattern the <code>byte</code> array containing the pattern, may not
082: * be <code>null</code>
083: * @param w the wildcard <code>byte</code> character
084: * @return an <code>int</code> array
085: */
086: public Object processBytes(byte[] pattern, byte w) {
087: int j = 0;
088: int end = pattern.length < 32 ? pattern.length : 32;
089:
090: for (int i = 0; i < end; ++i) {
091: if (pattern[i] == w) {
092: j |= (1 << end - i - 1);
093: }
094: }
095:
096: int[] b = new int[256];
097:
098: if (j != 0) {
099: for (int i = 0; i < b.length; i++) {
100: b[i] = j;
101: }
102: }
103:
104: j = 1;
105: for (int i = end - 1; i >= 0; --i, j <<= 1) {
106: b[index(pattern[i])] |= j;
107: }
108:
109: return b;
110: }
111:
112: /**
113: * Pre-processes the pattern. The pattern may not exceed 32 characters in
114: * length. If it does, <b>only it's first 32 bytes</b> are processed which
115: * might lead to unexpected results. The wildcard character is obtained from
116: * the static {@link #wildcard} field.
117: *
118: * @param pattern the <code>char</code> array containing the pattern, may not
119: * be <code>null</code>
120: * @return a {@link CharIntMap}
121: * @see StringSearch#processChars(char[])
122: * @see #processChars(char[], char)
123: */
124: public Object processChars(char[] pattern) {
125: return processChars(pattern, wildcard);
126: }
127:
128: /**
129: * Pre-processes the pattern. The pattern may not exceed 32 characters in
130: * length. If it does, <b>only it's first 32 bytes</b> are processed which
131: * might lead to unexpected results. Returns a {@link CharIntMap}.
132: *
133: * @param pattern the <code>char</code> array containing the pattern, may not
134: * be <code>null</code>
135: * @param w the wildcard character
136: * @return a {@link CharIntMap}.
137: */
138: public Object processChars(char[] pattern, char w) {
139: int j = 0;
140: int end = pattern.length < 32 ? pattern.length : 32;
141:
142: for (int i = 0; i < end; ++i) {
143: if (pattern[i] == w) {
144: j |= (1 << end - i - 1);
145: }
146: }
147:
148: CharIntMap b = createCharIntMap(pattern, j);
149:
150: j = 1;
151: for (int i = end - 1; i >= 0; --i, j <<= 1) {
152: b.set(pattern[i], b.get(pattern[i]) | j);
153: }
154:
155: return b;
156: }
157:
158: /**
159: * Pre-processes the pattern. The pattern may not exceed 32 characters in
160: * length. If it does, <b>only it's first 32 bytes</b> are processed which
161: * might lead to unexpected results. Returns a {@link CharIntMap}.
162: *
163: * @param pattern the String array containing the pattern, may not be
164: * <code>null</code>
165: * @param w the wildcard character
166: * @return a {@link CharIntMap}.
167: */
168: public Object processString(String pattern, char w) {
169: return processChars(StringSearch.activeDispatch
170: .charsOf(pattern), w);
171: }
172:
173: }
|