001: /*
002: * ShiftOrWildcards.java
003: *
004: * Created on 13.08.2003
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * An implementation of the Shift-Or algorithm with wildcards ("don't care"
032: * symbols). The wildcard character is initially '?', but any character can
033: * be used through the {@link #processChars(char[], char)} and the
034: * {@link #processBytes(byte[], byte)} methods.
035: * <br><br>
036: * <pre>
037: * Preprocessing: O(2n + ∑) time
038: *
039: * Searching : O(mn / log n) (worst case and average)
040: * </pre>
041: *
042: * @see #processBytes(byte[], byte)
043: * @see #processChars(char[], char)
044: * @see com.eaio.stringsearch.ShiftOr
045: * @author <a href="mailto:jb@eaio.de">Johann Burkard</a>
046: * @version 1.2
047: */
048: public class ShiftOrWildcards extends ShiftOr {
049:
050: /**
051: * The wildcard character (initially '?').
052: */
053: public static char wildcard = '?';
054:
055: /**
056: * Constructor for ShiftOrWildcards. Note that it is not required to create
057: * multiple instances.
058: */
059: public ShiftOrWildcards() {
060: }
061:
062: /**
063: * Pre-processing of the pattern. The pattern may not exceed 31 bytes in
064: * length. If it does, <b>only it's first 31 bytes</b> are processed which
065: * might lead to unexpected results. Returns an <code>int</code> array. The
066: * wildcard character is obtained from the static {@link #wildcard} field.
067: *
068: * @see com.eaio.stringsearch.StringSearch#processBytes(byte[])
069: * @see #processBytes(byte[], byte)
070: */
071: public Object processBytes(byte[] pattern) {
072: return processBytes(pattern, (byte) wildcard);
073: }
074:
075: /**
076: * Pre-processing of the pattern. The pattern may not exceed 31 bytes in
077: * length. If it does, <b>only it's first 31 bytes</b> are processed which
078: * might lead to unexpected results. Returns an <code>int</code> array.
079: *
080: * @param pattern the <code>byte</code> array containing the pattern, may not
081: * be <code>null</code>
082: * @param w the wildcard <code>byte</code> character
083: * @return an <code>int</code> array
084: */
085: public Object processBytes(byte[] pattern, byte w) {
086: int j = ~0;
087: int end = Math.min(pattern.length, 31);
088:
089: for (int i = 0; i < end; ++i) {
090: if (pattern[i] == w) {
091: j -= 1 << i;
092: }
093: }
094:
095: int[] t = new int[256];
096:
097: for (int i = 0; i < t.length; ++i) {
098: t[i] = j;
099: }
100:
101: for (int i = 0; i < end; ++i) {
102: if (pattern[i] != w) {
103: t[index(pattern[i])] &= ~(1 << i);
104: }
105: }
106: return t;
107: }
108:
109: /**
110: * Pre-processing of the pattern. The pattern may not exceed 31 bytes in
111: * length. If it does, <b>only it's first 31 bytes</b> are processed which
112: * might lead to unexpected results. Returns a {@link CharIntMap}. The wildcard
113: * character is obtained from the static {@link #wildcard} field.
114: *
115: * @param pattern the <code>char</code> array containing the pattern, may not
116: * be <code>null</code>
117: * @return a {@link CharIntMap}
118: * @see StringSearch#processChars(char[])
119: * @see #processChars(char[], char)
120: */
121: public Object processChars(char[] pattern) {
122: return processChars(pattern, wildcard);
123: }
124:
125: /**
126: * Pre-processing of the pattern. The pattern may not exceed 31 bytes in
127: * length. If it does, <b>only it's first 31 bytes</b> are processed which
128: * might lead to unexpected results. Returns a {@link CharIntMap}.
129: *
130: * @param pattern the <code>char</code> array containing the pattern, may not
131: * be <code>null</code>
132: * @param w the wildcard character
133: * @return a {@link CharIntMap}.
134: */
135: public Object processChars(char[] pattern, char w) {
136: int j = ~0;
137: int end = Math.min(pattern.length, 31);
138:
139: for (int i = 0; i < end; ++i) {
140: if (pattern[i] == w) {
141: j -= 1 << i;
142: }
143: }
144:
145: CharIntMap m = createCharIntMap(pattern, j);
146:
147: for (int i = 0; i < end; ++i) {
148: if (pattern[i] != w) {
149: m.set(pattern[i], m.get(pattern[i]) & ~(1 << i));
150: }
151: }
152:
153: return m;
154: }
155:
156: /**
157: * Pre-processing of the pattern. The pattern may not exceed 31 bytes in
158: * length. If it does, <b>only it's first 31 bytes</b> are processed which
159: * might lead to unexpected results. Returns a {@link CharIntMap}.
160: *
161: * @param pattern the String containing the pattern, may not be
162: * <code>null</code>
163: * @param w the wildcard character
164: * @return a {@link CharIntMap}.
165: */
166: public Object processString(String pattern, char w) {
167: return processChars(StringSearch.activeDispatch
168: .charsOf(pattern), w);
169: }
170:
171: }
|