001: /*
002: * MismatchSearch.java
003: *
004: * Created on 12.11.2003.
005: *
006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
008: *
009: * Permission is hereby granted, free of charge, to any person obtaining a copy
010: * of this software and associated documentation files (the "Software"), to deal
011: * in the Software without restriction, including without limitation the rights
012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
013: * copies of the Software, and to permit persons to whom the Software is
014: * furnished to do so, subject to the following conditions:
015: *
016: * The above copyright notice and this permission notice shall be included in
017: * all copies or substantial portions of the Software.
018: *
019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
025: * SOFTWARE.
026: *
027: */
028: package com.eaio.stringsearch;
029:
030: /**
031: * Subclasses of MismatchSearch allow for searching with a fixed number of
032: * possible errors. Subclasses of this class return a
033: * <code>int</code> array with the first <code>int</code> being the position at
034: * which the hit occurred and the second <code>int</code> being the number of
035: * mismatches at the position.
036: * <br><br>
037: * Example:
038: * <pre>
039: * int[] positions = new ShiftOrMismatches().searchString("this is null",
040: * "nall", 1);
041: * </pre>
042: * positions[0] would be 8, positions[1] (the number of mismatches) would be 1.
043: *
044: * @author <a href="mailto:jb@eaio.com">Johann Burkard</a>
045: * @version 1.2
046: */
047: public abstract class MismatchSearch extends StringSearch {
048:
049: /**
050: * Constructor for MismatchSearch. Note that it is not required to create
051: * multiple instances.
052: */
053: protected MismatchSearch() {
054: }
055:
056: /*
057: * Pre-processing methods
058: */
059:
060: /**
061: * Pre-process the pattern, allowing <b>zero</b> errors.
062: * <br><br>
063: * Identical to <code>process(pattern, 0)</code>
064: *
065: * @param pattern the <code>byte</code> array containing the pattern, may not
066: * be <code>null</code>
067: * @see com.eaio.stringsearch.StringSearch#processBytes(byte[])
068: * @see #processBytes(byte[], int)
069: */
070: public final Object processBytes(byte[] pattern) {
071: return processBytes(pattern, 0);
072: }
073:
074: /**
075: * Pre-processes the pattern, allowing k errors.
076: *
077: * @param pattern the <code>byte</code> array containing the pattern, may not
078: * be <code>null</code>
079: * @param k the editing distance
080: * @return an Object
081: */
082: public abstract Object processBytes(byte[] pattern, int k);
083:
084: /**
085: * Pre-processes the pattern, allowing <b>zero</b> errors.
086: * <br><br>
087: * Identical to <code>process(pattern, 0)</code>.
088: *
089: * @param pattern a <code>char</code> array containing the pattern, may not be
090: * <code>null</code>
091: * @return an Object
092: * @see #processChars(char[], int)
093: * @see com.eaio.stringsearch.StringSearch#processChars(char[])
094: */
095: public final Object processChars(char[] pattern) {
096: return processChars(pattern, 0);
097: }
098:
099: /**
100: * Pre-processes a <code>char</code> array, allowing k errors.
101: *
102: * @param pattern a <code>char</code> array containing the pattern, may not be
103: * <code>null</code>
104: * @param k the editing distance
105: * @return an Object
106: */
107: public abstract Object processChars(char[] pattern, int k);
108:
109: /**
110: * Pre-processes a String, allowing k errors. This method should not be used
111: * directly because it is implicitly called in the
112: * {@link #searchString(String, String)} methods.
113: *
114: * @param pattern the String containing the pattern, may not be
115: * <code>null</code>
116: * @param k the editing distance
117: * @return an Object
118: */
119: public final Object processString(String pattern, int k) {
120: return processChars(StringSearch.activeDispatch
121: .charsOf(pattern), k);
122: }
123:
124: /*
125: * Byte searching methods
126: */
127:
128: /**
129: * @see com.eaio.stringsearch.StringSearch#searchBytes(byte[], int, int, byte[], Object)
130: * @see #searchBytes(byte[], int, int, byte[], Object, int)
131: */
132: public final int searchBytes(byte[] text, int textStart,
133: int textEnd, byte[] pattern, Object processed) {
134:
135: return searchBytes(text, textStart, textEnd, pattern,
136: processed, 0)[0];
137:
138: }
139:
140: /**
141: * Returns the position in the text at which the pattern was found. Returns -1
142: * if the pattern was not found.
143: *
144: * @param text the <code>byte</code> array containing the text, may not be
145: * <code>null</code>
146: * @param pattern the <code>byte</code> array containing the pattern, may not
147: * be <code>null</code>
148: * @param k the editing distance
149: * @return the position in the text or -1 if the pattern was not found
150: * @see #searchBytes(byte[], int, int, byte[], Object, int)
151: */
152: public final int[] searchBytes(byte[] text, byte[] pattern, int k) {
153: return searchBytes(text, 0, text.length, pattern, processBytes(
154: pattern, k), k);
155: }
156:
157: /**
158: * Returns the position in the text at which the pattern was found. Returns -1
159: * if the pattern was not found.
160: *
161: * @param text the <code>byte</code> array containing the text, may not be
162: * <code>null</code>
163: * @param pattern the <code>byte</code> array containing the pattern, may not
164: * be <code>null</code>
165: * @param processed an Object as returned from
166: * {@link #processBytes(byte[], int)}, may not be <code>null</code>
167: * @param k the editing distance
168: * @return the position in the text or -1 if the pattern was not found
169: * @see #searchBytes(byte[], int, int, byte[], Object, int)
170: */
171: public final int[] searchBytes(byte[] text, byte[] pattern,
172: Object processed, int k) {
173:
174: return searchBytes(text, 0, text.length, pattern, processed, k);
175:
176: }
177:
178: /**
179: * Returns the position in the text at which the pattern was found. Returns -1
180: * if the pattern was not found.
181: *
182: * @param text the <code>byte</code> array containing the text, may not be
183: * <code>null</code>
184: * @param textStart at which position in the text the comparing should start
185: * @param pattern the <code>byte</code> array containing the pattern, may not
186: * be <code>null</code>
187: * @param k the editing distance
188: * @return int the position in the text or -1 if the pattern was not found
189: * @see #searchBytes(byte[], int, int, byte[], Object, int)
190: */
191: public final int[] searchBytes(byte[] text, int textEnd,
192: byte[] pattern, int k) {
193:
194: return searchBytes(text, 0, textEnd, pattern, processBytes(
195: pattern, k), k);
196:
197: }
198:
199: /**
200: * Returns the position in the text at which the pattern was found. Returns -1
201: * if the pattern was not found.
202: *
203: * @param text the <code>byte</code> array containing the text, may not be
204: * <code>null</code>
205: * @param textStart at which position in the text the comparing should start
206: * @param pattern the pattern to search for, may not be <code>null</code>
207: * @param processed
208: * @param k the editing distance
209: * @return the position in the text or -1 if the pattern was not found
210: * @see #searchBytes(byte[], int, int, byte[], Object, int)
211: */
212: public final int[] searchBytes(byte[] text, int textEnd,
213: byte[] pattern, Object processed, int k) {
214:
215: return searchBytes(text, 0, textEnd, pattern, processed, k);
216:
217: }
218:
219: /**
220: * Returns the position in the text at which the pattern was found. Returns -1
221: * if the pattern was not found.
222: *
223: * @param text text the <code>byte</code> array containing the text, may not be
224: * <code>null</code>
225: * @param textStart at which position in the text the comparing should start
226: * @param textEnd at which position in the text comparing should stop
227: * @param pattern the <code>byte</code> array containing the pattern, may not
228: * be <code>null</code>
229: * @param k the editing distance
230: * @return the position in the text or -1 if the pattern was not found
231: * @see #searchBytes(byte[], int, int, byte[], Object, int)
232: */
233: public final int[] searchBytes(byte[] text, int textStart,
234: int textEnd, byte[] pattern, int k) {
235:
236: return searchBytes(text, textStart, textEnd, pattern,
237: processBytes(pattern, k), k);
238:
239: }
240:
241: /**
242: * Returns the position in the text at which the pattern was found. Returns -1
243: * if the pattern was not found.
244: *
245: * @param text text the <code>byte</code> array containing the text, may not be
246: * <code>null</code>
247: * @param textStart at which position in the text the comparing should start
248: * @param textEnd at which position in the text comparing should stop
249: * @param pattern the pattern to search for, may not be <code>null</code>
250: * @param processed an Object as returned from
251: * {@link #processBytes(byte[], int)}, may not be <code>null</code>
252: * @param k the editing distance
253: * @return the position in the text or -1 if the pattern was not found
254: * @see #processBytes(byte[], int)
255: */
256: public abstract int[] searchBytes(byte[] text, int textStart,
257: int textEnd, byte[] pattern, Object processed, int k);
258:
259: /*
260: * Char searching methods
261: */
262:
263: /**
264: * Finder for the given pattern in the text, starting at textStart and
265: * comparing to at most textEnd, allowing zero errors.
266: *
267: * @see StringSearch#searchChars(char[], int, int, char[], Object)
268: * @see #processChars(char[], int)
269: */
270: public final int searchChars(char[] text, int textStart,
271: int textEnd, char[] pattern, Object processed) {
272:
273: return searchChars(text, textStart, textEnd, pattern,
274: processed, 0)[0];
275:
276: }
277:
278: /**
279: * Finder for the given pattern in the text, allowing k errors.
280: *
281: * @param text the String containing the text, may not be <code>null</code>
282: * @param pattern the pattern to search for, may not be <code>null</code>
283: * @param k the maximum number of mismatches (the editing distance)
284: * @return the position in the text or -1 if the pattern was not found
285: * @see #searchChars(char[], int, int, char[], Object, int)
286: */
287: public final int[] searchChars(char[] text, char[] pattern, int k) {
288: return searchChars(text, 0, text.length, pattern, processChars(
289: pattern, k), k);
290: }
291:
292: /**
293: * Finder for the given pattern in the text, allowing k errors.
294: *
295: * @param text the String containing the text, may not be <code>null</code>
296: * @param pattern the pattern to search for, may not be <code>null</code>
297: * @param processed an Object as returned from
298: * {@link #processChars(char[], int)} or {@link #processString(String, int)},
299: * may not be <code>null</code>
300: * @param k the maximum number of mismatches (the editing distance)
301: * @return the position in the text or -1 if the pattern was not found
302: * @see #searchChars(char[], int, int, char[], Object, int)
303: */
304: public final int[] searchChars(char[] text, char[] pattern,
305: Object processed, int k) {
306:
307: return searchChars(text, 0, text.length, pattern, processed, k);
308:
309: }
310:
311: /**
312: * Finder for the given pattern in the text, starting at textStart, allowing k
313: * errors.
314: *
315: * @param text the String containing the text, may not be <code>null</code>
316: * @param textStart at which position in the text the comparing should start
317: * @param pattern the pattern to search for, may not be <code>null</code>
318: * @param processed an Object as returned from
319: * {@link #processChars(char[], int)} or {@link #processString(String), int},
320: * may not be <code>null</code>
321: * @param k the maximum number of mismatches (the editing distance)
322: * @return the position in the text or -1 if the pattern was not found
323: * @see #searchChars(char[], int, int, char[], Object)
324: */
325: public final int[] searchChars(char[] text, int textStart,
326: char[] pattern, int k) {
327:
328: return searchChars(text, textStart, text.length, pattern,
329: processChars(pattern, k), k);
330:
331: }
332:
333: /**
334: * Finder for the given pattern in the text, starting at textStart, allowing k
335: * errors.
336: *
337: * @param text the String containing the text, may not be <code>null</code>
338: * @param textStart at which position in the text the comparing should start
339: * @param pattern the pattern to search for, may not be <code>null</code>
340: * @param processed an Object as returned from
341: * {@link #processChars(char[], int)} or {@link #processString(String, int)},
342: * may not be <code>null</code>
343: * @param k the maximum number of mismatches (the editing distance)
344: * @return the position in the text or -1 if the pattern was not found
345: * @see #searchChars(char[], int, int, char[], Object, int)
346: */
347: public final int[] searchChars(char[] text, int textStart,
348: char[] pattern, Object processed, int k) {
349:
350: return searchChars(text, textStart, text.length, pattern,
351: processed, k);
352:
353: }
354:
355: /**
356: * Finder for the given pattern in the text, starting at textStart and
357: * comparing to at most textEnd, allowing k errors.
358: *
359: * @param text the String containing the text, may not be <code>null</code>
360: * @param textStart at which position in the text the comparing should start
361: * @param textEnd at which position in the text comparing should stop
362: * @param pattern the pattern to search for, may not be <code>null</code>
363: * @param k the maximum number of mismatches (the editing distance)
364: * @return the position in the text or -1 if the pattern was not found
365: */
366: public final int[] searchChars(char[] text, int textStart,
367: int textEnd, char[] pattern, int k) {
368:
369: return searchChars(text, textStart, textEnd, pattern,
370: processChars(pattern, k), k);
371:
372: }
373:
374: /**
375: * Finder for the given pattern in the text, starting at textStart and
376: * comparing to at most textEnd, allowing k errors.
377: *
378: * @param text the String containing the text, may not be <code>null</code>
379: * @param textStart at which position in the text the comparing should start
380: * @param textEnd at which position in the text comparing should stop
381: * @param pattern the pattern to search for, may not be <code>null</code>
382: * @param processed an Object as returned from
383: * {@link #processChars(char[], int)} or {@link #processString(String, int)},
384: * may not be <code>null</code>
385: * @param k the maximum number of mismatches (the editing distance)
386: * @return the position in the text or -1 if the pattern was not found
387: */
388: public abstract int[] searchChars(char[] text, int textStart,
389: int textEnd, char[] pattern, Object processed, int k);
390:
391: /* String searching methods */
392:
393: /**
394: * Convenience method to search for patterns in Strings. Returns the position
395: * in the text at which the pattern was found. Returns -1 if the pattern was
396: * not found.
397: *
398: * @param text the String containing the text, may not be <code>null</code>
399: * @param pattern the String containing the pattern, may not be
400: * <code>null</code>
401: * @param k the maximum number of mismatches (the editing distance)
402: * @return the position in the text or -1 if the pattern was not found
403: * @see #searchChars(char[], int, int, char[], int)
404: */
405: public final int[] searchString(String text, String pattern, int k) {
406: return searchString(text, 0, text.length(), pattern, k);
407: }
408:
409: /**
410: * Convenience method to search for patterns in Strings. Returns the position
411: * in the text at which the pattern was found. Returns -1 if the pattern was
412: * not found.
413: *
414: * @param text the String containing the text, may not be <code>null</code>
415: * @param pattern the String containing the pattern, may not be
416: * <code>null</code>
417: * @param processed an Object as returned from
418: * {@link #processChars(char[], int)} or {@link #processString(String, int)},
419: * may not be <code>null</code>
420: * @param k the maximum number of mismatches (the editing distance)
421: * @return the position in the text or -1 if the pattern was not found
422: * @see #searchChars(char[], int, int, char[], Object, int)
423: */
424: public final int[] searchString(String text, String pattern,
425: Object processed, int k) {
426:
427: return searchString(text, 0, text.length(), pattern, processed,
428: k);
429:
430: }
431:
432: /**
433: * Convenience method to search for patterns in Strings. Returns the position
434: * in the text at which the pattern was found. Returns -1 if the pattern was
435: * not found.
436: *
437: * @param text the String containing the text, may not be <code>null</code>
438: * @param textStart at which position in the text the comparing should start
439: * @param pattern the String containing the pattern, may not be
440: * <code>null</code>
441: * @param k the maximum number of mismatches (the editing distance)
442: * @return the position in the text or -1 if the pattern was not found
443: * @see #searchChars(char[], int, int, char[], int)
444: */
445: public final int[] searchString(String text, int textStart,
446: String pattern, int k) {
447:
448: return searchString(text, textStart, text.length(), pattern, k);
449:
450: }
451:
452: /**
453: * Convenience method to search for patterns in Strings. Returns the position
454: * in the text at which the pattern was found. Returns -1 if the pattern was
455: * not found.
456: *
457: * @param text the String containing the text, may not be <code>null</code>
458: * @param textStart at which position in the text the comparing should start
459: * @param pattern the String containing the pattern, may not be
460: * <code>null</code>
461: * @param processed an Object as returned from
462: * {@link #processChars(char[], int)} or {@link #processString(String, int)},
463: * may not be <code>null</code>
464: * @param k the maximum number of mismatches (the editing distance)
465: * @return the position in the text or -1 if the pattern was not found
466: * @see #searchChars(char[], int, int, char[], Object, int)
467: */
468: public final int[] searchString(String text, int textStart,
469: String pattern, Object processed, int k) {
470:
471: return searchString(text, textStart, text.length(), pattern,
472: processed, k);
473:
474: }
475:
476: /**
477: * Convenience method to search for patterns in Strings. Returns the position
478: * in the text at which the pattern was found. Returns -1 if the pattern was
479: * not found.
480: *
481: * @param text the String containing the text, may not be <code>null</code>
482: * @param textStart at which position in the text the comparing should start
483: * @param textEnd at which position in the text comparing should stop
484: * @param pattern the String containing the pattern, may not be
485: * <code>null</code>
486: * @param k the maximum number of mismatches (the editing distance)
487: * @return the position in the text or -1 if the pattern was not found
488: * @see #searchChars(char[], int, int, char[], int)
489: */
490: public final int[] searchString(String text, int textStart,
491: int textEnd, String pattern, int k) {
492:
493: return StringSearch.activeDispatch.searchString(text,
494: textStart, textEnd, pattern, k, this );
495:
496: }
497:
498: /**
499: * Convenience method to search for patterns in Strings. Returns the position
500: * in the text at which the pattern was found. Returns -1 if the pattern was
501: * not found.
502: *
503: * @param text the String containing the text, may not be <code>null</code>
504: * @param textStart at which position in the text the comparing should start
505: * @param textEnd at which position in the text comparing should stop
506: * @param pattern the String containing the pattern, may not be
507: * <code>null</code>
508: * @param processed an Object as returned from
509: * {@link #processChars(char[], int)} or {@link #processString(String, int)},
510: * may not be <code>null</code>
511: * @param k the maximum number of mismatches (the editing distance)
512: * @return the position in the text or -1 if the pattern was not found
513: * @see #searchChars(char[], int, int, char[], Object, int)
514: */
515: public final int[] searchString(String text, int textStart,
516: int textEnd, String pattern, Object processed, int k) {
517:
518: return StringSearch.activeDispatch.searchString(text,
519: textStart, textEnd, pattern, processed, k, this);
520:
521: }
522:
523: }
|