0001: /*
0002: * StringSearch.java
0003: *
0004: * Created on 14.06.2003.
0005: *
0006: * eaio: StringSearch - high-performance pattern matching algorithms in Java
0007: * Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
0008: *
0009: * Permission is hereby granted, free of charge, to any person obtaining a copy
0010: * of this software and associated documentation files (the "Software"), to deal
0011: * in the Software without restriction, including without limitation the rights
0012: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
0013: * copies of the Software, and to permit persons to whom the Software is
0014: * furnished to do so, subject to the following conditions:
0015: *
0016: * The above copyright notice and this permission notice shall be included in
0017: * all copies or substantial portions of the Software.
0018: *
0019: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0020: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0021: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0022: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
0023: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
0024: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0025: * SOFTWARE.
0026: *
0027: */
0028: package com.eaio.stringsearch;
0029:
0030: import java.lang.reflect.Field;
0031: import java.security.AccessController;
0032: import java.security.PrivilegedAction;
0033:
0034: /**
0035: * The base class for String searching implementations. String searching
0036: * implementations do not maintain state and are thread safe - one instance can
0037: * be used by as many threads as required.
0038: * <p>
0039: * Most pattern-matching algorithms pre-process the pattern to search for in
0040: * some way. Subclasses of StringSearch allow retrieving the pre-processed
0041: * pattern to save the time required to build up character tables.
0042: * <p>
0043: * Some of the Objects returned from {@link #processBytes(byte[])},
0044: * {@link #processChars(char[])}, {@link #processString(String)} might implement
0045: * the {@link java.io.Serializable} interface and enable you to serialize
0046: * pre-processed Objects to disk, see concrete implementations for details.
0047: *
0048: * @author <a href="mailto:jb@eaio.com">Johann Burkard</a>
0049: * @version 1.2
0050: */
0051: public abstract class StringSearch {
0052:
/**
 * Crossover point for IBM VMs older than 1.4: at around 68 chars,
 * String#toCharArray becomes slower than Reflection based access.
 */
private static final int CROSSOVER_IBM_1_3 = 68;
/**
 * Crossover point for Sun VMs older than 1.4: around 90 chars.
 */
private static final int CROSSOVER_SUN_PRE_1_4 = 90;
/**
 * Crossover point for Sun VMs from 1.4 on: 12 chars.
 */
private static final int CROSSOVER_SUN_1_4 = 12;
0056:
/**
 * Stores if the native library has been loaded. This package comes with a
 * native library called "NativeSearch". If the system property
 * "com.eaio.stringsearch.native" is <code>null</code> (not defined) or "true"
 * (ignoring case), the static initializer attempts to load the native
 * library and sets this field to <code>true</code> only if loading succeeded.
 * Any other property value prevents the attempt and the field remains
 * <code>false</code>.
 */

/* comment:start */

protected static boolean useNative = false;
0068:
0069: /* comment:end */
0070:
/**
 * The crossover point at which the Reflection based char accessor should be
 * used - if Reflection access is allowed, of course. The crossover point is
 * set in the static initializer. If a String is longer than this value and
 * Reflection is allowed, its <code>char</code> array will be extracted
 * through Reflection. The default of 0 means that Reflection is used for
 * every non-empty String.
 */
private static int crossover = 0;
0079:
/**
 * The Dispatch instance that the String based methods of this class delegate
 * to. It is assigned in the static initializer: a ReflectionDispatch if the
 * required String fields could be made accessible, a plain Dispatch
 * otherwise. A ReflectionDispatch replaces it with a plain Dispatch when
 * reflective access fails with an IllegalAccessException.
 */
protected static Dispatch activeDispatch;
0084:
0085: /**
0086: * The Dispatch class implements the strategy to convert Strings to
0087: * <code>char</code> arrays and calls the appropriate <code>searchChars</code>
0088: * method in the given StringSearch instance.
0089: */
0090: protected static class Dispatch {
0091:
0092: /**
0093: * Instances are created in StringSearch only.
0094: */
0095: private Dispatch() {
0096: }
0097:
0098: /**
0099: * Searches a pattern inside a text, using the pre-processed Object and using
0100: * the given StringSearch instance.
0101: */
0102: protected int searchString(String text, int textStart,
0103: int textEnd, String pattern, Object processed,
0104: StringSearch instance) {
0105:
0106: return instance.searchChars(text.toCharArray(), textStart,
0107: textEnd, pattern.toCharArray(), processed);
0108:
0109: }
0110:
0111: /**
0112: * Searches a pattern inside a text, using the given StringSearch instance.
0113: */
0114: protected int searchString(String text, int textStart,
0115: int textEnd, String pattern, StringSearch instance) {
0116:
0117: return instance.searchChars(text.toCharArray(), textStart,
0118: textEnd, pattern.toCharArray());
0119:
0120: }
0121:
0122: /**
0123: * Searches a pattern inside a text with at most k mismatches, using the given
0124: * MismatchSearch instance.
0125: */
0126:
0127: /* comment:start */
0128:
0129: protected int[] searchString(String text, int textStart,
0130: int textEnd, String pattern, int k,
0131: MismatchSearch instance) {
0132:
0133: return instance.searchChars(text.toCharArray(), textStart,
0134: textEnd, pattern.toCharArray(), k);
0135:
0136: }
0137:
0138: /* comment:end */
0139:
0140: /**
0141: * Searches a pattern inside a text, using the pre-processed Object and at
0142: * most k mismatches, using the given MismatchSearch instance.
0143: */
0144:
0145: /* comment:start */
0146:
0147: protected int[] searchString(String text, int textStart,
0148: int textEnd, String pattern, Object processed, int k,
0149: MismatchSearch instance) {
0150:
0151: return instance.searchChars(text.toCharArray(), textStart,
0152: textEnd, pattern.toCharArray(), processed, k);
0153:
0154: }
0155:
0156: /* comment:end */
0157:
0158: /**
0159: * Returns the underlying <code>char</code> array.
0160: *
0161: * @param s the String, may not be <code>null</code>
0162: * @return char[]
0163: */
0164: protected char[] charsOf(String s) {
0165: return s.toCharArray();
0166: }
0167:
0168: }
0169:
0170: /**
0171: * The ReflectionDispatch class is used if Reflection can be used to access the
0172: * underlying <code>char</code> array in Strings to avoid the cloning overhead.
0173: */
0174: protected static class ReflectionDispatch extends Dispatch {
0175:
0176: private Field value, offset;
0177:
0178: /**
0179: * Instances are created in StringSearch only.
0180: *
0181: * @param value the "value" field in String
0182: * @param offset the "offset" field in String
0183: */
0184: private ReflectionDispatch(Field value, Field offset) {
0185: this .value = value;
0186: this .offset = offset;
0187: }
0188:
0189: /**
0190: * @see com.eaio.stringsearch.StringSearch.Dispatch#searchString(
0191: * String, int, int, String, Object, StringSearch)
0192: */
0193: protected int searchString(String text, int textStart,
0194: int textEnd, String pattern, Object processed,
0195: StringSearch instance) {
0196:
0197: int l = text.length();
0198: if (l > crossover) {
0199: try {
0200: int o = offset.getInt(text);
0201: char[] t = (char[]) value.get(text);
0202: return instance.searchChars(t, textStart + o,
0203: textEnd + o, charsOf(pattern), processed)
0204: - o;
0205:
0206: } catch (IllegalAccessException ex) {
0207: synchronized (activeDispatch) {
0208: activeDispatch = new Dispatch();
0209: }
0210: }
0211: }
0212:
0213: return super .searchString(text, textStart, textEnd,
0214: pattern, processed, instance);
0215:
0216: }
0217:
0218: /**
0219: * @see com.eaio.stringsearch.StringSearch.Dispatch#searchString(
0220: * String, int, int, String, StringSearch)
0221: */
0222: protected int searchString(String text, int textStart,
0223: int textEnd, String pattern, StringSearch instance) {
0224:
0225: int l = text.length();
0226: if (l > crossover) {
0227: try {
0228: int o = offset.getInt(text);
0229: char[] t = (char[]) value.get(text);
0230: return instance.searchChars(t, textStart + o,
0231: textEnd + o, charsOf(pattern))
0232: - o;
0233: } catch (IllegalAccessException ex) {
0234: synchronized (activeDispatch) {
0235: activeDispatch = new Dispatch();
0236: }
0237: }
0238: }
0239:
0240: return super .searchString(text, textStart, textEnd,
0241: pattern, instance);
0242:
0243: }
0244:
0245: /**
0246: * @see com.eaio.stringsearch.StringSearch.Dispatch#searchString(
0247: * String, int, int, String, int, MismatchSearch)
0248: */
0249:
0250: /* comment:start */
0251:
0252: protected int[] searchString(String text, int textStart,
0253: int textEnd, String pattern, int k,
0254: MismatchSearch instance) {
0255:
0256: int l = text.length();
0257: if (l > crossover) {
0258: try {
0259: int o = offset.getInt(text);
0260: char[] t = (char[]) value.get(text);
0261: int[] r = instance.searchChars(t, textStart + o,
0262: textEnd + o, charsOf(pattern), k);
0263: if (r[0] != -1) {
0264: r[0] -= o;
0265: }
0266: return r;
0267: } catch (IllegalAccessException ex) {
0268: synchronized (activeDispatch) {
0269: activeDispatch = new Dispatch();
0270: }
0271: }
0272: }
0273:
0274: return super .searchString(text, textStart, textEnd,
0275: pattern, k, instance);
0276:
0277: }
0278:
0279: /* comment:end */
0280:
0281: /**
0282: * @see com.eaio.stringsearch.StringSearch.Dispatch#searchString(
0283: * String, int, int, String, Object, int, MismatchSearch)
0284: */
0285:
0286: /* comment:start */
0287:
0288: protected int[] searchString(String text, int textStart,
0289: int textEnd, String pattern, Object processed, int k,
0290: MismatchSearch instance) {
0291:
0292: int l = text.length();
0293: if (l > crossover) {
0294: try {
0295: int o = offset.getInt(text);
0296: char[] t = (char[]) value.get(text);
0297: int[] r = instance
0298: .searchChars(t, textStart + o, textEnd + o,
0299: charsOf(pattern), processed, k);
0300: if (r[0] != -1) {
0301: r[0] -= o;
0302: }
0303: return r;
0304: } catch (IllegalAccessException ex) {
0305: synchronized (activeDispatch) {
0306: activeDispatch = new Dispatch();
0307: }
0308: }
0309: }
0310:
0311: return super .searchString(text, textStart, textEnd,
0312: pattern, processed, k, instance);
0313:
0314: }
0315:
0316: /* comment:end */
0317:
0318: /**
0319: * Tries to return the underlying <code>char</code> array directly. Only works
0320: * if the "offset" field is 0 and the "count" field is equal to the String's
0321: * length.
0322: *
0323: * @see com.eaio.stringsearch.StringSearch.Dispatch#chars(java.lang.String)
0324: */
0325: protected char[] charsOf(String s) {
0326: int l = s.length();
0327: if (l > crossover) {
0328: try {
0329: if (offset.getInt(s) != 0) {
0330: return super .charsOf(s);
0331: }
0332: char[] c = (char[]) value.get(s);
0333: if (c.length != l) {
0334: return super .charsOf(s);
0335: }
0336: return c;
0337: } catch (IllegalAccessException ex) {
0338: synchronized (activeDispatch) {
0339: activeDispatch = new Dispatch();
0340: }
0341: }
0342: }
0343: return super .charsOf(s);
0344: }
0345:
0346: }
0347:
static {

    /*
     * The probe String used to inspect String's fields. Its hash code is
     * computed up front so that a cached hash field is not 0 and therefore is
     * not mistaken for the offset field, which is recognized below by being an
     * int field with the value 0.
     */
    final String shortString = " ";
    shortString.hashCode(); // make sure the cached hashCode is not 0

    /*
     * Try loading the native library.
     */

    /* comment:start */

    String s = null;
    try {
        s = System.getProperty("com.eaio.stringsearch.native");
    } catch (SecurityException ex) {
        // Property not readable: handled like an undefined property.
    }

    if (s == null || "true".equalsIgnoreCase(s)) {
        try {
            Runtime.getRuntime().loadLibrary("NativeSearch");
            useNative = true;
        } catch (SecurityException ex) {
            // Loading not permitted: useNative stays false.
        } catch (UnsatisfiedLinkError ex) {
            // Library not available: useNative stays false.
        }
    }

    /* comment:end */

    /*
     * Find out if we can use Reflection. If there's a SecurityManager in place,
     * we probably can't.
     */

    Field value = null;
    Field offset = null;

    Field[] stringFields = null;

    try {
        stringFields = shortString.getClass().getDeclaredFields();
    } catch (SecurityException ex) {
        try {
            stringFields = (Field[]) AccessController
                    .doPrivileged(new PrivilegedAction() {

                        public Object run() {
                            return shortString.getClass()
                                    .getDeclaredFields();
                        }

                    });

        } catch (SecurityException ex2) {
            // No access to String's fields: Reflection is not used.
        }
    }

    if (stringFields != null) {

        Class charArray = new char[0].getClass();

        for (int i = 0; i < stringFields.length; i++) {
            if (stringFields[i].getType() == charArray) {
                final Field v = stringFields[i];

                try {

                    AccessController
                            .doPrivileged(new PrivilegedAction() {

                                public Object run() {
                                    v.setAccessible(true);
                                    return null;
                                }

                            });
                    value = v;
                } catch (RuntimeException ex) {
                    /*
                     * Field cannot be made accessible. Besides
                     * SecurityException this covers any other unchecked
                     * exception setAccessible may throw, so that a refusal
                     * never aborts class initialization.
                     */
                }
            } else if (stringFields[i].getType() == Integer.TYPE) {
                final Field o = stringFields[i];

                try {

                    AccessController
                            .doPrivileged(new PrivilegedAction() {

                                public Object run() {
                                    o.setAccessible(true);
                                    return null;
                                }

                            });

                    if (o.getInt(shortString) == 0) {
                        offset = o;
                        break;
                    }
                } catch (RuntimeException ex) {
                    // See above: the field is simply not used.
                } catch (IllegalAccessException ex) {
                    // The field is not readable and is simply not used.
                }
            }
        }
    }

    if (value != null && offset != null) {

        StringSearch.activeDispatch = new ReflectionDispatch(value,
                offset);

        /*
         * We can use Reflection. Set the crossover point at which Reflection
         * becomes faster than cloning the char array.
         */

        /*
         * Find out the vendor and version of the virtual machine. Missing or
         * unreadable properties leave the crossover point at its default.
         */

        String vendor = null;
        String version = null;
        try {
            vendor = System.getProperty("java.vendor");
            version = System.getProperty("java.version");
        } catch (SecurityException ex) {
            // Properties not readable: keep the defaults below.
        }

        char v = (vendor != null && vendor.length() > 0)
                ? vendor.charAt(0) : '\0';

        /*
         * Minor version of a "1.x" version String. Any other format is
         * handled like 1.4 or later.
         */
        int ver = 4;
        if (version != null && version.length() > 2
                && version.startsWith("1.")
                && Character.isDigit(version.charAt(2))) {
            ver = version.charAt(2) - '0';
        }

        if (v == 'I') {

            /*
             * IBM VMs
             */

            /*
             * Reflection in IBM 1.4.1 Linux is extremely fast, so we simply
             * use Reflection all the time.
             */

            if (ver < 4) {

                /*
                 * For the 1.3 IBM SDK, the crossover point is around 68
                 * chars. At this point, String#toCharArray becomes slower
                 * than Reflection based access. Their System#arraycopy()
                 * implementation is about two times slower than the one from
                 * Sun btw.
                 */

                crossover = CROSSOVER_IBM_1_3;

            }

        } else if (v == 'S') {

            /*
             * Sun VMs
             */

            if (ver > 3) {

                /*
                 * For the 1.4 Sun SDKs, the crossover point is 12 chars. At
                 * this point, String#toCharArray becomes slower than
                 * Reflection based access. It's not String#toCharArray that
                 * is slow, it's Reflection they made so fast.
                 */

                crossover = CROSSOVER_SUN_1_4;

            } else {

                /*
                 * For the 1.3 Sun SDKs, the crossover point is around 90
                 * chars.
                 */

                crossover = CROSSOVER_SUN_PRE_1_4;

            }

        }

        /*
         * Intentionally no branches for BEA JRockIt ('B') and the Kaffe VM
         * ('T', reports "Transvirtual Technologies, Inc." in java.vendor):
         * Reflection access is much faster than cloning there, so Reflection
         * is used all the time and the crossover point stays 0.
         */

    } else {

        /*
         * No Reflection support.
         */

        StringSearch.activeDispatch = new Dispatch();

    }

}
0563:
0564: /**
0565: * Returns if Reflection is used to access the underlying <code>char</code>
0566: * array in Strings.
0567: *
0568: * @return <code>true</code> or <code>false</code>
0569: */
0570: public static boolean usesReflection() {
0571: return activeDispatch instanceof ReflectionDispatch;
0572: }
0573:
0574: /**
0575: * Constructor for StringSearch. Note that it is not required to create
0576: * multiple instances.
0577: */
protected StringSearch() {
    // Nothing to set up: implementations keep no state.
    super();
}
0580:
0581: /**
0582: * Returns if this algorithm currently uses the native library - if it could be
0583: * loaded. If the algorithm has a different strategy concerning native
0584: * libraries or if it does not use the native library at all, it will return
0585: * <code>false</code>.
0586: *
0587: * @return <code>true</code> or <code>false</code>
0588: */
0589:
0590: /* comment:start */
0591:
0592: public boolean usesNative() {
0593: return useNative;
0594: }
0595:
0596: /* comment:end */
0597:
0598: /*
0599: * Pre-processing methods
0600: */
0601:
/**
 * Pre-processes a <code>byte</code> array. The returned Object is what the
 * <code>searchBytes</code> methods of this class pass as the
 * <code>processed</code> argument to
 * {@link #searchBytes(byte[], int, int, byte[], Object)}.
 *
 * @param pattern the <code>byte</code> array containing the pattern, may not
 * be <code>null</code>
 * @return the pre-processed pattern as an Object
 */
public abstract Object processBytes(byte[] pattern);
0610:
/**
 * Pre-processes a <code>char</code> array. The returned Object is what the
 * <code>searchChars</code> methods of this class pass as the
 * <code>processed</code> argument to
 * {@link #searchChars(char[], int, int, char[], Object)}.
 *
 * @param pattern a <code>char</code> array containing the pattern, may not be
 * <code>null</code>
 * @return the pre-processed pattern as an Object
 */
public abstract Object processChars(char[] pattern);
0619:
0620: /**
0621: * Pre-processes a String. This method should not be used directly because it
0622: * is implicitly called in the {@link #searchString(String, String)} methods.
0623: *
0624: * @param pattern the String containing the pattern, may not be
0625: * <code>null</code>
0626: * @return an Object
0627: * @see #processChars(char[])
0628: */
0629: public final Object processString(String pattern) {
0630: return processChars(activeDispatch.charsOf(pattern));
0631: }
0632:
0633: /* Byte searching methods */
0634:
0635: /**
0636: * Returns the position in the text at which the pattern was found. Returns -1
0637: * if the pattern was not found.
0638: *
0639: * @param text the <code>byte</code> array containing the text, may not be
0640: * <code>null</code>
0641: * @param pattern the <code>byte</code> array containing the pattern, may not
0642: * be <code>null</code>
0643: * @return the position in the text or -1 if the pattern was not found
0644: * @see #searchBytes(byte[], int, int, byte[], Object)
0645: */
0646: public final int searchBytes(byte[] text, byte[] pattern) {
0647: return searchBytes(text, 0, text.length, pattern,
0648: processBytes(pattern));
0649: }
0650:
0651: /**
0652: * Returns the position in the text at which the pattern was found. Returns -1
0653: * if the pattern was not found.
0654: *
0655: * @param text the <code>byte</code> array containing the text, may not be
0656: * <code>null</code>
0657: * @param pattern the pattern to search for, may not be <code>null</code>
0658: * @param processed an Object as returned from {@link #processBytes(byte[])},
0659: * may not be <code>null</code>
0660: * @return the position in the text or -1 if the pattern was not found
0661: * @see #searchBytes(byte[], int, int, byte[], Object)
0662: */
0663: public final int searchBytes(byte[] text, byte[] pattern,
0664: Object processed) {
0665: return searchBytes(text, 0, text.length, pattern, processed);
0666: }
0667:
0668: /**
0669: * Returns the position in the text at which the pattern was found. Returns -1
0670: * if the pattern was not found.
0671: *
0672: * @param text the <code>byte</code> array containing the text, may not be
0673: * <code>null</code>
0674: * @param textStart at which position in the text the comparing should start
0675: * @param pattern the <code>byte</code> array containing the pattern, may not
0676: * be <code>null</code>
0677: * @return int the position in the text or -1 if the pattern was not found
0678: * @see #searchBytes(byte[], int, int, byte[], Object)
0679: */
0680: public final int searchBytes(byte[] text, int textStart,
0681: byte[] pattern) {
0682: return searchBytes(text, textStart, text.length, pattern,
0683: processBytes(pattern));
0684: }
0685:
0686: /**
0687: * Returns the position in the text at which the pattern was found. Returns -1
0688: * if the pattern was not found.
0689: *
0690: * @param text the <code>byte</code> array containing the text, may not be
0691: * <code>null</code>
0692: * @param textStart at which position in the text the comparing should start
0693: * @param pattern the pattern to search for, may not be <code>null</code>
* @param processed an Object as returned from {@link #processBytes(byte[])},
* may not be <code>null</code>
0695: * @return the position in the text or -1 if the pattern was not found
0696: * @see #searchBytes(byte[], int, int, byte[], Object)
0697: */
0698: public final int searchBytes(byte[] text, int textStart,
0699: byte[] pattern, Object processed) {
0700:
0701: return searchBytes(text, textStart, text.length, pattern,
0702: processed);
0703:
0704: }
0705:
0706: /**
0707: * Returns the position in the text at which the pattern was found. Returns -1
0708: * if the pattern was not found.
0709: *
* @param text the <code>byte</code> array containing the text, may not be
* <code>null</code>
0712: * @param textStart at which position in the text the comparing should start
0713: * @param textEnd at which position in the text comparing should stop
0714: * @param pattern the <code>byte</code> array containing the pattern, may not
0715: * be <code>null</code>
0716: * @return the position in the text or -1 if the pattern was not found
0717: * @see #searchBytes(byte[], int, int, byte[], Object)
0718: */
0719: public final int searchBytes(byte[] text, int textStart,
0720: int textEnd, byte[] pattern) {
0721:
0722: return searchBytes(text, textStart, textEnd, pattern,
0723: processBytes(pattern));
0724:
0725: }
0726:
/**
 * Returns the position in the text at which the pattern was found. Returns -1
 * if the pattern was not found. The other <code>searchBytes</code> methods of
 * this class delegate to this method.
 *
 * @param text the <code>byte</code> array containing the text, may not be
 * <code>null</code>
 * @param textStart at which position in the text the comparing should start
 * @param textEnd at which position in the text comparing should stop
 * @param pattern the pattern to search for, may not be <code>null</code>
 * @param processed an Object as returned from {@link #processBytes(byte[])},
 * may not be <code>null</code>
 * @return the position in the text or -1 if the pattern was not found
 * @see #processBytes(byte[])
 */
public abstract int searchBytes(byte[] text, int textStart,
int textEnd, byte[] pattern, Object processed);
0743:
0744: /* Char searching methods */
0745:
0746: /**
0747: * Returns the position in the text at which the pattern was found. Returns -1
0748: * if the pattern was not found.
0749: *
0750: * @param text the character array containing the text, may not be
0751: * <code>null</code>
0752: * @param pattern the <code>char</code> array containing the pattern, may not
0753: * be <code>null</code>
0754: * @return the position in the text or -1 if the pattern was not found
0755: * @see #searchChars(char[], int, int, char[], Object)
0756: */
0757: public final int searchChars(char[] text, char[] pattern) {
0758: return searchChars(text, 0, text.length, pattern,
0759: processChars(pattern));
0760: }
0761:
0762: /**
0763: * Returns the index of the pattern in the text using the pre-processed Object.
0764: * Returns -1 if the pattern was not found.
0765: *
0766: * @param text the character array containing the text, may not be
0767: * <code>null</code>
0768: * @param pattern the <code>char</code> array containing the pattern, may not
0769: * be <code>null</code>
0770: * @param processed an Object as returned from {@link #processChars(char[])} or
0771: * {@link #processString(String)}, may not be <code>null</code>
0772: * @return the position in the text or -1 if the pattern was not found
0773: * @see #searchChars(char[], int, int, char[], Object)
0774: */
0775: public final int searchChars(char[] text, char[] pattern,
0776: Object processed) {
0777: return searchChars(text, 0, text.length, pattern, processed);
0778: }
0779:
0780: /**
0781: * Returns the position in the text at which the pattern was found. Returns -1
0782: * if the pattern was not found.
0783: *
0784: * @param text the character array containing the text, may not be
0785: * <code>null</code>
0786: * @param textStart at which position in the text the comparing should start
0787: * @param pattern the <code>char</code> array containing the pattern, may not
0788: * be <code>null</code>
0789: * @return the position in the text or -1 if the pattern was not found
0790: * @see #searchChars(char[], int, int, char[], Object)
0791: */
0792: public final int searchChars(char[] text, int textStart,
0793: char[] pattern) {
0794: return searchChars(text, textStart, text.length, pattern,
0795: processChars(pattern));
0796: }
0797:
0798: /**
0799: * Returns the index of the pattern in the text using the pre-processed Object.
0800: * Returns -1 if the pattern was not found.
0801: *
* @param text the character array containing the text, may not be
* <code>null</code>
0803: * @param textStart at which position in the text the comparing should start
0804: * @param pattern the <code>char</code> array containing the pattern, may not
0805: * be <code>null</code>
0806: * @param processed an Object as returned from {@link #processChars(char[])} or
0807: * {@link #processString(String)}, may not be <code>null</code>
0808: * @return the position in the text or -1 if the pattern was not found
0809: * @see #searchChars(char[], int, int, char[], Object)
0810: */
0811: public final int searchChars(char[] text, int textStart,
0812: char[] pattern, Object processed) {
0813:
0814: return searchChars(text, textStart, text.length, pattern,
0815: processed);
0816:
0817: }
0818:
0819: /**
0820: * Returns the position in the text at which the pattern was found. Returns -1
0821: * if the pattern was not found.
0822: *
0823: * @param text the character array containing the text, may not be
0824: * <code>null</code>
0825: * @param textStart at which position in the text the comparing should start
0826: * @param textEnd at which position in the text comparing should stop
0827: * @param pattern the <code>char</code> array containing the pattern, may not
0828: * be <code>null</code>
0829: * @return the position in the text or -1 if the pattern was not found
0830: * @see #searchChars(char[], int, int, char[], Object)
0831: */
0832: public final int searchChars(char[] text, int textStart,
0833: int textEnd, char[] pattern) {
0834:
0835: return searchChars(text, textStart, textEnd, pattern,
0836: processChars(pattern));
0837:
0838: }
0839:
/**
 * Returns the index of the pattern in the text using the pre-processed Object.
 * Returns -1 if the pattern was not found. The other <code>searchChars</code>
 * methods of this class delegate to this method.
 *
 * @param text the character array containing the text, may not be
 * <code>null</code>
 * @param textStart at which position in the text the comparing should start
 * @param textEnd at which position in the text comparing should stop
 * @param pattern the pattern to search for, may not be <code>null</code>
 * @param processed an Object as returned from {@link #processChars(char[])} or
 * {@link #processString(String)}, may not be <code>null</code>
 * @return the position in the text or -1 if the pattern was not found
 */
public abstract int searchChars(char[] text, int textStart,
int textEnd, char[] pattern, Object processed);
0854:
0855: /* String searching methods */
0856:
0857: /**
0858: * Convenience method to search for patterns in Strings. Returns the position
0859: * in the text at which the pattern was found. Returns -1 if the pattern was
0860: * not found.
0861: *
0862: * @param text the String containing the text, may not be <code>null</code>
0863: * @param pattern the String containing the pattern, may not be
0864: * <code>null</code>
0865: * @return the position in the text or -1 if the pattern was not found
0866: * @see #searchChars(char[], int, int, char[], Object)
0867: */
0868: public final int searchString(String text, String pattern) {
0869: return searchString(text, 0, text.length(), pattern);
0870: }
0871:
0872: /**
0873: * Convenience method to search for patterns in Strings. Returns the position
0874: * in the text at which the pattern was found. Returns -1 if the pattern was
0875: * not found.
0876: *
0877: * @param text the String containing the text, may not be <code>null</code>
0878: * @param pattern the String containing the pattern, may not be
0879: * <code>null</code>
0880: * @param processed an Object as returned from {@link #processChars(char[])} or
0881: * {@link #processString(String)}, may not be <code>null</code>
0882: * @return the position in the text or -1 if the pattern was not found
0883: * @see #searchChars(char[], int, int, char[], Object)
0884: */
0885: public final int searchString(String text, String pattern,
0886: Object processed) {
0887: return searchString(text, 0, text.length(), pattern, processed);
0888: }
0889:
0890: /**
0891: * Convenience method to search for patterns in Strings. Returns the position
0892: * in the text at which the pattern was found. Returns -1 if the pattern was
0893: * not found.
0894: *
0895: * @param text the String containing the text, may not be <code>null</code>
0896: * @param textStart at which position in the text the comparing should start
0897: * @param pattern the String containing the pattern, may not be
0898: * <code>null</code>
0899: * @return the position in the text or -1 if the pattern was not found
0900: * @see #searchChars(char[], int, int, char[], Object)
0901: */
0902: public final int searchString(String text, int textStart,
0903: String pattern) {
0904: return searchString(text, textStart, text.length(), pattern);
0905: }
0906:
0907: /**
0908: * Convenience method to search for patterns in Strings. Returns the position
0909: * in the text at which the pattern was found. Returns -1 if the pattern was
0910: * not found.
0911: *
0912: * @param text the String containing the text, may not be <code>null</code>
0913: * @param textStart at which position in the text the comparing should start
0914: * @param pattern the String containing the pattern, may not be
0915: * <code>null</code>
0916: * @param processed an Object as returned from {@link #processChars(char[])} or
0917: * {@link #processString(String)}, may not be <code>null</code>
0918: * @return the position in the text or -1 if the pattern was not found
0919: * @see #searchChars(char[], int, int, char[], Object)
0920: */
0921: public final int searchString(String text, int textStart,
0922: String pattern, Object processed) {
0923:
0924: return searchString(text, textStart, text.length(), pattern,
0925: processed);
0926:
0927: }
0928:
0929: /**
0930: * Convenience method to search for patterns in Strings. Returns the position
0931: * in the text at which the pattern was found. Returns -1 if the pattern was
0932: * not found.
0933: *
0934: * @param text the String containing the text, may not be <code>null</code>
0935: * @param textStart at which position in the text the comparing should start
0936: * @param textEnd at which position in the text comparing should stop
0937: * @param pattern the String containing the pattern, may not be
0938: * <code>null</code>
0939: * @return the position in the text or -1 if the pattern was not found
0940: * @see #searchChars(char[], int, int, char[])
0941: */
0942: public final int searchString(String text, int textStart,
0943: int textEnd, String pattern) {
0944:
0945: return StringSearch.activeDispatch.searchString(text,
0946: textStart, textEnd, pattern, this );
0947:
0948: }
0949:
0950: /**
0951: * Convenience method to search for patterns in Strings. Returns the position
0952: * in the text at which the pattern was found. Returns -1 if the pattern was
0953: * not found.
0954: *
0955: * @param text the String containing the text, may not be <code>null</code>
0956: * @param textStart at which position in the text the comparing should start
0957: * @param textEnd at which position in the text comparing should stop
0958: * @param pattern the String containing the pattern, may not be
0959: * <code>null</code>
0960: * @param processed an Object as returned from {@link #processChars(char[])} or
0961: * {@link #processString(String)}, may not be <code>null</code>
0962: * @return the position in the text or -1 if the pattern was not found
0963: * @see #searchChars(char[], int, int, char[])
0964: */
0965: public final int searchString(String text, int textStart,
0966: int textEnd, String pattern, Object processed) {
0967:
0968: return StringSearch.activeDispatch.searchString(text,
0969: textStart, textEnd, pattern, processed, this );
0970:
0971: }
0972:
0973: /**
0974: * Returns if the Object's class name matches this Object's class name.
0975: *
0976: * @param obj the other Object
0977: * @return if the Object is equal to this Object
0978: * @see java.lang.Object#equals(Object)
0979: */
0980: public final boolean equals(Object obj) {
0981: if (this == obj) {
0982: return true;
0983: }
0984: if (obj == null) {
0985: return false;
0986: }
0987: return getClass().getName().equals(obj.getClass().getName());
0988: }
0989:
0990: /**
0991: * Returns the hashCode of the Object's Class because all instances of this
0992: * Class are equal.
0993: *
0994: * @return an int
0995: * @see java.lang.Object#hashCode()
0996: */
0997: public final int hashCode() {
0998: return getClass().getName().hashCode();
0999: }
1000:
1001: /**
1002: * Returns a String representation of this. Simply returns the name of the
1003: * Class.
1004: *
1005: * @return a String
1006: * @see java.lang.Object#toString()
1007: */
1008: public final String toString() {
1009: return toStringBuffer(null).toString();
1010: }
1011:
1012: /**
1013: * Appends a String representation of this to the given {@link StringBuffer} or
1014: * creates a new one if none is given. This method is not <code>final</code>
1015: * because subclasses might want a different String format.
1016: *
1017: * @param in the StringBuffer to append to, may be <code>null</code>
1018: * @return a StringBuffer
1019: */
1020: public StringBuffer toStringBuffer(StringBuffer in) {
1021: if (in == null) {
1022: in = new StringBuffer();
1023: }
1024: in.append("{ ");
1025: int idx = getClass().getName().lastIndexOf(".");
1026: if (idx > -1) {
1027: in.append(getClass().getName().substring(++idx));
1028: } else {
1029: in.append(getClass().getName());
1030: }
1031: in.append(" }");
1032: return in;
1033: }
1034:
1035: /* Utility methods */
1036:
1037: /**
1038: * Returns a {@link CharIntMap} of the extent of the given pattern, using no
1039: * default value.
1040: *
1041: * @param pattern the pattern
1042: * @return a CharIntMap
1043: * @see CharIntMap#CharIntMap(int, char)
1044: */
1045: protected CharIntMap createCharIntMap(char[] pattern) {
1046: return createCharIntMap(pattern, 0);
1047: }
1048:
1049: /**
1050: * Returns a {@link CharIntMap} of the extent of the given pattern, using the
1051: * specified default value.
1052: *
1053: * @param pattern the pattern
1054: * @param defaultValue the default value
1055: * @return a CharIntMap
1056: * @see CharIntMap#CharIntMap(int, char, int)
1057: */
1058: protected CharIntMap createCharIntMap(char[] pattern,
1059: int defaultValue) {
1060: char min = Character.MAX_VALUE;
1061: char max = Character.MIN_VALUE;
1062: for (int i = 0; i < pattern.length; i++) {
1063: max = max > pattern[i] ? max : pattern[i];
1064: min = min < pattern[i] ? min : pattern[i];
1065: }
1066: return new CharIntMap(max - min + 1, min, defaultValue);
1067: }
1068:
1069: /**
1070: * Converts the given <code>byte</code> to an <code>int</code>.
1071: *
1072: * @param idx the byte
1073: * @return an int
1074: */
1075: protected final int index(byte idx) {
1076: return (idx < 0) ? 256 + idx : idx;
1077: }
1078:
1079: /* Utility methods */
1080:
1081: }
|