0001: /*
0002: *******************************************************************************
0003: * Copyright (C) 2000-2005, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */
0007:
0008: /**
0009: * Port From: ICU4C v2.1 : collate/StringSearchTest
0010: * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp
0011: **/package com.ibm.icu.dev.test.search;
0012:
0013: import java.util.Locale;
0014: import java.text.StringCharacterIterator;
0015: import com.ibm.icu.dev.test.*;
0016: import com.ibm.icu.text.*;
0017:
0018: public class SearchTest extends TestFmwk {
0019:
0020: //inner class
0021: static class SearchData {
0022: SearchData(String text, String pattern, String coll,
0023: int strength, String breaker, int[] offset, int[] size) {
0024: this .text = text;
0025: this .pattern = pattern;
0026: this .collator = coll;
0027: this .strength = strength;
0028: this .breaker = breaker;
0029: this .offset = offset;
0030: this .size = size;
0031: }
0032:
0033: String text;
0034: String pattern;
0035: String collator;
0036: int strength;
0037: String breaker;
0038: int[] offset;
0039: int[] size;
0040: }
0041:
0042: RuleBasedCollator m_en_us_;
0043: RuleBasedCollator m_fr_fr_;
0044: RuleBasedCollator m_de_;
0045: RuleBasedCollator m_es_;
0046: BreakIterator m_en_wordbreaker_;
0047: BreakIterator m_en_characterbreaker_;
0048:
0049: static SearchData[] BASIC = {
0050: new SearchData("xxxxxxxxxxxxxxxxxxxx", "fisher", null,
0051: Collator.TERTIARY, null, new int[] { -1 },
0052: new int[] { 0 }),
0053: new SearchData("silly spring string", "string", null,
0054: Collator.TERTIARY, null, new int[] { 13, -1 },
0055: new int[] { 6 }),
0056: new SearchData("silly spring string string", "string",
0057: null, Collator.TERTIARY, null, new int[] { 13, 20,
0058: -1 }, new int[] { 6, 6 }),
0059: new SearchData("silly string spring string", "string",
0060: null, Collator.TERTIARY, null, new int[] { 6, 20,
0061: -1 }, new int[] { 6, 6 }),
0062: new SearchData("string spring string", "string", null,
0063: Collator.TERTIARY, null, new int[] { 0, 14, -1 },
0064: new int[] { 6, 6 }),
0065: new SearchData("Scott Ganyo", "c", null, Collator.TERTIARY,
0066: null, new int[] { 1, -1 }, new int[] { 1 }),
0067: new SearchData("Scott Ganyo", " ", null, Collator.TERTIARY,
0068: null, new int[] { 5, -1 }, new int[] { 1 }),
0069: new SearchData("\u0300\u0325", "\u0300", null,
0070: Collator.TERTIARY, null, new int[] { -1 },
0071: new int[] { 0 }),
0072: new SearchData("a\u0300\u0325", "\u0300", null,
0073: Collator.TERTIARY, null, new int[] { -1 },
0074: new int[] { 0 }),
0075: new SearchData("a\u0300\u0325", "\u0300\u0325", null,
0076: Collator.TERTIARY, null, new int[] { 1, -1 },
0077: new int[] { 2 }),
0078: new SearchData("a\u0300b", "\u0300", null,
0079: Collator.TERTIARY, null, new int[] { 1, -1 },
0080: new int[] { 1 }),
0081: new SearchData("\u00c9", "e", null, Collator.PRIMARY, null,
0082: new int[] { 0, -1 }, new int[] { 1 }),
0083: new SearchData(null, null, null, Collator.TERTIARY, null,
0084: new int[] { -1 }, new int[] { 0 }) };
0085:
0086: SearchData BREAKITERATOREXACT[] = {
0087: new SearchData("foxy fox", "fox", null, Collator.TERTIARY,
0088: "characterbreaker", new int[] { 0, 5, -1 },
0089: new int[] { 3, 3 }),
0090: new SearchData("foxy fox", "fox", null, Collator.TERTIARY,
0091: "wordbreaker", new int[] { 5, -1 }, new int[] { 3 }),
0092: new SearchData("This is a toe T\u00F6ne", "toe", "de",
0093: Collator.PRIMARY, "characterbreaker", new int[] {
0094: 10, 14, -1 }, new int[] { 3, 2 }),
0095: new SearchData("This is a toe T\u00F6ne", "toe", "de",
0096: Collator.PRIMARY, "wordbreaker",
0097: new int[] { 10, -1 }, new int[] { 3 }),
0098: new SearchData(
0099: "Channel, another channel, more channels, and one last Channel",
0100: "Channel", "es", Collator.TERTIARY, "wordbreaker",
0101: new int[] { 0, 54, -1 }, new int[] { 7, 7 }),
0102: /* jitterbug 1745 */
0103: new SearchData("testing that \u00e9 does not match e", "e",
0104: null, Collator.TERTIARY, "characterbreaker",
0105: new int[] { 1, 17, 30, -1 }, new int[] { 1, 1, 1 }),
0106: new SearchData(
0107: "testing that string ab\u00e9cd does not match e",
0108: "e", null, Collator.TERTIARY, "characterbreaker",
0109: new int[] { 1, 28, 41, -1 }, new int[] { 1, 1, 1 }),
0110: new SearchData("\u00c9", "e", "fr", Collator.PRIMARY,
0111: "characterbreaker", new int[] { 0, -1 },
0112: new int[] { 1 }),
0113: new SearchData(null, null, null, Collator.TERTIARY, null,
0114: new int[] { -1 }, new int[] { 0 }) };
0115:
0116: SearchData BREAKITERATORCANONICAL[] = {
0117: new SearchData("foxy fox", "fox", null, Collator.TERTIARY,
0118: "characterbreaker", new int[] { 0, 5, -1 },
0119: new int[] { 3, 3 }),
0120: new SearchData("foxy fox", "fox", null, Collator.TERTIARY,
0121: "wordbreaker", new int[] { 5, -1 }, new int[] { 3 }),
0122: new SearchData("This is a toe T\u00F6ne", "toe", "de",
0123: Collator.PRIMARY, "characterbreaker", new int[] {
0124: 10, 14, -1 }, new int[] { 3, 2 }),
0125: new SearchData("This is a toe T\u00F6ne", "toe", "de",
0126: Collator.PRIMARY, "wordbreaker",
0127: new int[] { 10, -1 }, new int[] { 3 }),
0128: new SearchData(
0129: "Channel, another channel, more channels, and one last Channel",
0130: "Channel", "es", Collator.TERTIARY, "wordbreaker",
0131: new int[] { 0, 54, -1 }, new int[] { 7, 7 }),
0132: /* jitterbug 1745 */
0133: new SearchData("testing that \u00e9 does not match e", "e",
0134: null, Collator.TERTIARY, "characterbreaker",
0135: new int[] { 1, 17, 30, -1 }, new int[] { 1, 1, 1 }),
0136: new SearchData(
0137: "testing that string ab\u00e9cd does not match e",
0138: "e", null, Collator.TERTIARY, "characterbreaker",
0139: new int[] { 1, 28, 41, -1 }, new int[] { 1, 1, 1 }),
0140: new SearchData("\u00c9", "e", "fr", Collator.PRIMARY,
0141: "characterbreaker", new int[] { 0, -1 },
0142: new int[] { 1 }),
0143: new SearchData(null, null, null, Collator.TERTIARY, null,
0144: new int[] { -1 }, new int[] { 0 }) };
0145:
0146: SearchData BASICCANONICAL[] = {
0147: new SearchData("xxxxxxxxxxxxxxxxxxxx", "fisher", null,
0148: Collator.TERTIARY, null, new int[] { -1 },
0149: new int[] { 0 }),
0150: new SearchData("silly spring string", "string", null,
0151: Collator.TERTIARY, null, new int[] { 13, -1 },
0152: new int[] { 6 }),
0153: new SearchData("silly spring string string", "string",
0154: null, Collator.TERTIARY, null, new int[] { 13, 20,
0155: -1 }, new int[] { 6, 6 }),
0156: new SearchData("silly string spring string", "string",
0157: null, Collator.TERTIARY, null, new int[] { 6, 20,
0158: -1 }, new int[] { 6, 6 }),
0159: new SearchData("string spring string", "string", null,
0160: Collator.TERTIARY, null, new int[] { 0, 14, -1 },
0161: new int[] { 6, 6 }),
0162: new SearchData("Scott Ganyo", "c", null, Collator.TERTIARY,
0163: null, new int[] { 1, -1 }, new int[] { 1 }),
0164: new SearchData("Scott Ganyo", " ", null, Collator.TERTIARY,
0165: null, new int[] { 5, -1 }, new int[] { 1 }),
0166: new SearchData("\u0300\u0325", "\u0300", null,
0167: Collator.TERTIARY, null, new int[] { 0, -1 },
0168: new int[] { 2 }),
0169: new SearchData("a\u0300\u0325", "\u0300", null,
0170: Collator.TERTIARY, null, new int[] { 1, -1 },
0171: new int[] { 2 }),
0172: new SearchData("a\u0300\u0325", "\u0300\u0325", null,
0173: Collator.TERTIARY, null, new int[] { 1, -1 },
0174: new int[] { 2 }),
0175: new SearchData("a\u0300b", "\u0300", null,
0176: Collator.TERTIARY, null, new int[] { 1, -1 },
0177: new int[] { 1 }),
0178: new SearchData("a\u0300\u0325b", "\u0300b", null,
0179: Collator.TERTIARY, null, new int[] { 1, -1 },
0180: new int[] { 3 }),
0181: new SearchData("\u0325\u0300A\u0325\u0300",
0182: "\u0300A\u0300", null, Collator.TERTIARY, null,
0183: new int[] { 0, -1 }, new int[] { 5 }),
0184: new SearchData("\u0325\u0300A\u0325\u0300",
0185: "\u0325A\u0325", null, Collator.TERTIARY, null,
0186: new int[] { 0, -1 }, new int[] { 5 }),
0187: new SearchData(
0188: "a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325",
0189: "\u0300b\u0325", null, Collator.TERTIARY, null,
0190: new int[] { 1, 12, -1 }, new int[] { 5, 3 }),
0191: new SearchData("\u00c4\u0323", "A\u0323\u0308", null,
0192: Collator.TERTIARY, null, new int[] { 0, -1 },
0193: new int[] { 2 }),
0194: new SearchData("\u0308\u0323", "\u0323\u0308", null,
0195: Collator.TERTIARY, null, new int[] { 0, -1 },
0196: new int[] { 2 }),
0197: new SearchData(null, null, null, Collator.TERTIARY, null,
0198: new int[] { -1 }, new int[] { 0 }) };
0199:
0200: SearchData COLLATOR[] = {
0201: /* english */
0202: new SearchData("fox fpx", "fox", null, Collator.TERTIARY,
0203: null, new int[] { 0, -1 }, new int[] { 3 }),
0204: /* tailored */
0205: new SearchData("fox fpx", "fox", null, Collator.PRIMARY,
0206: null, new int[] { 0, 4, -1 }, new int[] { 3, 3 }),
0207: new SearchData(null, null, null, Collator.TERTIARY, null,
0208: new int[] { -1 }, new int[] { 0 }) };
0209:
0210: String TESTCOLLATORRULE = "& o,O ; p,P";
0211: String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc";
0212:
0213: SearchData COLLATORCANONICAL[] = {
0214: /* english */
0215: new SearchData("fox fpx", "fox", null, Collator.TERTIARY,
0216: null, new int[] { 0, -1 }, new int[] { 3 }),
0217: /* tailored */
0218: new SearchData("fox fpx", "fox", null, Collator.PRIMARY,
0219: null, new int[] { 0, 4, -1 }, new int[] { 3, 3 }),
0220: new SearchData(null, null, null, Collator.TERTIARY, null,
0221: new int[] { -1 }, new int[] { 0 }) };
0222:
0223: SearchData COMPOSITEBOUNDARIES[] = {
0224: new SearchData("\u00C0", "A", null, Collator.TERTIARY,
0225: null, new int[] { 0, -1 }, new int[] { 1 }),
0226: new SearchData("A\u00C0C", "A", null, Collator.TERTIARY,
0227: null, new int[] { 0, 1, -1 }, new int[] { 1, 1 }),
0228: new SearchData("\u00C0A", "A", null, Collator.TERTIARY,
0229: null, new int[] { 0, 1, -1 }, new int[] { 1, 1 }),
0230: new SearchData("B\u00C0", "A", null, Collator.TERTIARY,
0231: null, new int[] { 1, -1 }, new int[] { 1 }),
0232: new SearchData("\u00C0B", "A", null, Collator.TERTIARY,
0233: null, new int[] { 0, -1 }, new int[] { 1 }),
0234: new SearchData("\u00C0", "\u0300", null, Collator.TERTIARY,
0235: null, new int[] { 0, -1 }, new int[] { 1 }),
0236: new SearchData("\u0300\u00C0", "\u0300", null,
0237: Collator.TERTIARY, null, new int[] { 0, 1, -1 },
0238: new int[] { 1, 1 }),
0239: new SearchData("\u00C0\u0300", "\u0300", null,
0240: Collator.TERTIARY, null, new int[] { -1 },
0241: new int[] { 0 }),
0242: /* A + 030A + 0301 */
0243: new SearchData("\u01FA", "\u01FA", null, Collator.TERTIARY,
0244: null, new int[] { 0, -1 }, new int[] { 1 }),
0245: new SearchData("\u01FA", "\u030A", null, Collator.TERTIARY,
0246: null, new int[] { -1 }, new int[] { 0 }),
0247: new SearchData("\u01FA", "A\u030A", null,
0248: Collator.TERTIARY, null, new int[] { -1 },
0249: new int[] { 0 }),
0250: new SearchData("\u01FA", "\u030AA", null,
0251: Collator.TERTIARY, null, new int[] { -1 },
0252: new int[] { 0 }),
0253: new SearchData("\u01FA", "\u0301", null, Collator.TERTIARY,
0254: null, new int[] { -1 }, new int[] { 0 }),
0255: new SearchData("\u01FA", "A\u0301", null,
0256: Collator.TERTIARY, null, new int[] { -1 },
0257: new int[] { 0 }),
0258: new SearchData("\u01FA", "\u0301A", null,
0259: Collator.TERTIARY, null, new int[] { -1 },
0260: new int[] { 0 }),
0261: new SearchData("\u01FA", "\u030A\u0301", null,
0262: Collator.TERTIARY, null, new int[] { 0, -1 },
0263: new int[] { 1 }),
0264: new SearchData("A\u01FA", "A\u030A", null,
0265: Collator.TERTIARY, null, new int[] { -1 },
0266: new int[] { 0 }),
0267: new SearchData("\u01FAA", "\u0301A", null,
0268: Collator.TERTIARY, null, new int[] { -1 },
0269: new int[] { 0 }),
0270: new SearchData("\u0F73", "\u0F73", null, Collator.TERTIARY,
0271: null, new int[] { 0, -1 }, new int[] { 1 }),
0272: new SearchData("\u0F73", "\u0F71", null, Collator.TERTIARY,
0273: null, new int[] { -1 }, new int[] { 0 }),
0274: new SearchData("\u0F73", "\u0F72", null, Collator.TERTIARY,
0275: null, new int[] { -1 }, new int[] { 0 }),
0276: new SearchData("\u0F73", "\u0F71\u0F72", null,
0277: Collator.TERTIARY, null, new int[] { 0, -1 },
0278: new int[] { 1 }),
0279: new SearchData("A\u0F73", "A\u0F71", null,
0280: Collator.TERTIARY, null, new int[] { -1 },
0281: new int[] { 0 }),
0282: new SearchData("\u0F73A", "\u0F72A", null,
0283: Collator.TERTIARY, null, new int[] { -1 },
0284: new int[] { 0 }),
0285: new SearchData(null, null, null, Collator.TERTIARY, null,
0286: new int[] { -1 }, new int[] { 0 }) };
0287:
0288: SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
0289: new SearchData("\u00C0", "A", null, Collator.TERTIARY,
0290: null, new int[] { 0, -1 }, new int[] { 1 }),
0291: new SearchData("A\u00C0C", "A", null, Collator.TERTIARY,
0292: null, new int[] { 0, 1, -1 }, new int[] { 1, 1 }),
0293: new SearchData("\u00C0A", "A", null, Collator.TERTIARY,
0294: null, new int[] { 0, 1, -1 }, new int[] { 1, 1 }),
0295: new SearchData("B\u00C0", "A", null, Collator.TERTIARY,
0296: null, new int[] { 1, -1 }, new int[] { 1 }),
0297: new SearchData("\u00C0B", "A", null, Collator.TERTIARY,
0298: null, new int[] { 0, -1 }, new int[] { 1 }),
0299: new SearchData("\u00C0", "\u0300", null, Collator.TERTIARY,
0300: null, new int[] { 0, -1 }, new int[] { 1 }),
0301: new SearchData("\u0300\u00C0", "\u0300", null,
0302: Collator.TERTIARY, null, new int[] { 0, 1, -1 },
0303: new int[] { 1, 1 }),
0304: /* \u0300 blocked by \u0300 */
0305: new SearchData("\u00C0\u0300", "\u0300", null,
0306: Collator.TERTIARY, null, new int[] { 0, -1 },
0307: new int[] { 2 }),
0308: /* A + 030A + 0301 */
0309: new SearchData("\u01FA", "\u01FA", null, Collator.TERTIARY,
0310: null, new int[] { 0, -1 }, new int[] { 1 }),
0311: new SearchData("\u01FA", "\u030A", null, Collator.TERTIARY,
0312: null, new int[] { 0, -1 }, new int[] { 1 }),
0313: new SearchData("\u01FA", "A\u030A", null,
0314: Collator.TERTIARY, null, new int[] { 0, -1 },
0315: new int[] { 1 }),
0316: new SearchData("\u01FA", "\u030AA", null,
0317: Collator.TERTIARY, null, new int[] { -1 },
0318: new int[] { 0 }),
0319: new SearchData("\u01FA", "\u0301", null, Collator.TERTIARY,
0320: null, new int[] { 0, -1 }, new int[] { 1 }),
0321: /* blocked accent */
0322: new SearchData("\u01FA", "A\u0301", null,
0323: Collator.TERTIARY, null, new int[] { -1 },
0324: new int[] { 0 }),
0325: new SearchData("\u01FA", "\u0301A", null,
0326: Collator.TERTIARY, null, new int[] { -1 },
0327: new int[] { 0 }),
0328: new SearchData("\u01FA", "\u030A\u0301", null,
0329: Collator.TERTIARY, null, new int[] { 0, -1 },
0330: new int[] { 1 }),
0331: new SearchData("A\u01FA", "A\u030A", null,
0332: Collator.TERTIARY, null, new int[] { 1, -1 },
0333: new int[] { 1 }),
0334: new SearchData("\u01FAA", "\u0301A", null,
0335: Collator.TERTIARY, null, new int[] { 0, -1 },
0336: new int[] { 2 }),
0337: new SearchData("\u0F73", "\u0F73", null, Collator.TERTIARY,
0338: null, new int[] { 0, -1 }, new int[] { 1 }),
0339: new SearchData("\u0F73", "\u0F71", null, Collator.TERTIARY,
0340: null, new int[] { 0, -1 }, new int[] { 1 }),
0341: new SearchData("\u0F73", "\u0F72", null, Collator.TERTIARY,
0342: null, new int[] { 0, -1 }, new int[] { 1 }),
0343: new SearchData("\u0F73", "\u0F71\u0F72", null,
0344: Collator.TERTIARY, null, new int[] { 0, -1 },
0345: new int[] { 1 }),
0346: new SearchData("A\u0F73", "A\u0F71", null,
0347: Collator.TERTIARY, null, new int[] { 0, -1 },
0348: new int[] { 2 }),
0349: new SearchData("\u0F73A", "\u0F72A", null,
0350: Collator.TERTIARY, null, new int[] { 0, -1 },
0351: new int[] { 2 }),
0352: new SearchData(
0353: "\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA",
0354: "A\u030A", null, Collator.TERTIARY, null,
0355: new int[] { 0, 6, 10, 13, -1 }, new int[] { 1, 3,
0356: 2, 1 }),
0357: new SearchData(null, null, null, Collator.TERTIARY, null,
0358: new int[] { -1 }, new int[] { 0 }) };
0359:
0360: SearchData SUPPLEMENTARY[] = {
0361: /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
0362: new SearchData(
0363: "abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
0364: "\uD800\uDC00", null, Collator.TERTIARY, null,
0365: new int[] { 4, 13, 22, 26, 29, -1 }, new int[] { 2,
0366: 2, 2, 2, 2 }),
0367: new SearchData("and\uD834\uDDB9this sentence",
0368: "\uD834\uDDB9", null, Collator.TERTIARY, null,
0369: new int[] { 3, -1 }, new int[] { 2 }),
0370: new SearchData("and \uD834\uDDB9 this sentence",
0371: " \uD834\uDDB9 ", null, Collator.TERTIARY, null,
0372: new int[] { 3, -1 }, new int[] { 4 }),
0373: new SearchData("and-\uD834\uDDB9-this sentence",
0374: "-\uD834\uDDB9-", null, Collator.TERTIARY, null,
0375: new int[] { 3, -1 }, new int[] { 4 }),
0376: new SearchData("and,\uD834\uDDB9,this sentence",
0377: ",\uD834\uDDB9,", null, Collator.TERTIARY, null,
0378: new int[] { 3, -1 }, new int[] { 4 }),
0379: new SearchData("and?\uD834\uDDB9?this sentence",
0380: "?\uD834\uDDB9?", null, Collator.TERTIARY, null,
0381: new int[] { 3, -1 }, new int[] { 4 }),
0382: new SearchData(null, null, null, Collator.TERTIARY, null,
0383: new int[] { -1 }, new int[] { 0 }) };
0384:
0385: String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315";
0386:
0387: SearchData CONTRACTION[] = {
0388: /* common discontiguous */
0389: new SearchData("A\u0300\u0315", "\u0300", null,
0390: Collator.TERTIARY, null, new int[] { -1 },
0391: new int[] { 0 }),
0392: new SearchData("A\u0300\u0315", "\u0300\u0315", null,
0393: Collator.TERTIARY, null, new int[] { 1, -1 },
0394: new int[] { 2 }),
0395: /* contraction prefix */
0396: new SearchData("AB\u0315C", "A", null, Collator.TERTIARY,
0397: null, new int[] { -1 }, new int[] { 0 }),
0398: new SearchData("AB\u0315C", "AB", null, Collator.TERTIARY,
0399: null, new int[] { 0, -1 }, new int[] { 2 }),
0400: new SearchData("AB\u0315C", "\u0315", null,
0401: Collator.TERTIARY, null, new int[] { 2, -1 },
0402: new int[] { 1 }),
0403: /* discontiguous problem here for backwards iteration.
0404: accents not found because discontiguous stores all information */
0405: new SearchData("X\u0300\u0319\u0315", "\u0319", null,
0406: Collator.TERTIARY, null, new int[] { -1 },
0407: new int[] { 0 }),
0408: /* ends not with a contraction character */
0409: new SearchData("X\u0315\u0300D", "\u0300\u0315", null,
0410: Collator.TERTIARY, null, new int[] { -1 },
0411: new int[] { 0 }),
0412: new SearchData("X\u0315\u0300D", "X\u0300\u0315", null,
0413: Collator.TERTIARY, null, new int[] { 0, -1 },
0414: new int[] { 3 }),
0415: new SearchData("X\u0300\u031A\u0315D", "X\u0300", null,
0416: Collator.TERTIARY, null, new int[] { -1 },
0417: new int[] { 0 }),
0418: /* blocked discontiguous */
0419: new SearchData("X\u0300\u031A\u0315D", "\u031A\u0315D",
0420: null, Collator.TERTIARY, null, new int[] { -1 },
0421: new int[] { 0 }),
0422: new SearchData("ab", "z", null, Collator.TERTIARY, null,
0423: new int[] { 0, -1 }, new int[] { 2 }),
0424: new SearchData(null, null, null, Collator.TERTIARY, null,
0425: new int[] { -1 }, new int[] { 0 }) };
0426:
0427: SearchData CONTRACTIONCANONICAL[] = {
0428: /* common discontiguous */
0429: new SearchData("A\u0300\u0315", "\u0300", null,
0430: Collator.TERTIARY, null, new int[] { 1, -1 },
0431: new int[] { 2 }),
0432: new SearchData("A\u0300\u0315", "\u0300\u0315", null,
0433: Collator.TERTIARY, null, new int[] { 1, -1 },
0434: new int[] { 2 }),
0435: /* contraction prefix */
0436: new SearchData("AB\u0315C", "A", null, Collator.TERTIARY,
0437: null, new int[] { -1 }, new int[] { 0 }),
0438: new SearchData("AB\u0315C", "AB", null, Collator.TERTIARY,
0439: null, new int[] { 0, -1 }, new int[] { 2 }),
0440: new SearchData("AB\u0315C", "\u0315", null,
0441: Collator.TERTIARY, null, new int[] { 2, -1 },
0442: new int[] { 1 }),
0443: /* discontiguous problem here for backwards iteration.
0444: forwards gives 0, 4 but backwards give 1, 3 */
0445: /* {"X\u0300\u0319\u0315", "\u0319", null, Collator.TERTIARY, null, {0, -1},
0446: {4}}, */
0447:
0448: /* ends not with a contraction character */
0449: new SearchData("X\u0315\u0300D", "\u0300\u0315", null,
0450: Collator.TERTIARY, null, new int[] { -1 },
0451: new int[] { 0 }),
0452: new SearchData("X\u0315\u0300D", "X\u0300\u0315", null,
0453: Collator.TERTIARY, null, new int[] { 0, -1 },
0454: new int[] { 3 }),
0455: new SearchData("X\u0300\u031A\u0315D", "X\u0300", null,
0456: Collator.TERTIARY, null, new int[] { 0, -1 },
0457: new int[] { 4 }),
0458: /* blocked discontiguous */
0459: new SearchData("X\u0300\u031A\u0315D", "\u031A\u0315D",
0460: null, Collator.TERTIARY, null, new int[] { 1, -1 },
0461: new int[] { 4 }),
0462: new SearchData("ab", "z", null, Collator.TERTIARY, null,
0463: new int[] { 0, -1 }, new int[] { 2 }),
0464: new SearchData(null, null, null, Collator.TERTIARY, null,
0465: new int[] { -1 }, new int[] { 0 }) };
0466:
0467: SearchData MATCH[] = {
0468: new SearchData("a busy bee is a very busy beeee", "bee",
0469: null, Collator.TERTIARY, null, new int[] { 7, 26,
0470: -1 }, new int[] { 3, 3 }),
0471: /* 012345678901234567890123456789012345678901234567890 */
0472: new SearchData(
0473: "a busy bee is a very busy beeee with no bee life",
0474: "bee", null, Collator.TERTIARY, null, new int[] {
0475: 7, 26, 40, -1 }, new int[] { 3, 3, 3 }),
0476: new SearchData(null, null, null, Collator.TERTIARY, null,
0477: new int[] { -1 }, new int[] { 0 }) };
0478:
0479: String IGNORABLERULE = "&a = \u0300";
0480:
0481: SearchData IGNORABLE[] = {
0482: new SearchData("\u0315\u0300 \u0315\u0300\u0315 ",
0483: "\u0300", null, Collator.PRIMARY, null, new int[] {
0484: 0, 3, -1 }, new int[] { 2, 3 }),
0485: new SearchData(null, null, null, Collator.TERTIARY, null,
0486: new int[] { -1 }, new int[] { 0 }) };
0487:
0488: SearchData NORMCANONICAL[] = {
0489: new SearchData("\u0300\u0325", "\u0300", null,
0490: Collator.TERTIARY, null, new int[] { 0, -1 },
0491: new int[] { 2 }),
0492: new SearchData("\u0300\u0325", "\u0325", null,
0493: Collator.TERTIARY, null, new int[] { 0, -1 },
0494: new int[] { 2 }),
0495: new SearchData("a\u0300\u0325", "\u0325\u0300", null,
0496: Collator.TERTIARY, null, new int[] { 1, -1 },
0497: new int[] { 2 }),
0498: new SearchData("a\u0300\u0325", "\u0300\u0325", null,
0499: Collator.TERTIARY, null, new int[] { 1, -1 },
0500: new int[] { 2 }),
0501: new SearchData("a\u0300\u0325", "\u0325", null,
0502: Collator.TERTIARY, null, new int[] { 1, -1 },
0503: new int[] { 2 }),
0504: new SearchData("a\u0300\u0325", "\u0300", null,
0505: Collator.TERTIARY, null, new int[] { 1, -1 },
0506: new int[] { 2 }),
0507: new SearchData(null, null, null, Collator.TERTIARY, null,
0508: new int[] { -1 }, new int[] { 0 }) };
0509:
0510: SearchData NORMEXACT[] = {
0511: new SearchData("a\u0300\u0325", "\u0325\u0300", null,
0512: Collator.TERTIARY, null, new int[] { 1, -1 },
0513: new int[] { 2 }),
0514: new SearchData(null, null, null, Collator.TERTIARY, null,
0515: new int[] { -1 }, new int[] { 0 }) };
0516:
0517: SearchData NONNORMEXACT[] = {
0518: new SearchData("a\u0300\u0325", "\u0325\u0300", null,
0519: Collator.TERTIARY, null, new int[] { -1 },
0520: new int[] { 0 }),
0521: new SearchData(null, null, null, Collator.TERTIARY, null,
0522: new int[] { -1 }, new int[] { 0 }) };
0523:
0524: SearchData OVERLAP[] = {
0525: new SearchData("abababab", "abab", null, Collator.TERTIARY,
0526: null, new int[] { 0, 2, 4, -1 }, new int[] { 4, 4,
0527: 4 }),
0528: new SearchData(null, null, null, Collator.TERTIARY, null,
0529: new int[] { -1 }, new int[] { 0 }) };
0530:
0531: SearchData NONOVERLAP[] = {
0532: new SearchData("abababab", "abab", null, Collator.TERTIARY,
0533: null, new int[] { 0, 4, -1 }, new int[] { 4, 4 }),
0534: new SearchData(null, null, null, Collator.TERTIARY, null,
0535: new int[] { -1 }, new int[] { 0 }) };
0536:
0537: SearchData OVERLAPCANONICAL[] = {
0538: new SearchData("abababab", "abab", null, Collator.TERTIARY,
0539: null, new int[] { 0, 2, 4, -1 }, new int[] { 4, 4,
0540: 4 }),
0541: new SearchData(null, null, null, Collator.TERTIARY, null,
0542: new int[] { -1 }, new int[] { 0 }) };
0543:
0544: SearchData NONOVERLAPCANONICAL[] = {
0545: new SearchData("abababab", "abab", null, Collator.TERTIARY,
0546: null, new int[] { 0, 4, -1 }, new int[] { 4, 4 }),
0547: new SearchData(null, null, null, Collator.TERTIARY, null,
0548: new int[] { -1 }, new int[] { 0 }) };
0549:
0550: SearchData PATTERNCANONICAL[] = {
0551: new SearchData(
0552: "The quick brown fox jumps over the lazy foxes",
0553: "the", null, Collator.PRIMARY, null, new int[] { 0,
0554: 31, -1 }, new int[] { 3, 3 }),
0555: new SearchData(
0556: "The quick brown fox jumps over the lazy foxes",
0557: "fox", null, Collator.PRIMARY, null, new int[] {
0558: 16, 40, -1 }, new int[] { 3, 3 }),
0559: new SearchData(null, null, null, Collator.TERTIARY, null,
0560: new int[] { -1 }, new int[] { 0 }) };
0561:
0562: SearchData PATTERN[] = {
0563: new SearchData(
0564: "The quick brown fox jumps over the lazy foxes",
0565: "the", null, Collator.PRIMARY, null, new int[] { 0,
0566: 31, -1 }, new int[] { 3, 3 }),
0567: new SearchData(
0568: "The quick brown fox jumps over the lazy foxes",
0569: "fox", null, Collator.PRIMARY, null, new int[] {
0570: 16, 40, -1 }, new int[] { 3, 3 }),
0571: new SearchData(null, null, null, Collator.TERTIARY, null,
0572: new int[] { -1 }, new int[] { 0 }) };
0573:
0574: SearchData STRENGTH[] = {
0575: /*012345678901234567890123456789012345678901234567890123456789*/
0576: new SearchData(
0577: "The quick brown fox jumps over the lazy foxes",
0578: "fox", "en", Collator.PRIMARY, null, new int[] {
0579: 16, 40, -1 }, new int[] { 3, 3 }),
0580: new SearchData(
0581: "The quick brown fox jumps over the lazy foxes",
0582: "fox", "en", Collator.PRIMARY, "wordbreaker",
0583: new int[] { 16, -1 }, new int[] { 3 }),
0584: new SearchData(
0585: "blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
0586: "peche", "fr", Collator.PRIMARY, null, new int[] {
0587: 15, 21, 27, 34, -1 }, new int[] { 5, 5, 5,
0588: 5 }),
0589: new SearchData("This is a toe T\u00F6ne", "toe", "de",
0590: Collator.PRIMARY, null, new int[] { 10, 14, -1 },
0591: new int[] { 3, 2 }),
0592: new SearchData(
0593: "A channel, another CHANNEL, more Channels, and one last channel...",
0594: "channel", "es", Collator.PRIMARY, null, new int[] {
0595: 2, 19, 33, 56, -1 },
0596: new int[] { 7, 7, 7, 7 }),
0597: new SearchData(null, null, null, Collator.TERTIARY, null,
0598: new int[] { -1 }, new int[] { 0 }) };
0599:
0600: SearchData STRENGTHCANONICAL[] = {
0601: /*012345678901234567890123456789012345678901234567890123456789 */
0602: new SearchData(
0603: "The quick brown fox jumps over the lazy foxes",
0604: "fox", "en", Collator.PRIMARY, null, new int[] {
0605: 16, 40, -1 }, new int[] { 3, 3 }),
0606: new SearchData(
0607: "The quick brown fox jumps over the lazy foxes",
0608: "fox", "en", Collator.PRIMARY, "wordbreaker",
0609: new int[] { 16, -1 }, new int[] { 3 }),
0610: new SearchData(
0611: "blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
0612: "peche", "fr", Collator.PRIMARY, null, new int[] {
0613: 15, 21, 27, 34, -1 }, new int[] { 5, 5, 5,
0614: 5 }),
0615: new SearchData("This is a toe T\u00F6ne", "toe", "de",
0616: Collator.PRIMARY, null, new int[] { 10, 14, -1 },
0617: new int[] { 3, 2 }),
0618: new SearchData(
0619: "A channel, another CHANNEL, more Channels, and one last channel...",
0620: "channel", "es", Collator.PRIMARY, null, new int[] {
0621: 2, 19, 33, 56, -1 },
0622: new int[] { 7, 7, 7, 7 }),
0623: new SearchData(null, null, null, Collator.TERTIARY, null,
0624: new int[] { -1 }, new int[] { 0 }) };
0625:
0626: SearchData SUPPLEMENTARYCANONICAL[] = {
0627: /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
0628: new SearchData(
0629: "abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
0630: "\uD800\uDC00", null, Collator.TERTIARY, null,
0631: new int[] { 4, 13, 22, 26, 29, -1 }, new int[] { 2,
0632: 2, 2, 2, 2 }),
0633: new SearchData("and\uD834\uDDB9this sentence",
0634: "\uD834\uDDB9", null, Collator.TERTIARY, null,
0635: new int[] { 3, -1 }, new int[] { 2 }),
0636: new SearchData("and \uD834\uDDB9 this sentence",
0637: " \uD834\uDDB9 ", null, Collator.TERTIARY, null,
0638: new int[] { 3, -1 }, new int[] { 4 }),
0639: new SearchData("and-\uD834\uDDB9-this sentence",
0640: "-\uD834\uDDB9-", null, Collator.TERTIARY, null,
0641: new int[] { 3, -1 }, new int[] { 4 }),
0642: new SearchData("and,\uD834\uDDB9,this sentence",
0643: ",\uD834\uDDB9,", null, Collator.TERTIARY, null,
0644: new int[] { 3, -1 }, new int[] { 4 }),
0645: new SearchData("and?\uD834\uDDB9?this sentence",
0646: "?\uD834\uDDB9?", null, Collator.TERTIARY, null,
0647: new int[] { 3, -1 }, new int[] { 4 }),
0648: new SearchData(null, null, null, Collator.TERTIARY, null,
0649: new int[] { -1 }, new int[] { 0 }) };
0650:
/**
 * Scenarios built around spaces and hyphens in the text or pattern, which
 * the in-table comments describe as ignorable.
 *
 * <p>The "tightest match" text is assembled from commented segments because
 * its exact run lengths of spaces matter: the SECONDARY row expects matches
 * at offsets 1, 6, 13, 21 and 31 with lengths 3, 4, 4, 5 and 5, which is
 * only possible when the groups are separated by 2, 3, 4 and 5 spaces and
 * the last two groups contain a doubled inner space. The previous literal
 * had every run collapsed to a single space (24 characters in total), so
 * offsets beyond the first could not line up with the text.
 */
static SearchData VARIABLE[] = {
    /*012345678901234567890123456789012345678901234567890123456789*/
    new SearchData(
            "blackbirds black blackbirds blackbird black-bird",
            "blackbird", null, Collator.TERTIARY, null,
            new int[] { 0, 17, 28, 38, -1 },
            new int[] { 9, 9, 9, 10 }),

    /* to see that it doesn't go into an infinite loop if the start of text
       is an ignorable character */
    new SearchData(" on", "go", null, Collator.TERTIARY, null,
            new int[] { -1 },
            new int[] { 0 }),
    new SearchData("abcdefghijklmnopqrstuvwxyz", " ", null,
            Collator.PRIMARY, null,
            new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                    13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                    25, -1 },
            new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }),

    /* testing tightest match */
    new SearchData(
            " abc"              // offset 1, length 3: the only expected match here
                    + "  a bc"      // 2 spaces, then "a bc" at offset 6
                    + "   ab c"     // 3 spaces, then "ab c" at offset 13
                    + "    a  bc"   // 4 spaces, then "a  bc" at offset 21
                    + "     ab  c", // 5 spaces, then "ab  c" at offset 31
            "abc", null, Collator.QUATERNARY, null,
            new int[] { 1, -1 },
            new int[] { 3 }),
    /*012345678901234567890123456789012345678901234567890123456789 */
    new SearchData(
            " abc"              // offset 1, length 3
                    + "  a bc"      // 2 spaces, then offset 6, length 4
                    + "   ab c"     // 3 spaces, then offset 13, length 4
                    + "    a  bc"   // 4 spaces, then offset 21, length 5
                    + "     ab  c", // 5 spaces, then offset 31, length 5
            "abc", null, Collator.SECONDARY, null,
            new int[] { 1, 6, 13, 21, 31, -1 },
            new int[] { 3, 4, 4, 5, 5 }),

    /* totally ignorable text */
    new SearchData(" ---------------", "abc", null,
            Collator.SECONDARY, null,
            new int[] { -1 },
            new int[] { 0 }),
    /* closing row with null text and pattern */
    new SearchData(null, null, null, Collator.TERTIARY, null,
            new int[] { -1 },
            new int[] { 0 }) };
0686:
0687: static SearchData TEXTCANONICAL[] = {
0688: new SearchData("the foxy brown fox", "fox", null,
0689: Collator.TERTIARY, null, new int[] { 4, 15, -1 },
0690: new int[] { 3, 3 }),
0691: new SearchData("the quick brown fox", "fox", null,
0692: Collator.TERTIARY, null, new int[] { 16, -1 },
0693: new int[] { 3 }),
0694: new SearchData(null, null, null, Collator.TERTIARY, null,
0695: new int[] { -1 }, new int[] { 0 }) };
0696:
/**
 * Creates the test object. Nothing is set up here; the collator and
 * break-iterator fields are assigned in {@code init()}.
 */
public SearchTest() {
    super();
}
0703:
0704: protected void init() throws Exception {
0705: m_en_us_ = (RuleBasedCollator) Collator.getInstance(Locale.US);
0706: m_fr_fr_ = (RuleBasedCollator) Collator
0707: .getInstance(Locale.FRANCE);
0708: m_de_ = (RuleBasedCollator) Collator.getInstance(new Locale(
0709: "de", "DE"));
0710: m_es_ = (RuleBasedCollator) Collator.getInstance(new Locale(
0711: "es", "ES"));
0712: m_en_wordbreaker_ = BreakIterator.getWordInstance();
0713: m_en_characterbreaker_ = BreakIterator.getCharacterInstance();
0714: String rules = m_de_.getRules() + EXTRACOLLATIONRULE;
0715: m_de_ = new RuleBasedCollator(rules);
0716: rules = m_es_.getRules() + EXTRACOLLATIONRULE;
0717: m_es_ = new RuleBasedCollator(rules);
0718:
0719: }
0720:
0721: public static void main(String[] args) throws Exception {
0722: new SearchTest().run(args);
0723: // new SearchTest().TestContraction();
0724: }
0725:
0726: RuleBasedCollator getCollator(String collator) {
0727: if (collator == null) {
0728: return m_en_us_;
0729: }
0730: if (collator.equals("fr")) {
0731: return m_fr_fr_;
0732: } else if (collator.equals("de")) {
0733: return m_de_;
0734: } else if (collator.equals("es")) {
0735: return m_es_;
0736: } else {
0737: return m_en_us_;
0738: }
0739: }
0740:
0741: BreakIterator getBreakIterator(String breaker) {
0742: if (breaker == null) {
0743: return null;
0744: }
0745: if (breaker.equals("wordbreaker")) {
0746: return m_en_wordbreaker_;
0747: } else {
0748: return m_en_characterbreaker_;
0749: }
0750: }
0751:
0752: boolean assertCanonicalEqual(SearchData search) {
0753: Collator collator = getCollator(search.collator);
0754: BreakIterator breaker = getBreakIterator(search.breaker);
0755: StringSearch strsrch;
0756:
0757: String text = search.text;
0758: String pattern = search.pattern;
0759:
0760: if (breaker != null) {
0761: breaker.setText(text);
0762: }
0763: collator.setStrength(search.strength);
0764: try {
0765: strsrch = new StringSearch(pattern,
0766: new StringCharacterIterator(text),
0767: (RuleBasedCollator) collator, breaker);
0768: strsrch.setCanonical(true);
0769: } catch (Exception e) {
0770: errln("Error opening string search" + e.getMessage());
0771: return false;
0772: }
0773:
0774: if (!assertEqualWithStringSearch(strsrch, search)) {
0775: collator.setStrength(Collator.TERTIARY);
0776: return false;
0777: }
0778: collator.setStrength(Collator.TERTIARY);
0779: return true;
0780: }
0781:
0782: boolean assertEqual(SearchData search) {
0783: Collator collator = getCollator(search.collator);
0784: BreakIterator breaker = getBreakIterator(search.breaker);
0785: StringSearch strsrch;
0786:
0787: String text = search.text;
0788: String pattern = search.pattern;
0789:
0790: if (breaker != null) {
0791: breaker.setText(text);
0792: }
0793: collator.setStrength(search.strength);
0794: try {
0795: strsrch = new StringSearch(pattern,
0796: new StringCharacterIterator(text),
0797: (RuleBasedCollator) collator, breaker);
0798: } catch (Exception e) {
0799: errln("Error opening string search " + e.getMessage());
0800: return false;
0801: }
0802:
0803: if (!assertEqualWithStringSearch(strsrch, search)) {
0804: collator.setStrength(Collator.TERTIARY);
0805: return false;
0806: }
0807: collator.setStrength(Collator.TERTIARY);
0808: return true;
0809: }
0810:
0811: boolean assertEqualWithAttribute(SearchData search,
0812: boolean canonical, boolean overlap) {
0813: Collator collator = getCollator(search.collator);
0814: BreakIterator breaker = getBreakIterator(search.breaker);
0815: StringSearch strsrch;
0816:
0817: String text = search.text;
0818: String pattern = search.pattern;
0819:
0820: if (breaker != null) {
0821: breaker.setText(text);
0822: }
0823: collator.setStrength(search.strength);
0824: try {
0825: strsrch = new StringSearch(pattern,
0826: new StringCharacterIterator(text),
0827: (RuleBasedCollator) collator, breaker);
0828: strsrch.setCanonical(canonical);
0829: strsrch.setOverlapping(overlap);
0830: } catch (Exception e) {
0831: errln("Error opening string search " + e.getMessage());
0832: return false;
0833: }
0834:
0835: if (!assertEqualWithStringSearch(strsrch, search)) {
0836: collator.setStrength(Collator.TERTIARY);
0837: return false;
0838: }
0839: collator.setStrength(Collator.TERTIARY);
0840: return true;
0841: }
0842:
0843: boolean assertEqualWithStringSearch(StringSearch strsrch,
0844: SearchData search) {
0845: int count = 0;
0846: int matchindex = search.offset[count];
0847: String matchtext;
0848:
0849: if (strsrch.getMatchStart() != SearchIterator.DONE
0850: || strsrch.getMatchLength() != 0) {
0851: errln("Error with the initialization of match start and length");
0852: }
0853: // start of following matches
0854: while (matchindex >= 0) {
0855: int matchlength = search.size[count];
0856: strsrch.next();
0857: //int x = strsrch.getMatchStart();
0858: if (matchindex != strsrch.getMatchStart()
0859: || matchlength != strsrch.getMatchLength()) {
0860: errln("Text: " + search.text);
0861: errln("Pattern: " + strsrch.getPattern());
0862: errln("Error following match found at "
0863: + strsrch.getMatchStart() + ", "
0864: + strsrch.getMatchLength());
0865: return false;
0866: }
0867: count++;
0868:
0869: matchtext = strsrch.getMatchedText();
0870: String targetText = search.text;
0871: if (matchlength > 0
0872: && targetText.substring(matchindex,
0873: matchindex + matchlength).compareTo(
0874: matchtext) != 0) {
0875: errln("Error getting following matched text");
0876: }
0877:
0878: matchindex = search.offset[count];
0879: }
0880: strsrch.next();
0881: if (strsrch.getMatchStart() != SearchIterator.DONE
0882: || strsrch.getMatchLength() != 0) {
0883: errln("Text: " + search.text);
0884: errln("Pattern: " + strsrch.getPattern());
0885: errln("Error following match found at "
0886: + strsrch.getMatchStart() + ", "
0887: + strsrch.getMatchLength());
0888: return false;
0889: }
0890: // start of preceding matches
0891: count = count == 0 ? 0 : count - 1;
0892: matchindex = search.offset[count];
0893: while (matchindex >= 0) {
0894: int matchlength = search.size[count];
0895: strsrch.previous();
0896: if (matchindex != strsrch.getMatchStart()
0897: || matchlength != strsrch.getMatchLength()) {
0898: errln("Text: " + search.text);
0899: errln("Pattern: " + strsrch.getPattern());
0900: errln("Error following match found at "
0901: + strsrch.getMatchStart() + ", "
0902: + strsrch.getMatchLength());
0903: return false;
0904: }
0905:
0906: matchtext = strsrch.getMatchedText();
0907: String targetText = search.text;
0908: if (matchlength > 0
0909: && targetText.substring(matchindex,
0910: matchindex + matchlength).compareTo(
0911: matchtext) != 0) {
0912: errln("Error getting following matched text");
0913: }
0914:
0915: matchindex = count > 0 ? search.offset[count - 1] : -1;
0916: count--;
0917: }
0918: strsrch.previous();
0919: if (strsrch.getMatchStart() != SearchIterator.DONE
0920: || strsrch.getMatchLength() != 0) {
0921: errln("Text: " + search.text);
0922: errln("Pattern: " + strsrch.getPattern());
0923: errln("Error following match found at "
0924: + strsrch.getMatchStart() + ", "
0925: + strsrch.getMatchLength());
0926: return false;
0927: }
0928: return true;
0929: }
0930:
0931: public void TestConstructor() {
0932: String pattern = "pattern";
0933: String text = "text";
0934: StringCharacterIterator textiter = new StringCharacterIterator(
0935: text);
0936: Collator defaultcollator = Collator.getInstance();
0937: BreakIterator breaker = BreakIterator.getCharacterInstance();
0938: breaker.setText(text);
0939: StringSearch search = new StringSearch(pattern, text);
0940: if (!search.getPattern().equals(pattern)
0941: || !search.getTarget().equals(textiter)
0942: || !search.getCollator().equals(defaultcollator)
0943: || !search.getBreakIterator().equals(breaker)) {
0944: errln("StringSearch(String, String) error");
0945: }
0946: search = new StringSearch(pattern, textiter, m_fr_fr_);
0947: if (!search.getPattern().equals(pattern)
0948: || !search.getTarget().equals(textiter)
0949: || !search.getCollator().equals(m_fr_fr_)
0950: || !search.getBreakIterator().equals(breaker)) {
0951: errln("StringSearch(String, StringCharacterIterator, "
0952: + "RuleBasedCollator) error");
0953: }
0954: Locale de = new Locale("de", "DE");
0955: breaker = BreakIterator.getCharacterInstance(de);
0956: breaker.setText(text);
0957: search = new StringSearch(pattern, textiter, de);
0958: if (!search.getPattern().equals(pattern)
0959: || !search.getTarget().equals(textiter)
0960: || !search.getCollator().equals(
0961: Collator.getInstance(de))
0962: || !search.getBreakIterator().equals(breaker)) {
0963: errln("StringSearch(String, StringCharacterIterator, Locale) "
0964: + "error");
0965: }
0966:
0967: search = new StringSearch(pattern, textiter, m_fr_fr_,
0968: m_en_wordbreaker_);
0969: if (!search.getPattern().equals(pattern)
0970: || !search.getTarget().equals(textiter)
0971: || !search.getCollator().equals(m_fr_fr_)
0972: || !search.getBreakIterator().equals(m_en_wordbreaker_)) {
0973: errln("StringSearch(String, StringCharacterIterator, Locale) "
0974: + "error");
0975: }
0976: }
0977:
0978: public void TestBasic() {
0979: int count = 0;
0980: while (BASIC[count].text != null) {
0981: if (!assertEqual(BASIC[count])) {
0982: errln("Error at test number " + count);
0983: }
0984: count++;
0985: }
0986: }
0987:
0988: public void TestBreakIterator() {
0989:
0990: String text = BREAKITERATOREXACT[0].text;
0991: String pattern = BREAKITERATOREXACT[0].pattern;
0992: StringSearch strsrch = null;
0993: try {
0994: strsrch = new StringSearch(pattern,
0995: new StringCharacterIterator(text), m_en_us_, null);
0996: } catch (Exception e) {
0997: errln("Error opening string search");
0998: return;
0999: }
1000:
1001: strsrch.setBreakIterator(null);
1002: if (strsrch.getBreakIterator() != null) {
1003: errln("Error usearch_getBreakIterator returned wrong object");
1004: }
1005:
1006: strsrch.setBreakIterator(m_en_characterbreaker_);
1007: if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) {
1008: errln("Error usearch_getBreakIterator returned wrong object");
1009: }
1010:
1011: strsrch.setBreakIterator(m_en_wordbreaker_);
1012: if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) {
1013: errln("Error usearch_getBreakIterator returned wrong object");
1014: }
1015:
1016: int count = 0;
1017: while (count < 4) {
1018: // special purposes for tests numbers 0-3
1019: SearchData search = BREAKITERATOREXACT[count];
1020: RuleBasedCollator collator = getCollator(search.collator);
1021: BreakIterator breaker = getBreakIterator(search.breaker);
1022: //StringSearch strsrch;
1023:
1024: text = search.text;
1025: pattern = search.pattern;
1026: if (breaker != null) {
1027: breaker.setText(text);
1028: }
1029: collator.setStrength(search.strength);
1030: strsrch = new StringSearch(pattern,
1031: new StringCharacterIterator(text), collator,
1032: breaker);
1033: if (strsrch.getBreakIterator() != breaker) {
1034: errln("Error setting break iterator");
1035: }
1036: if (!assertEqualWithStringSearch(strsrch, search)) {
1037: collator.setStrength(Collator.TERTIARY);
1038: }
1039: search = BREAKITERATOREXACT[count + 1];
1040: breaker = getBreakIterator(search.breaker);
1041: if (breaker != null) {
1042: breaker.setText(text);
1043: }
1044: strsrch.setBreakIterator(breaker);
1045: if (strsrch.getBreakIterator() != breaker) {
1046: errln("Error setting break iterator");
1047: }
1048: strsrch.reset();
1049: if (!assertEqualWithStringSearch(strsrch, search)) {
1050: errln("Error at test number " + count);
1051: }
1052: count += 2;
1053: }
1054: count = 0;
1055: while (BREAKITERATOREXACT[count].text != null) {
1056: if (!assertEqual(BREAKITERATOREXACT[count])) {
1057: errln("Error at test number " + count);
1058: }
1059: count++;
1060: }
1061: }
1062:
1063: public void TestBreakIteratorCanonical() {
1064: int count = 0;
1065: while (count < 4) {
1066: // special purposes for tests numbers 0-3
1067: SearchData search = BREAKITERATORCANONICAL[count];
1068:
1069: String text = search.text;
1070: String pattern = search.pattern;
1071: RuleBasedCollator collator = getCollator(search.collator);
1072: collator.setStrength(search.strength);
1073:
1074: BreakIterator breaker = getBreakIterator(search.breaker);
1075: StringSearch strsrch = null;
1076: try {
1077: strsrch = new StringSearch(pattern,
1078: new StringCharacterIterator(text), collator,
1079: breaker);
1080: } catch (Exception e) {
1081: errln("Error creating string search data");
1082: return;
1083: }
1084: strsrch.setCanonical(true);
1085: if (!strsrch.getBreakIterator().equals(breaker)) {
1086: errln("Error setting break iterator");
1087: return;
1088: }
1089: if (!assertEqualWithStringSearch(strsrch, search)) {
1090: collator.setStrength(Collator.TERTIARY);
1091: return;
1092: }
1093: search = BREAKITERATOREXACT[count + 1];
1094: breaker = getBreakIterator(search.breaker);
1095: breaker.setText(strsrch.getTarget());
1096: strsrch.setBreakIterator(breaker);
1097: if (!strsrch.getBreakIterator().equals(breaker)) {
1098: errln("Error setting break iterator");
1099: return;
1100: }
1101: strsrch.reset();
1102: strsrch.setCanonical(true);
1103: if (!assertEqualWithStringSearch(strsrch, search)) {
1104: errln("Error at test number " + count);
1105: return;
1106: }
1107: count += 2;
1108: }
1109: count = 0;
1110: while (BREAKITERATORCANONICAL[count].text != null) {
1111: if (!assertEqual(BREAKITERATORCANONICAL[count])) {
1112: errln("Error at test number " + count);
1113: return;
1114: }
1115: count++;
1116: }
1117: }
1118:
1119: public void TestCanonical() {
1120: int count = 0;
1121: while (BASICCANONICAL[count].text != null) {
1122: if (!assertCanonicalEqual(BASICCANONICAL[count])) {
1123: errln("Error at test number " + count);
1124: }
1125: count++;
1126: }
1127: }
1128:
1129: public void TestCollator() {
1130: // test collator that thinks "o" and "p" are the same thing
1131: String text = COLLATOR[0].text;
1132: String pattern = COLLATOR[0].pattern;
1133: StringSearch strsrch = null;
1134: try {
1135: strsrch = new StringSearch(pattern,
1136: new StringCharacterIterator(text), m_en_us_, null);
1137: } catch (Exception e) {
1138: errln("Error opening string search ");
1139: return;
1140: }
1141: if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
1142: return;
1143: }
1144: String rules = TESTCOLLATORRULE;
1145: RuleBasedCollator tailored = null;
1146: try {
1147: tailored = new RuleBasedCollator(rules);
1148: tailored.setStrength(COLLATOR[1].strength);
1149: } catch (Exception e) {
1150: errln("Error opening rule based collator ");
1151: return;
1152: }
1153:
1154: strsrch.setCollator(tailored);
1155: if (!strsrch.getCollator().equals(tailored)) {
1156: errln("Error setting rule based collator");
1157: }
1158: strsrch.reset();
1159: if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) {
1160: return;
1161: }
1162: strsrch.setCollator(m_en_us_);
1163: strsrch.reset();
1164: if (!strsrch.getCollator().equals(m_en_us_)) {
1165: errln("Error setting rule based collator");
1166: }
1167: if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
1168: errln("Error searching collator test");
1169: }
1170: }
1171:
1172: public void TestCollatorCanonical() {
1173: /* test collator that thinks "o" and "p" are the same thing */
1174: String text = COLLATORCANONICAL[0].text;
1175: String pattern = COLLATORCANONICAL[0].pattern;
1176:
1177: StringSearch strsrch = null;
1178: try {
1179: strsrch = new StringSearch(pattern,
1180: new StringCharacterIterator(text), m_en_us_, null);
1181: strsrch.setCanonical(true);
1182: } catch (Exception e) {
1183: errln("Error opening string search ");
1184: }
1185:
1186: if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
1187: return;
1188: }
1189:
1190: String rules = TESTCOLLATORRULE;
1191: RuleBasedCollator tailored = null;
1192: try {
1193: tailored = new RuleBasedCollator(rules);
1194: tailored.setStrength(COLLATORCANONICAL[1].strength);
1195: tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1196: } catch (Exception e) {
1197: errln("Error opening rule based collator ");
1198: }
1199:
1200: strsrch.setCollator(tailored);
1201: if (!strsrch.getCollator().equals(tailored)) {
1202: errln("Error setting rule based collator");
1203: }
1204: strsrch.reset();
1205: strsrch.setCanonical(true);
1206: if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) {
1207: logln("COLLATORCANONICAL[1] failed"); // Error should already be reported.
1208: }
1209: strsrch.setCollator(m_en_us_);
1210: strsrch.reset();
1211: if (!strsrch.getCollator().equals(m_en_us_)) {
1212: errln("Error setting rule based collator");
1213: }
1214: if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
1215: logln("COLLATORCANONICAL[0] failed"); // Error should already be reported.
1216: }
1217: }
1218:
1219: public void TestCompositeBoundaries() {
1220: int count = 0;
1221: while (COMPOSITEBOUNDARIES[count].text != null) {
1222: // logln("composite " + count);
1223: if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
1224: errln("Error at test number " + count);
1225: }
1226: count++;
1227: }
1228: }
1229:
1230: public void TestCompositeBoundariesCanonical() {
1231: int count = 0;
1232: while (COMPOSITEBOUNDARIESCANONICAL[count].text != null) {
1233: // logln("composite " + count);
1234: if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
1235: errln("Error at test number " + count);
1236: }
1237: count++;
1238: }
1239: }
1240:
1241: public void TestContraction() {
1242: String rules = CONTRACTIONRULE;
1243: RuleBasedCollator collator = null;
1244: try {
1245: collator = new RuleBasedCollator(rules);
1246: collator.setStrength(Collator.TERTIARY);
1247: collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1248: } catch (Exception e) {
1249: errln("Error opening collator ");
1250: }
1251: String text = "text";
1252: String pattern = "pattern";
1253: StringSearch strsrch = null;
1254: try {
1255: strsrch = new StringSearch(pattern,
1256: new StringCharacterIterator(text), collator, null);
1257: } catch (Exception e) {
1258: errln("Error opening string search ");
1259: }
1260:
1261: int count = 0;
1262: while (CONTRACTION[count].text != null) {
1263: text = CONTRACTION[count].text;
1264: pattern = CONTRACTION[count].pattern;
1265: strsrch.setTarget(new StringCharacterIterator(text));
1266: strsrch.setPattern(pattern);
1267: if (!assertEqualWithStringSearch(strsrch,
1268: CONTRACTION[count])) {
1269: errln("Error at test number " + count);
1270: }
1271: count++;
1272: }
1273: }
1274:
1275: public void TestContractionCanonical() {
1276: String rules = CONTRACTIONRULE;
1277: RuleBasedCollator collator = null;
1278: try {
1279: collator = new RuleBasedCollator(rules);
1280: collator.setStrength(Collator.TERTIARY);
1281: collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1282: } catch (Exception e) {
1283: errln("Error opening collator ");
1284: }
1285: String text = "text";
1286: String pattern = "pattern";
1287: StringSearch strsrch = null;
1288: try {
1289: strsrch = new StringSearch(pattern,
1290: new StringCharacterIterator(text), collator, null);
1291: strsrch.setCanonical(true);
1292: } catch (Exception e) {
1293: errln("Error opening string search");
1294: }
1295:
1296: int count = 0;
1297: while (CONTRACTIONCANONICAL[count].text != null) {
1298: text = CONTRACTIONCANONICAL[count].text;
1299: pattern = CONTRACTIONCANONICAL[count].pattern;
1300: strsrch.setTarget(new StringCharacterIterator(text));
1301: strsrch.setPattern(pattern);
1302: if (!assertEqualWithStringSearch(strsrch,
1303: CONTRACTIONCANONICAL[count])) {
1304: errln("Error at test number " + count);
1305: }
1306: count++;
1307: }
1308: }
1309:
1310: public void TestGetMatch() {
1311: SearchData search = MATCH[0];
1312: String text = search.text;
1313: String pattern = search.pattern;
1314:
1315: StringSearch strsrch = null;
1316: try {
1317: strsrch = new StringSearch(pattern,
1318: new StringCharacterIterator(text), m_en_us_, null);
1319: } catch (Exception e) {
1320: errln("Error opening string search ");
1321: return;
1322: }
1323:
1324: int count = 0;
1325: int matchindex = search.offset[count];
1326: String matchtext;
1327: while (matchindex >= 0) {
1328: int matchlength = search.size[count];
1329: strsrch.next();
1330: if (matchindex != strsrch.getMatchStart()
1331: || matchlength != strsrch.getMatchLength()) {
1332: errln("Text: " + search.text);
1333: errln("Pattern: " + strsrch.getPattern());
1334: errln("Error match found at " + strsrch.getMatchStart()
1335: + ", " + strsrch.getMatchLength());
1336: return;
1337: }
1338: count++;
1339:
1340: matchtext = strsrch.getMatchedText();
1341: if (matchtext.length() != matchlength) {
1342: errln("Error getting match text");
1343: }
1344: matchindex = search.offset[count];
1345: }
1346: strsrch.next();
1347: if (strsrch.getMatchStart() != StringSearch.DONE
1348: || strsrch.getMatchLength() != 0) {
1349: errln("Error end of match not found");
1350: }
1351: matchtext = strsrch.getMatchedText();
1352: if (matchtext != null) {
1353: errln("Error getting null matches");
1354: }
1355: }
1356:
1357: public void TestGetSetAttribute() {
1358: String pattern = "pattern";
1359: String text = "text";
1360: StringSearch strsrch = null;
1361: try {
1362: strsrch = new StringSearch(pattern,
1363: new StringCharacterIterator(text), m_en_us_, null);
1364: } catch (Exception e) {
1365: errln("Error opening search");
1366: return;
1367: }
1368:
1369: if (strsrch.isOverlapping()) {
1370: errln("Error default overlaping should be false");
1371: }
1372: strsrch.setOverlapping(true);
1373: if (!strsrch.isOverlapping()) {
1374: errln("Error setting overlap true");
1375: }
1376: strsrch.setOverlapping(false);
1377: if (strsrch.isOverlapping()) {
1378: errln("Error setting overlap false");
1379: }
1380:
1381: strsrch.setCanonical(true);
1382: if (!strsrch.isCanonical()) {
1383: errln("Error setting canonical match true");
1384: }
1385: strsrch.setCanonical(false);
1386: if (strsrch.isCanonical()) {
1387: errln("Error setting canonical match false");
1388: }
1389:
1390: }
1391:
1392: public void TestGetSetOffset() {
1393: String pattern = "1234567890123456";
1394: String text = "12345678901234567890123456789012";
1395: StringSearch strsrch = null;
1396: try {
1397: strsrch = new StringSearch(pattern,
1398: new StringCharacterIterator(text), m_en_us_, null);
1399: } catch (Exception e) {
1400: errln("Error opening search");
1401:
1402: return;
1403: }
1404:
1405: /* testing out of bounds error */
1406: try {
1407: strsrch.setIndex(-1);
1408: errln("Error expecting set offset error");
1409: } catch (IndexOutOfBoundsException e) {
1410: logln("PASS: strsrch.setIndex(-1) failed as expected");
1411: }
1412:
1413: try {
1414: strsrch.setIndex(128);
1415: errln("Error expecting set offset error");
1416: } catch (IndexOutOfBoundsException e) {
1417: logln("PASS: strsrch.setIndex(128) failed as expected");
1418: }
1419:
1420: int index = 0;
1421: while (BASIC[index].text != null) {
1422: SearchData search = BASIC[index++];
1423:
1424: text = search.text;
1425: pattern = search.pattern;
1426: strsrch.setTarget(new StringCharacterIterator(text));
1427: strsrch.setPattern(pattern);
1428: strsrch.getCollator().setStrength(search.strength);
1429: strsrch.reset();
1430:
1431: int count = 0;
1432: int matchindex = search.offset[count];
1433:
1434: while (matchindex >= 0) {
1435: int matchlength = search.size[count];
1436: strsrch.next();
1437: if (matchindex != strsrch.getMatchStart()
1438: || matchlength != strsrch.getMatchLength()) {
1439: errln("Text: " + text);
1440: errln("Pattern: " + strsrch.getPattern());
1441: errln("Error match found at "
1442: + strsrch.getMatchStart() + ", "
1443: + strsrch.getMatchLength());
1444: return;
1445: }
1446: matchindex = search.offset[count + 1] == -1 ? -1
1447: : search.offset[count + 2];
1448: if (search.offset[count + 1] != -1) {
1449: strsrch.setIndex(search.offset[count + 1] + 1);
1450: if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1451: errln("Error setting offset\n");
1452: return;
1453: }
1454: }
1455:
1456: count += 2;
1457: }
1458: strsrch.next();
1459: if (strsrch.getMatchStart() != StringSearch.DONE) {
1460: errln("Text: " + text);
1461: errln("Pattern: " + strsrch.getPattern());
1462: errln("Error match found at " + strsrch.getMatchStart()
1463: + ", " + strsrch.getMatchLength());
1464: return;
1465: }
1466: }
1467: strsrch.getCollator().setStrength(Collator.TERTIARY);
1468: }
1469:
1470: public void TestGetSetOffsetCanonical() {
1471:
1472: String text = "text";
1473: String pattern = "pattern";
1474: StringSearch strsrch = null;
1475: try {
1476: strsrch = new StringSearch(pattern,
1477: new StringCharacterIterator(text), m_en_us_, null);
1478: } catch (Exception e) {
1479: errln("Fail to open StringSearch!");
1480: return;
1481: }
1482: strsrch.setCanonical(true);
1483: /* testing out of bounds error */
1484: try {
1485: strsrch.setIndex(-1);
1486: errln("Error expecting set offset error");
1487: } catch (IndexOutOfBoundsException e) {
1488: logln("PASS: strsrch.setIndex(-1) failed as expected");
1489: }
1490: try {
1491: strsrch.setIndex(128);
1492: errln("Error expecting set offset error");
1493: } catch (IndexOutOfBoundsException e) {
1494: logln("PASS: strsrch.setIndex(128) failed as expected");
1495: }
1496:
1497: int index = 0;
1498: while (BASICCANONICAL[index].text != null) {
1499: SearchData search = BASICCANONICAL[index++];
1500: if (BASICCANONICAL[index].text == null) {
1501: // skip the last one
1502: break;
1503: }
1504:
1505: text = search.text;
1506: pattern = search.pattern;
1507: strsrch.setTarget(new StringCharacterIterator(text));
1508: strsrch.setPattern(pattern);
1509: int count = 0;
1510: int matchindex = search.offset[count];
1511: while (matchindex >= 0) {
1512: int matchlength = search.size[count];
1513: strsrch.next();
1514: if (matchindex != strsrch.getMatchStart()
1515: || matchlength != strsrch.getMatchLength()) {
1516: errln("Text: " + text);
1517: errln("Pattern: " + strsrch.getPattern());
1518: errln("Error match found at "
1519: + strsrch.getMatchStart() + ", "
1520: + strsrch.getMatchLength());
1521: return;
1522: }
1523: matchindex = search.offset[count + 1] == -1 ? -1
1524: : search.offset[count + 2];
1525: if (search.offset[count + 1] != -1) {
1526: strsrch.setIndex(search.offset[count + 1] + 1);
1527: if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1528: errln("Error setting offset");
1529: return;
1530: }
1531: }
1532:
1533: count += 2;
1534: }
1535: strsrch.next();
1536: if (strsrch.getMatchStart() != StringSearch.DONE) {
1537: errln("Text: " + text);
1538: errln("Pattern: %s" + strsrch.getPattern());
1539: errln("Error match found at " + strsrch.getMatchStart()
1540: + ", " + strsrch.getMatchLength());
1541: return;
1542: }
1543: }
1544: strsrch.getCollator().setStrength(Collator.TERTIARY);
1545: }
1546:
1547: public void TestIgnorable() {
1548: String rules = IGNORABLERULE;
1549: int count = 0;
1550: RuleBasedCollator collator = null;
1551: try {
1552: collator = new RuleBasedCollator(rules);
1553: collator.setStrength(IGNORABLE[count].strength);
1554: collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1555: } catch (Exception e) {
1556: errln("Error opening collator ");
1557: return;
1558: }
1559: String pattern = "pattern";
1560: String text = "text";
1561: StringSearch strsrch = null;
1562: try {
1563: strsrch = new StringSearch(pattern,
1564: new StringCharacterIterator(text), collator, null);
1565: } catch (Exception e) {
1566: errln("Error opening string search ");
1567: return;
1568: }
1569:
1570: while (IGNORABLE[count].text != null) {
1571: text = IGNORABLE[count].text;
1572: pattern = IGNORABLE[count].pattern;
1573: strsrch.setTarget(new StringCharacterIterator(text));
1574: strsrch.setPattern(pattern);
1575: if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) {
1576: errln("Error at test number " + count);
1577: }
1578: count++;
1579: }
1580: }
1581:
1582: public void TestInitialization() {
1583: String pattern;
1584: String text;
1585: String temp = "a";
1586: StringSearch result;
1587:
1588: /* simple test on the pattern ce construction */
1589: pattern = temp + temp;
1590: text = temp + temp + temp;
1591: try {
1592: result = new StringSearch(pattern,
1593: new StringCharacterIterator(text), m_en_us_, null);
1594: } catch (Exception e) {
1595: errln("Error opening search ");
1596: return;
1597: }
1598:
1599: /* testing if an extremely large pattern will fail the initialization */
1600: pattern = "";
1601: for (int count = 0; count < 512; count++) {
1602: pattern += temp;
1603: }
1604: try {
1605: result = new StringSearch(pattern,
1606: new StringCharacterIterator(text), m_en_us_, null);
1607: logln("pattern:" + result.getPattern());
1608: } catch (Exception e) {
1609: errln("Fail: an extremely large pattern will fail the initialization");
1610: return;
1611: }
1612: if (result != result) {
1613: errln("Error: string search object expected to match itself");
1614: }
1615:
1616: }
1617:
1618: public void TestNormCanonical() {
1619: m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1620: int count = 0;
1621: while (NORMCANONICAL[count].text != null) {
1622: if (!assertCanonicalEqual(NORMCANONICAL[count])) {
1623: errln("Error at test number " + count);
1624: }
1625: count++;
1626: }
1627: m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1628: }
1629:
1630: public void TestNormExact() {
1631: int count = 0;
1632: m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1633: while (BASIC[count].text != null) {
1634: if (!assertEqual(BASIC[count])) {
1635: errln("Error at test number " + count);
1636: }
1637: count++;
1638: }
1639: count = 0;
1640: while (NORMEXACT[count].text != null) {
1641: if (!assertEqual(NORMEXACT[count])) {
1642: errln("Error at test number " + count);
1643: }
1644: count++;
1645: }
1646: m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1647: count = 0;
1648: while (NONNORMEXACT[count].text != null) {
1649: if (!assertEqual(NONNORMEXACT[count])) {
1650: errln("Error at test number " + count);
1651: }
1652: count++;
1653: }
1654: }
1655:
1656: public void TestOpenClose() {
1657: StringSearch result;
1658: BreakIterator breakiter = m_en_wordbreaker_;
1659: String pattern = "";
1660: String text = "";
1661: String temp = "a";
1662: StringCharacterIterator chariter = new StringCharacterIterator(
1663: text);
1664:
1665: /* testing null arguments */
1666: try {
1667: result = new StringSearch(pattern,
1668: new StringCharacterIterator(text), null, null);
1669: errln("Error: null arguments should produce an error");
1670: } catch (Exception e) {
1671: logln("PASS: null arguments failed as expected");
1672: }
1673:
1674: chariter.setText(text);
1675: try {
1676: result = new StringSearch(pattern, chariter, null, null);
1677: errln("Error: null arguments should produce an error");
1678: } catch (Exception e) {
1679: logln("PASS: null arguments failed as expected");
1680: }
1681:
1682: text = String.valueOf(0x1);
1683: try {
1684: result = new StringSearch(pattern,
1685: new StringCharacterIterator(text), null, null);
1686: errln("Error: Empty pattern should produce an error");
1687: } catch (Exception e) {
1688: logln("PASS: Empty pattern failed as expected");
1689: }
1690:
1691: chariter.setText(text);
1692: try {
1693: result = new StringSearch(pattern, chariter, null, null);
1694: errln("Error: Empty pattern should produce an error");
1695: } catch (Exception e) {
1696: logln("PASS: Empty pattern failed as expected");
1697: }
1698:
1699: text = "";
1700: pattern = temp;
1701: try {
1702: result = new StringSearch(pattern,
1703: new StringCharacterIterator(text), null, null);
1704: errln("Error: Empty text should produce an error");
1705: } catch (Exception e) {
1706: logln("PASS: Empty text failed as expected");
1707: }
1708:
1709: chariter.setText(text);
1710: try {
1711: result = new StringSearch(pattern, chariter, null, null);
1712: errln("Error: Empty text should produce an error");
1713: } catch (Exception e) {
1714: logln("PASS: Empty text failed as expected");
1715: }
1716:
1717: text += temp;
1718: try {
1719: result = new StringSearch(pattern,
1720: new StringCharacterIterator(text), null, null);
1721: errln("Error: null arguments should produce an error");
1722: } catch (Exception e) {
1723: logln("PASS: null arguments failed as expected");
1724: }
1725:
1726: chariter.setText(text);
1727: try {
1728: result = new StringSearch(pattern, chariter, null, null);
1729: errln("Error: null arguments should produce an error");
1730: } catch (Exception e) {
1731: logln("PASS: null arguments failed as expected");
1732: }
1733:
1734: try {
1735: result = new StringSearch(pattern,
1736: new StringCharacterIterator(text), m_en_us_, null);
1737: } catch (Exception e) {
1738: errln("Error: null break iterator is valid for opening search");
1739: }
1740:
1741: try {
1742: result = new StringSearch(pattern, chariter, m_en_us_, null);
1743: } catch (Exception e) {
1744: errln("Error: null break iterator is valid for opening search");
1745: }
1746:
1747: try {
1748: result = new StringSearch(pattern,
1749: new StringCharacterIterator(text), Locale.ENGLISH);
1750: } catch (Exception e) {
1751: errln("Error: null break iterator is valid for opening search");
1752: }
1753:
1754: try {
1755: result = new StringSearch(pattern, chariter, Locale.ENGLISH);
1756: } catch (Exception e) {
1757: errln("Error: null break iterator is valid for opening search");
1758: }
1759:
1760: try {
1761: result = new StringSearch(pattern,
1762: new StringCharacterIterator(text), m_en_us_,
1763: breakiter);
1764: } catch (Exception e) {
1765: errln("Error: Break iterator is valid for opening search");
1766: }
1767:
1768: try {
1769: result = new StringSearch(pattern, chariter, m_en_us_, null);
1770: logln("pattern:" + result.getPattern());
1771: } catch (Exception e) {
1772: errln("Error: Break iterator is valid for opening search");
1773: }
1774: }
1775:
1776: public void TestOverlap() {
1777: int count = 0;
1778: while (OVERLAP[count].text != null) {
1779: if (!assertEqualWithAttribute(OVERLAP[count], false, true)) {
1780: errln("Error at overlap test number " + count);
1781: }
1782: count++;
1783: }
1784: count = 0;
1785: while (NONOVERLAP[count].text != null) {
1786: if (!assertEqual(NONOVERLAP[count])) {
1787: errln("Error at non overlap test number " + count);
1788: }
1789: count++;
1790: }
1791:
1792: count = 0;
1793: while (count < 1) {
1794: SearchData search = (OVERLAP[count]);
1795: String text = search.text;
1796: String pattern = search.pattern;
1797:
1798: RuleBasedCollator collator = getCollator(search.collator);
1799: StringSearch strsrch = null;
1800: try {
1801: strsrch = new StringSearch(pattern,
1802: new StringCharacterIterator(text), collator,
1803: null);
1804: } catch (Exception e) {
1805: errln("error open StringSearch");
1806: return;
1807: }
1808:
1809: strsrch.setOverlapping(true);
1810: if (!strsrch.isOverlapping()) {
1811: errln("Error setting overlap option");
1812: }
1813: if (!assertEqualWithStringSearch(strsrch, search)) {
1814: return;
1815: }
1816:
1817: search = NONOVERLAP[count];
1818: strsrch.setOverlapping(false);
1819: if (strsrch.isOverlapping()) {
1820: errln("Error setting overlap option");
1821: }
1822: strsrch.reset();
1823: if (!assertEqualWithStringSearch(strsrch, search)) {
1824: errln("Error at test number " + count);
1825: }
1826: count++;
1827: }
1828: }
1829:
1830: public void TestOverlapCanonical() {
1831: int count = 0;
1832: while (OVERLAPCANONICAL[count].text != null) {
1833: if (!assertEqualWithAttribute(OVERLAPCANONICAL[count],
1834: true, true)) {
1835: errln("Error at overlap test number %d" + count);
1836: }
1837: count++;
1838: }
1839: count = 0;
1840: while (NONOVERLAP[count].text != null) {
1841: if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
1842: errln("Error at non overlap test number %d" + count);
1843: }
1844: count++;
1845: }
1846:
1847: count = 0;
1848: while (count < 1) {
1849: /* UChar temp[128];
1850: const SearchData *search = &(OVERLAPCANONICAL[count]);
1851: UErrorCode status = U_ZERO_ERROR;*/
1852: SearchData search = OVERLAPCANONICAL[count];
1853:
1854: /*u_unescape(search.text, temp, 128);
1855: UnicodeString text;
1856: text.setTo(temp, u_strlen(temp));
1857: u_unescape(search.pattern, temp, 128);
1858: UnicodeString pattern;
1859: pattern.setTo(temp, u_strlen(temp));*/
1860: RuleBasedCollator collator = getCollator(search.collator);
1861: StringSearch strsrch = new StringSearch(search.pattern,
1862: new StringCharacterIterator(search.text), collator,
1863: null);
1864: strsrch.setCanonical(true);
1865: strsrch.setOverlapping(true);
1866: if (strsrch.isOverlapping() != true) {
1867: errln("Error setting overlap option");
1868: }
1869: if (!assertEqualWithStringSearch(strsrch, search)) {
1870: strsrch = null;
1871: return;
1872: }
1873: search = NONOVERLAPCANONICAL[count];
1874: strsrch.setOverlapping(false);
1875: if (strsrch.isOverlapping() != false) {
1876: errln("Error setting overlap option");
1877: }
1878: strsrch.reset();
1879: if (!assertEqualWithStringSearch(strsrch, search)) {
1880: strsrch = null;
1881: errln("Error at test number %d" + count);
1882: }
1883:
1884: count++;
1885: strsrch = null;
1886: }
1887: }
1888:
1889: public void TestPattern() {
1890: m_en_us_.setStrength(PATTERN[0].strength);
1891: StringSearch strsrch = new StringSearch(PATTERN[0].pattern,
1892: new StringCharacterIterator(PATTERN[0].text), m_en_us_,
1893: null);
1894:
1895: /*if (U_FAILURE(status)) {
1896: errln("Error opening string search %s", u_errorName(status));
1897: m_en_us_.setStrength(getECollationStrength(UCOL_TERTIARY));
1898: if (strsrch != NULL) {
1899: delete strsrch;
1900: }
1901: return;
1902: }*/
1903:
1904: if (strsrch.getPattern() != PATTERN[0].pattern) {
1905: errln("Error setting pattern");
1906: }
1907: if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1908: m_en_us_.setStrength(Collator.TERTIARY);
1909: if (strsrch != null) {
1910: strsrch = null;
1911: }
1912: return;
1913: }
1914:
1915: strsrch.setPattern(PATTERN[1].pattern);
1916: if (PATTERN[1].pattern != strsrch.getPattern()) {
1917: errln("Error setting pattern");
1918: m_en_us_.setStrength(Collator.TERTIARY);
1919: if (strsrch != null) {
1920: strsrch = null;
1921: }
1922: return;
1923: }
1924: strsrch.reset();
1925:
1926: if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) {
1927: m_en_us_.setStrength(Collator.TERTIARY);
1928: if (strsrch != null) {
1929: strsrch = null;
1930: }
1931: return;
1932: }
1933:
1934: strsrch.setPattern(PATTERN[0].pattern);
1935: if (PATTERN[0].pattern != strsrch.getPattern()) {
1936: errln("Error setting pattern");
1937: m_en_us_.setStrength(Collator.TERTIARY);
1938: if (strsrch != null) {
1939: strsrch = null;
1940: }
1941: return;
1942: }
1943: strsrch.reset();
1944:
1945: if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1946: m_en_us_.setStrength(Collator.TERTIARY);
1947: if (strsrch != null) {
1948: strsrch = null;
1949: }
1950: return;
1951: }
1952: /* enormous pattern size to see if this crashes */
1953: String pattern = "";
1954: for (int templength = 0; templength != 512; templength++) {
1955: pattern += 0x61;
1956: }
1957: try {
1958: strsrch.setPattern(pattern);
1959: } catch (Exception e) {
1960: errln("Error setting pattern with size 512");
1961: }
1962:
1963: m_en_us_.setStrength(Collator.TERTIARY);
1964: if (strsrch != null) {
1965: strsrch = null;
1966: }
1967: }
1968:
1969: public void TestPatternCanonical() {
1970: //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text);
1971: m_en_us_.setStrength(PATTERNCANONICAL[0].strength);
1972: StringSearch strsrch = new StringSearch(
1973: PATTERNCANONICAL[0].pattern,
1974: new StringCharacterIterator(PATTERNCANONICAL[0].text),
1975: m_en_us_, null);
1976: strsrch.setCanonical(true);
1977:
1978: if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1979: errln("Error setting pattern");
1980: }
1981: if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1982: m_en_us_.setStrength(Collator.TERTIARY);
1983: strsrch = null;
1984: return;
1985: }
1986:
1987: strsrch.setPattern(PATTERNCANONICAL[1].pattern);
1988: if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) {
1989: errln("Error setting pattern");
1990: m_en_us_.setStrength(Collator.TERTIARY);
1991: strsrch = null;
1992: return;
1993: }
1994: strsrch.reset();
1995: strsrch.setCanonical(true);
1996:
1997: if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) {
1998: m_en_us_.setStrength(Collator.TERTIARY);
1999: strsrch = null;
2000: return;
2001: }
2002:
2003: strsrch.setPattern(PATTERNCANONICAL[0].pattern);
2004: if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
2005: errln("Error setting pattern");
2006: m_en_us_.setStrength(Collator.TERTIARY);
2007: strsrch = null;
2008: return;
2009: }
2010:
2011: strsrch.reset();
2012: strsrch.setCanonical(true);
2013: if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
2014: m_en_us_.setStrength(Collator.TERTIARY);
2015: strsrch = null;
2016: return;
2017: }
2018: }
2019:
2020: public void TestReset() {
2021: StringCharacterIterator text = new StringCharacterIterator(
2022: "fish fish");
2023: String pattern = "s";
2024:
2025: StringSearch strsrch = new StringSearch(pattern, text,
2026: m_en_us_, null);
2027: strsrch.setOverlapping(true);
2028: strsrch.setCanonical(true);
2029: strsrch.setIndex(9);
2030: strsrch.reset();
2031: if (strsrch.isCanonical() || strsrch.isOverlapping()
2032: || strsrch.getIndex() != 0
2033: || strsrch.getMatchLength() != 0
2034: || strsrch.getMatchStart() != SearchIterator.DONE) {
2035: errln("Error resetting string search");
2036: }
2037:
2038: strsrch.previous();
2039: if (strsrch.getMatchStart() != 7
2040: || strsrch.getMatchLength() != 1) {
2041: errln("Error resetting string search\n");
2042: }
2043: }
2044:
2045: public void TestSetMatch() {
2046: int count = 0;
2047: while (MATCH[count].text != null) {
2048: SearchData search = MATCH[count];
2049: StringSearch strsrch = new StringSearch(search.pattern,
2050: new StringCharacterIterator(search.text), m_en_us_,
2051: null);
2052:
2053: int size = 0;
2054: while (search.offset[size] != -1) {
2055: size++;
2056: }
2057:
2058: if (strsrch.first() != search.offset[0]) {
2059: errln("Error getting first match");
2060: }
2061: if (strsrch.last() != search.offset[size - 1]) {
2062: errln("Error getting last match");
2063: }
2064:
2065: int index = 0;
2066: while (index < size) {
2067: if (index + 2 < size) {
2068: if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) {
2069: errln("Error getting following match at index "
2070: + (search.offset[index + 2] - 1));
2071: }
2072: }
2073: if (index + 1 < size) {
2074: if (strsrch.preceding(search.offset[index + 1]
2075: + search.size[index + 1] + 1) != search.offset[index + 1]) {
2076: errln("Error getting preceeding match at index "
2077: + (search.offset[index + 1] + 1));
2078: }
2079: }
2080: index += 2;
2081: }
2082:
2083: if (strsrch.following(search.text.length()) != SearchIterator.DONE) {
2084: errln("Error expecting out of bounds match");
2085: }
2086: if (strsrch.preceding(0) != SearchIterator.DONE) {
2087: errln("Error expecting out of bounds match");
2088: }
2089: count++;
2090: strsrch = null;
2091: }
2092: }
2093:
2094: public void TestStrength() {
2095: int count = 0;
2096: while (STRENGTH[count].text != null) {
2097: if (count == 3)
2098: count++;
2099: if (!assertEqual(STRENGTH[count])) {
2100: errln("Error at test number " + count);
2101: }
2102: count++;
2103: }
2104: }
2105:
2106: public void TestStrengthCanonical() {
2107: int count = 0;
2108: while (STRENGTHCANONICAL[count].text != null) {
2109: if (count == 3)
2110: count++;
2111: if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
2112: errln("Error at test number" + count);
2113: }
2114: count++;
2115: }
2116: }
2117:
2118: public void TestSupplementary() {
2119: int count = 0;
2120: while (SUPPLEMENTARY[count].text != null) {
2121: if (!assertEqual(SUPPLEMENTARY[count])) {
2122: errln("Error at test number " + count);
2123: }
2124: count++;
2125: }
2126: }
2127:
2128: public void TestSupplementaryCanonical() {
2129: int count = 0;
2130: while (SUPPLEMENTARYCANONICAL[count].text != null) {
2131: if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
2132: errln("Error at test number" + count);
2133: }
2134: count++;
2135: }
2136: }
2137:
2138: public void TestText() {
2139: SearchData TEXT[] = {
2140: new SearchData("the foxy brown fox", "fox", null,
2141: Collator.TERTIARY, null,
2142: new int[] { 4, 15, -1 }, new int[] { 3, 3 }),
2143: new SearchData("the quick brown fox", "fox", null,
2144: Collator.TERTIARY, null, new int[] { 16, -1 },
2145: new int[] { 3 }),
2146: new SearchData(null, null, null, Collator.TERTIARY,
2147: null, new int[] { -1 }, new int[] { 0 }) };
2148: StringCharacterIterator t = new StringCharacterIterator(
2149: TEXT[0].text);
2150: StringSearch strsrch = new StringSearch(TEXT[0].pattern, t,
2151: m_en_us_, null);
2152:
2153: if (!t.equals(strsrch.getTarget())) {
2154: errln("Error setting text");
2155: }
2156: if (!assertEqualWithStringSearch(strsrch, TEXT[0])) {
2157: errln("Error at assertEqualWithStringSearch");
2158: return;
2159: }
2160:
2161: t = new StringCharacterIterator(TEXT[1].text);
2162: strsrch.setTarget(t);
2163: if (!t.equals(strsrch.getTarget())) {
2164: errln("Error setting text");
2165: return;
2166: }
2167:
2168: if (!assertEqualWithStringSearch(strsrch, TEXT[1])) {
2169: errln("Error at assertEqualWithStringSearch");
2170: return;
2171: }
2172: }
2173:
2174: public void TestTextCanonical() {
2175: StringCharacterIterator t = new StringCharacterIterator(
2176: TEXTCANONICAL[0].text);
2177: StringSearch strsrch = new StringSearch(
2178: TEXTCANONICAL[0].pattern, t, m_en_us_, null);
2179: strsrch.setCanonical(true);
2180:
2181: if (!t.equals(strsrch.getTarget())) {
2182: errln("Error setting text");
2183: }
2184: if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
2185: strsrch = null;
2186: return;
2187: }
2188:
2189: t = new StringCharacterIterator(TEXTCANONICAL[1].text);
2190: strsrch.setTarget(t);
2191: if (!t.equals(strsrch.getTarget())) {
2192: errln("Error setting text");
2193: strsrch = null;
2194: return;
2195: }
2196:
2197: if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) {
2198: strsrch = null;
2199: return;
2200: }
2201:
2202: t = new StringCharacterIterator(TEXTCANONICAL[0].text);
2203: strsrch.setTarget(t);
2204: if (!t.equals(strsrch.getTarget())) {
2205: errln("Error setting text");
2206: strsrch = null;
2207: return;
2208: }
2209:
2210: if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
2211: errln("Error at assertEqualWithStringSearch");
2212: strsrch = null;
2213: return;
2214: }
2215: }
2216:
2217: public void TestVariable() {
2218: int count = 0;
2219: m_en_us_.setAlternateHandlingShifted(true);
2220: while (VARIABLE[count].text != null) {
2221: // logln("variable" + count);
2222: if (!assertEqual(VARIABLE[count])) {
2223: errln("Error at test number " + count);
2224: }
2225: count++;
2226: }
2227: m_en_us_.setAlternateHandlingShifted(false);
2228: }
2229:
2230: public void TestVariableCanonical() {
2231: int count = 0;
2232: m_en_us_.setAlternateHandlingShifted(true);
2233: while (VARIABLE[count].text != null) {
2234: // logln("variable " + count);
2235: if (!assertCanonicalEqual(VARIABLE[count])) {
2236: errln("Error at test number " + count);
2237: }
2238: count++;
2239: }
2240: m_en_us_.setAlternateHandlingShifted(false);
2241: }
2242:
2243: public void TestSubClass() {
2244: class TestSearch extends SearchIterator {
2245: String pattern;
2246: String text;
2247:
2248: TestSearch(StringCharacterIterator target,
2249: BreakIterator breaker, String pattern) {
2250: super (target, breaker);
2251: this .pattern = pattern;
2252: StringBuffer buffer = new StringBuffer();
2253: while (targetText.getIndex() != targetText
2254: .getEndIndex()) {
2255: buffer.append(targetText.current());
2256: targetText.next();
2257: }
2258: text = buffer.toString();
2259: targetText.setIndex(targetText.getBeginIndex());
2260: }
2261:
2262: protected int handleNext(int start) {
2263: int match = text.indexOf(pattern, start);
2264: if (match < 0) {
2265: targetText.last();
2266: return DONE;
2267: }
2268: targetText.setIndex(match);
2269: setMatchLength(pattern.length());
2270: return match;
2271: }
2272:
2273: protected int handlePrevious(int start) {
2274: int match = text.lastIndexOf(pattern, start - 1);
2275: if (match < 0) {
2276: targetText.setIndex(0);
2277: return DONE;
2278: }
2279: targetText.setIndex(match);
2280: setMatchLength(pattern.length());
2281: return match;
2282: }
2283:
2284: public int getIndex() {
2285: int result = targetText.getIndex();
2286: if (result < 0 || result >= text.length()) {
2287: return DONE;
2288: }
2289: return result;
2290: }
2291: }
2292:
2293: TestSearch search = new TestSearch(new StringCharacterIterator(
2294: "abc abcd abc"), null, "abc");
2295: int expected[] = { 0, 4, 9 };
2296: for (int i = 0; i < expected.length; i++) {
2297: if (search.next() != expected[i]) {
2298: errln("Error getting next match");
2299: }
2300: if (search.getMatchLength() != search.pattern.length()) {
2301: errln("Error getting next match length");
2302: }
2303: }
2304: if (search.next() != SearchIterator.DONE) {
2305: errln("Error should have reached the end of the iteration");
2306: }
2307: for (int i = expected.length - 1; i >= 0; i--) {
2308: if (search.previous() != expected[i]) {
2309: errln("Error getting next match");
2310: }
2311: if (search.getMatchLength() != search.pattern.length()) {
2312: errln("Error getting next match length");
2313: }
2314: }
2315: if (search.previous() != SearchIterator.DONE) {
2316: errln("Error should have reached the start of the iteration");
2317: }
2318: }
2319: }
|