0001: /*
0002: *******************************************************************************
0003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */
0007: package com.ibm.icu.dev.test.translit;
0008:
0009: import com.ibm.icu.lang.*;
0010: import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
0011: import com.ibm.icu.text.*;
0012: import com.ibm.icu.dev.test.*;
0013: import com.ibm.icu.impl.PrettyPrinter;
0014: import com.ibm.icu.impl.Utility;
0015: import com.ibm.icu.impl.SortedSetRelation;
0016: import java.util.*;
0017: import java.text.ParsePosition;
0018:
0019: /**
0020: * @test
0021: * @summary General test of UnicodeSet
0022: */
0023: public class UnicodeSetTest extends TestFmwk {
0024:
// Marker string placed inside the String[] arguments handed to expectToPattern().
// NOTE(review): presumably it separates strings expected to be in the set from
// strings expected to be absent -- confirm against expectToPattern().
static final String NOT = "%%%%";
0026:
0027: public static void main(String[] args) throws Exception {
0028: new UnicodeSetTest().run(args);
0029: }
0030:
0031: /**
0032: * Test toPattern().
0033: */
0034: public void TestToPattern() throws Exception {
0035: // Test that toPattern() round trips with syntax characters
0036: // and whitespace.
0037: for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
0038: checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(
0039: OTHER_TOPATTERN_TESTS[i]));
0040: }
0041: for (int i = 0; i <= 0x10FFFF; ++i) {
0042: if ((i <= 0xFF && !UCharacter.isLetter(i))
0043: || UCharacter.isWhitespace(i)) {
0044: // check various combinations to make sure they all work.
0045: if (i != 0 && !toPatternAux(i, i))
0046: continue;
0047: if (!toPatternAux(0, i))
0048: continue;
0049: if (!toPatternAux(i, 0xFFFF))
0050: continue;
0051: }
0052: }
0053:
0054: // Test pattern behavior of multicharacter strings.
0055: UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
0056: expectToPattern(s, "[a-z{aa}{ab}]", new String[] { "aa", "ab",
0057: NOT, "ac" });
0058: s.add("ac");
0059: expectToPattern(s, "[a-z{aa}{ab}{ac}]", new String[] { "aa",
0060: "ab", "ac", NOT, "xy" });
0061:
0062: s.applyPattern("[a-z {\\{l} {r\\}}]");
0063: expectToPattern(s, "[a-z{r\\}}{\\{l}]", new String[] { "{l",
0064: "r}", NOT, "xy" });
0065: s.add("[]");
0066: expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]", new String[] {
0067: "{l", "r}", "[]", NOT, "xy" });
0068:
0069: s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
0070: expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
0071: new String[] { "\u4E01\u4E02", "\n\r" });
0072:
0073: s.clear();
0074: s.add("abc");
0075: s.add("abc");
0076: expectToPattern(s, "[{abc}]", new String[] { "abc", NOT, "ab" });
0077:
0078: // JB#3400: For 2 character ranges prefer [ab] to [a-b]
0079: s.clear();
0080: s.add('a', 'b');
0081: expectToPattern(s, "[ab]", null);
0082:
0083: // Cover applyPattern, applyPropertyAlias
0084: s.clear();
0085: s.applyPattern("[ab ]", true);
0086: expectToPattern(s, "[ab]", new String[] { "a", NOT, "ab" });
0087: s.clear();
0088: s.applyPattern("[ab ]", false);
0089: expectToPattern(s, "[\\\u0020ab]", new String[] { "a",
0090: "\u0020", NOT, "ab" });
0091:
0092: s.clear();
0093: s.applyPropertyAlias("nv", "0.5");
0094: expectToPattern(
0095: s,
0096: "[\\u00BD\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]",
0097: null);
0098: // Unicode 4.1 adds \u2CFD\U00010141\U00010175\U00010176 with numeric value 1/2
0099:
0100: s.clear();
0101: s.applyPropertyAlias("gc", "Lu");
0102: // TODO expectToPattern(s, what?)
0103: }
0104:
0105: static String[] OTHER_TOPATTERN_TESTS = { "[[:latin:]&[:greek:]]",
0106: "[[:latin:]-[:greek:]]", "[:nonspacing mark:]" };
0107:
0108: public boolean toPatternAux(int start, int end) {
0109: // use Integer.toString because Utility.hex doesn't handle ints
0110: String source = "0x"
0111: + Integer.toString(start, 16).toUpperCase();
0112: if (start != end)
0113: source += "..0x" + Integer.toString(end, 16).toUpperCase();
0114: UnicodeSet testSet = new UnicodeSet();
0115: testSet.add(start, end);
0116: return checkPat(source, testSet);
0117: }
0118:
0119: boolean checkPat(String source, UnicodeSet testSet) {
0120: String pat = "";
0121: try {
0122: // What we want to make sure of is that a pattern generated
0123: // by toPattern(), with or without escaped unprintables, can
0124: // be passed back into the UnicodeSet constructor.
0125: String pat0 = testSet.toPattern(true);
0126: if (!checkPat(source + " (escaped)", testSet, pat0))
0127: return false;
0128:
0129: //String pat1 = unescapeLeniently(pat0);
0130: //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
0131:
0132: String pat2 = testSet.toPattern(false);
0133: if (!checkPat(source, testSet, pat2))
0134: return false;
0135:
0136: //String pat3 = unescapeLeniently(pat2);
0137: //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
0138:
0139: //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
0140: logln(source + " => " + pat0 + ", " + pat2);
0141: } catch (Exception e) {
0142: errln("EXCEPTION in toPattern: " + source + " => " + pat);
0143: return false;
0144: }
0145: return true;
0146: }
0147:
0148: boolean checkPat(String source, UnicodeSet testSet, String pat) {
0149: UnicodeSet testSet2 = new UnicodeSet(pat);
0150: if (!testSet2.equals(testSet)) {
0151: errln("Fail toPattern: " + source + "; " + pat + " => "
0152: + testSet2.toPattern(false) + ", expected "
0153: + testSet.toPattern(false));
0154: return false;
0155: }
0156: return true;
0157: }
0158:
0159: // NOTE: copied the following from Utility. There ought to be a version in there with a flag
0160: // that does the Java stuff
0161:
0162: public static int unescapeAt(String s, int[] offset16) {
0163: int c;
0164: int result = 0;
0165: int n = 0;
0166: int minDig = 0;
0167: int maxDig = 0;
0168: int bitsPerDigit = 4;
0169: int dig;
0170: int i;
0171:
0172: /* Check that offset is in range */
0173: int offset = offset16[0];
0174: int length = s.length();
0175: if (offset < 0 || offset >= length) {
0176: return -1;
0177: }
0178:
0179: /* Fetch first UChar after '\\' */
0180: c = UTF16.charAt(s, offset);
0181: offset += UTF16.getCharCount(c);
0182:
0183: /* Convert hexadecimal and octal escapes */
0184: switch (c) {
0185: case 'u':
0186: minDig = maxDig = 4;
0187: break;
0188: /*
0189: case 'U':
0190: minDig = maxDig = 8;
0191: break;
0192: case 'x':
0193: minDig = 1;
0194: maxDig = 2;
0195: break;
0196: */
0197: default:
0198: dig = UCharacter.digit(c, 8);
0199: if (dig >= 0) {
0200: minDig = 1;
0201: maxDig = 3;
0202: n = 1; /* Already have first octal digit */
0203: bitsPerDigit = 3;
0204: result = dig;
0205: }
0206: break;
0207: }
0208: if (minDig != 0) {
0209: while (offset < length && n < maxDig) {
0210: // TEMPORARY
0211: // TODO: Restore the char32-based code when UCharacter.digit
0212: // is working (Bug 66).
0213:
0214: //c = UTF16.charAt(s, offset);
0215: //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
0216: c = s.charAt(offset);
0217: dig = Character.digit((char) c, (bitsPerDigit == 3) ? 8
0218: : 16);
0219: if (dig < 0) {
0220: break;
0221: }
0222: result = (result << bitsPerDigit) | dig;
0223: //offset += UTF16.getCharCount(c);
0224: ++offset;
0225: ++n;
0226: }
0227: if (n < minDig) {
0228: return -1;
0229: }
0230: offset16[0] = offset;
0231: return result;
0232: }
0233:
0234: /* Convert C-style escapes in table */
0235: for (i = 0; i < UNESCAPE_MAP.length; i += 2) {
0236: if (c == UNESCAPE_MAP[i]) {
0237: offset16[0] = offset;
0238: return UNESCAPE_MAP[i + 1];
0239: } else if (c < UNESCAPE_MAP[i]) {
0240: break;
0241: }
0242: }
0243:
0244: /* If no special forms are recognized, then consider
0245: * the backslash to generically escape the next character. */
0246: offset16[0] = offset;
0247: return c;
0248: }
0249:
/*
 * Table of C-style single-letter escapes consulted by unescapeAt(): entries
 * come in pairs of (character following the backslash, character it denotes).
 * This map must be in ASCENDING ORDER OF THE ESCAPE CODE, because the lookup
 * loop in unescapeAt() stops at the first entry greater than the character
 * it is looking for.
 */
static private final char[] UNESCAPE_MAP = {
/*" 0x22, 0x22 */
/*' 0x27, 0x27 */
/*? 0x3F, 0x3F */
/*\ 0x5C, 0x5C */
/*a*/0x61, 0x07,
/*b*/0x62, 0x08,
/*f*/0x66, 0x0c,
/*n*/0x6E, 0x0a,
/*r*/0x72, 0x0d,
/*t*/0x74, 0x09,
/*v*/0x76, 0x0b };
0263:
0264: /**
0265: * Convert all escapes in a given string using unescapeAt().
0266: * Leave invalid escape sequences unchanged.
0267: */
0268: public static String unescapeLeniently(String s) {
0269: StringBuffer buf = new StringBuffer();
0270: int[] pos = new int[1];
0271: for (int i = 0; i < s.length();) {
0272: char c = s.charAt(i++);
0273: if (c == '\\') {
0274: pos[0] = i;
0275: int e = unescapeAt(s, pos);
0276: if (e < 0) {
0277: buf.append(c);
0278: } else {
0279: UTF16.append(buf, e);
0280: i = pos[0];
0281: }
0282: } else {
0283: buf.append(c);
0284: }
0285: }
0286: return buf.toString();
0287: }
0288:
0289: public void TestPatterns() {
0290: UnicodeSet set = new UnicodeSet();
0291: expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
0292: expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
0293: expectPattern(set, "[a\\-z]", "--aazz");
0294: expectPattern(set, "[-az]", "--aazz");
0295: expectPattern(set, "[az-]", "--aazz");
0296: expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
0297:
0298: // Throw in a test of complement
0299: set.complement();
0300: String exp = '\u0000' + "aeeoouu" + (char) ('z' + 1) + '\uFFFF';
0301: expectPairs(set, exp);
0302: }
0303:
0304: public void TestCategories() {
0305: int failures = 0;
0306: UnicodeSet set = new UnicodeSet("[:Lu:]");
0307: expectContainment(set, "ABC", "abc");
0308:
0309: // Make sure generation of L doesn't pollute cached Lu set
0310: // First generate L, then Lu
0311: // not used int TOP = 0x200; // Don't need to go over the whole range:
0312: set = new UnicodeSet("[:L:]");
0313: for (int i = 0; i < 0x200; ++i) {
0314: boolean l = UCharacter.isLetter(i);
0315: if (l != set.contains((char) i)) {
0316: errln("FAIL: L contains " + (char) i + " = "
0317: + set.contains((char) i));
0318: if (++failures == 10)
0319: break;
0320: }
0321: }
0322:
0323: set = new UnicodeSet("[:Lu:]");
0324: for (int i = 0; i < 0x200; ++i) {
0325: boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
0326: if (lu != set.contains((char) i)) {
0327: errln("FAIL: Lu contains " + (char) i + " = "
0328: + set.contains((char) i));
0329: if (++failures == 20)
0330: break;
0331: }
0332: }
0333: }
0334:
0335: public void TestAddRemove() {
0336: UnicodeSet set = new UnicodeSet();
0337: set.add('a', 'z');
0338: expectPairs(set, "az");
0339: set.remove('m', 'p');
0340: expectPairs(set, "alqz");
0341: set.remove('e', 'g');
0342: expectPairs(set, "adhlqz");
0343: set.remove('d', 'i');
0344: expectPairs(set, "acjlqz");
0345: set.remove('c', 'r');
0346: expectPairs(set, "absz");
0347: set.add('f', 'q');
0348: expectPairs(set, "abfqsz");
0349: set.remove('a', 'g');
0350: expectPairs(set, "hqsz");
0351: set.remove('a', 'z');
0352: expectPairs(set, "");
0353:
0354: // Try removing an entire set from another set
0355: expectPattern(set, "[c-x]", "cx");
0356: UnicodeSet set2 = new UnicodeSet();
0357: expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
0358: set.removeAll(set2);
0359: expectPairs(set, "deluxx");
0360:
0361: // Try adding an entire set to another set
0362: expectPattern(set, "[jackiemclean]", "aacceein");
0363: expectPattern(set2, "[hitoshinamekatajamesanderson]",
0364: "aadehkmort");
0365: set.addAll(set2);
0366: expectPairs(set, "aacehort");
0367:
0368: // Test commutativity
0369: expectPattern(set, "[hitoshinamekatajamesanderson]",
0370: "aadehkmort");
0371: expectPattern(set2, "[jackiemclean]", "aacceein");
0372: set.addAll(set2);
0373: expectPairs(set, "aacehort");
0374: }
0375:
0376: /**
0377: * Make sure minimal representation is maintained.
0378: */
0379: public void TestMinimalRep() {
0380: // This is pretty thoroughly tested by checkCanonicalRep()
0381: // run against the exhaustive operation results. Use the code
0382: // here for debugging specific spot problems.
0383:
0384: // 1 overlap against 2
0385: UnicodeSet set = new UnicodeSet("[h-km-q]");
0386: UnicodeSet set2 = new UnicodeSet("[i-o]");
0387: set.addAll(set2);
0388: expectPairs(set, "hq");
0389: // right
0390: set.applyPattern("[a-m]");
0391: set2.applyPattern("[e-o]");
0392: set.addAll(set2);
0393: expectPairs(set, "ao");
0394: // left
0395: set.applyPattern("[e-o]");
0396: set2.applyPattern("[a-m]");
0397: set.addAll(set2);
0398: expectPairs(set, "ao");
0399: // 1 overlap against 3
0400: set.applyPattern("[a-eg-mo-w]");
0401: set2.applyPattern("[d-q]");
0402: set.addAll(set2);
0403: expectPairs(set, "aw");
0404: }
0405:
0406: public void TestAPI() {
0407: // default ct
0408: UnicodeSet set = new UnicodeSet();
0409: if (!set.isEmpty() || set.getRangeCount() != 0) {
0410: errln("FAIL, set should be empty but isn't: " + set);
0411: }
0412:
0413: // clear(), isEmpty()
0414: set.add('a');
0415: if (set.isEmpty()) {
0416: errln("FAIL, set shouldn't be empty but is: " + set);
0417: }
0418: set.clear();
0419: if (!set.isEmpty()) {
0420: errln("FAIL, set should be empty but isn't: " + set);
0421: }
0422:
0423: // size()
0424: set.clear();
0425: if (set.size() != 0) {
0426: errln("FAIL, size should be 0, but is " + set.size() + ": "
0427: + set);
0428: }
0429: set.add('a');
0430: if (set.size() != 1) {
0431: errln("FAIL, size should be 1, but is " + set.size() + ": "
0432: + set);
0433: }
0434: set.add('1', '9');
0435: if (set.size() != 10) {
0436: errln("FAIL, size should be 10, but is " + set.size()
0437: + ": " + set);
0438: }
0439: set.clear();
0440: set.complement();
0441: if (set.size() != 0x110000) {
0442: errln("FAIL, size should be 0x110000, but is" + set.size());
0443: }
0444:
0445: // contains(first, last)
0446: set.clear();
0447: set.applyPattern("[A-Y 1-8 b-d l-y]");
0448: for (int i = 0; i < set.getRangeCount(); ++i) {
0449: int a = set.getRangeStart(i);
0450: int b = set.getRangeEnd(i);
0451: if (!set.contains(a, b)) {
0452: errln("FAIL, should contain " + (char) a + '-'
0453: + (char) b + " but doesn't: " + set);
0454: }
0455: if (set.contains((char) (a - 1), b)) {
0456: errln("FAIL, shouldn't contain " + (char) (a - 1) + '-'
0457: + (char) b + " but does: " + set);
0458: }
0459: if (set.contains(a, (char) (b + 1))) {
0460: errln("FAIL, shouldn't contain " + (char) a + '-'
0461: + (char) (b + 1) + " but does: " + set);
0462: }
0463: }
0464:
0465: // Ported InversionList test.
0466: UnicodeSet a = new UnicodeSet((char) 3, (char) 10);
0467: UnicodeSet b = new UnicodeSet((char) 7, (char) 15);
0468: UnicodeSet c = new UnicodeSet();
0469:
0470: logln("a [3-10]: " + a);
0471: logln("b [7-15]: " + b);
0472: c.set(a);
0473: c.addAll(b);
0474: UnicodeSet exp = new UnicodeSet((char) 3, (char) 15);
0475: if (c.equals(exp)) {
0476: logln("c.set(a).add(b): " + c);
0477: } else {
0478: errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
0479: }
0480: c.complement();
0481: exp.set((char) 0, (char) 2);
0482: exp.add((char) 16, UnicodeSet.MAX_VALUE);
0483: if (c.equals(exp)) {
0484: logln("c.complement(): " + c);
0485: } else {
0486: errln(Utility.escape("FAIL: c.complement() = " + c
0487: + ", expect " + exp));
0488: }
0489: c.complement();
0490: exp.set((char) 3, (char) 15);
0491: if (c.equals(exp)) {
0492: logln("c.complement(): " + c);
0493: } else {
0494: errln("FAIL: c.complement() = " + c + ", expect " + exp);
0495: }
0496: c.set(a);
0497: c.complementAll(b);
0498: exp.set((char) 3, (char) 6);
0499: exp.add((char) 11, (char) 15);
0500: if (c.equals(exp)) {
0501: logln("c.set(a).complement(b): " + c);
0502: } else {
0503: errln("FAIL: c.set(a).complement(b) = " + c + ", expect "
0504: + exp);
0505: }
0506:
0507: exp.set(c);
0508: c = bitsToSet(setToBits(c));
0509: if (c.equals(exp)) {
0510: logln("bitsToSet(setToBits(c)): " + c);
0511: } else {
0512: errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect "
0513: + exp);
0514: }
0515:
0516: // Additional tests for coverage JB#2118
0517: //UnicodeSet::complement(class UnicodeString const &)
0518: //UnicodeSet::complementAll(class UnicodeString const &)
0519: //UnicodeSet::containsNone(class UnicodeSet const &)
0520: //UnicodeSet::containsNone(long,long)
0521: //UnicodeSet::containsSome(class UnicodeSet const &)
0522: //UnicodeSet::containsSome(long,long)
0523: //UnicodeSet::removeAll(class UnicodeString const &)
0524: //UnicodeSet::retain(long)
0525: //UnicodeSet::retainAll(class UnicodeString const &)
0526: //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
0527: //UnicodeSetIterator::getString(void)
0528: set.clear();
0529: set.complement("ab");
0530: exp.applyPattern("[{ab}]");
0531: if (!set.equals(exp)) {
0532: errln("FAIL: complement(\"ab\")");
0533: return;
0534: }
0535:
0536: UnicodeSetIterator iset = new UnicodeSetIterator(set);
0537: if (!iset.next()
0538: || iset.codepoint != UnicodeSetIterator.IS_STRING) {
0539: errln("FAIL: UnicodeSetIterator.next/IS_STRING");
0540: } else if (!iset.string.equals("ab")) {
0541: errln("FAIL: UnicodeSetIterator.string");
0542: }
0543:
0544: set.add((char) 0x61, (char) 0x7A);
0545: set.complementAll("alan");
0546: exp.applyPattern("[{ab}b-kmo-z]");
0547: if (!set.equals(exp)) {
0548: errln("FAIL: complementAll(\"alan\")");
0549: return;
0550: }
0551:
0552: exp.applyPattern("[a-z]");
0553: if (set.containsNone(exp)) {
0554: errln("FAIL: containsNone(UnicodeSet)");
0555: }
0556: if (!set.containsSome(exp)) {
0557: errln("FAIL: containsSome(UnicodeSet)");
0558: }
0559: exp.applyPattern("[aln]");
0560: if (!set.containsNone(exp)) {
0561: errln("FAIL: containsNone(UnicodeSet)");
0562: }
0563: if (set.containsSome(exp)) {
0564: errln("FAIL: containsSome(UnicodeSet)");
0565: }
0566:
0567: if (set.containsNone((char) 0x61, (char) 0x7A)) {
0568: errln("FAIL: containsNone(char, char)");
0569: }
0570: if (!set.containsSome((char) 0x61, (char) 0x7A)) {
0571: errln("FAIL: containsSome(char, char)");
0572: }
0573: if (!set.containsNone((char) 0x41, (char) 0x5A)) {
0574: errln("FAIL: containsNone(char, char)");
0575: }
0576: if (set.containsSome((char) 0x41, (char) 0x5A)) {
0577: errln("FAIL: containsSome(char, char)");
0578: }
0579:
0580: set.removeAll("liu");
0581: exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
0582: if (!set.equals(exp)) {
0583: errln("FAIL: removeAll(\"liu\")");
0584: return;
0585: }
0586:
0587: set.retainAll("star");
0588: exp.applyPattern("[rst]");
0589: if (!set.equals(exp)) {
0590: errln("FAIL: retainAll(\"star\")");
0591: return;
0592: }
0593:
0594: set.retain((char) 0x73);
0595: exp.applyPattern("[s]");
0596: if (!set.equals(exp)) {
0597: errln("FAIL: retain('s')");
0598: return;
0599: }
0600:
0601: // ICU 2.6 coverage tests
0602: // public final UnicodeSet retain(String s);
0603: // public final UnicodeSet remove(int c);
0604: // public final UnicodeSet remove(String s);
0605: // public int hashCode();
0606: set.applyPattern("[a-z{ab}{cd}]");
0607: set.retain("cd");
0608: exp.applyPattern("[{cd}]");
0609: if (!set.equals(exp)) {
0610: errln("FAIL: retain(\"cd\")");
0611: return;
0612: }
0613:
0614: set.applyPattern("[a-z{ab}{cd}]");
0615: set.remove((char) 0x63);
0616: exp.applyPattern("[abd-z{ab}{cd}]");
0617: if (!set.equals(exp)) {
0618: errln("FAIL: remove('c')");
0619: return;
0620: }
0621:
0622: set.remove("cd");
0623: exp.applyPattern("[abd-z{ab}]");
0624: if (!set.equals(exp)) {
0625: errln("FAIL: remove(\"cd\")");
0626: return;
0627: }
0628:
0629: if (set.hashCode() != exp.hashCode()) {
0630: errln("FAIL: hashCode() unequal");
0631: }
0632: exp.clear();
0633: if (set.hashCode() == exp.hashCode()) {
0634: errln("FAIL: hashCode() equal");
0635: }
0636:
0637: {
0638: //Cover addAll(Collection) and addAllTo(Collection)
0639: // Seems that there is a bug in addAll(Collection) operation
0640: // Ram also add a similar test to UtilityTest.java
0641: logln("Testing addAll(Collection) ... ");
0642: String[] array = { "a", "b", "c", "de" };
0643: List list = Arrays.asList(array);
0644: Set aset = new HashSet(list);
0645: logln(" *** The source set's size is: " + aset.size());
0646:
0647: set.clear();
0648: set.addAll(aset);
0649: if (set.size() != aset.size()) {
0650: errln("FAIL: After addAll, the UnicodeSet size expected "
0651: + aset.size()
0652: + ", "
0653: + set.size()
0654: + " seen instead!");
0655: } else {
0656: logln("OK: After addAll, the UnicodeSet size got "
0657: + set.size());
0658: }
0659:
0660: List list2 = new ArrayList();
0661: set.addAllTo(list2);
0662:
0663: //verify the result
0664: log(" *** The elements are: ");
0665: String s = set.toPattern(true);
0666: logln(s);
0667: Iterator myiter = list2.iterator();
0668: while (myiter.hasNext()) {
0669: log(myiter.next().toString() + " ");
0670: }
0671: logln(""); // a new line
0672: }
0673:
0674: }
0675:
0676: public void TestStrings() {
0677: // Object[][] testList = {
0678: // {I_EQUALS, UnicodeSet.fromAll("abc"),
0679: // new UnicodeSet("[a-c]")},
0680: //
0681: // {I_EQUALS, UnicodeSet.from("ch").add('a','z').add("ll"),
0682: // new UnicodeSet("[{ll}{ch}a-z]")},
0683: //
0684: // {I_EQUALS, UnicodeSet.from("ab}c"),
0685: // new UnicodeSet("[{ab\\}c}]")},
0686: //
0687: // {I_EQUALS, new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
0688: // new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
0689: // };
0690: //
0691: // for (int i = 0; i < testList.length; ++i) {
0692: // expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
0693: // }
0694:
0695: UnicodeSet[][] testList = {
0696: { UnicodeSet.fromAll("abc"), new UnicodeSet("[a-c]") },
0697:
0698: { UnicodeSet.from("ch").add('a', 'z').add("ll"),
0699: new UnicodeSet("[{ll}{ch}a-z]") },
0700:
0701: { UnicodeSet.from("ab}c"), new UnicodeSet("[{ab\\}c}]") },
0702:
0703: {
0704: new UnicodeSet('a', 'z').add('A', 'Z').retain(
0705: 'M', 'm').complement('X'),
0706: new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]") }, };
0707:
0708: for (int i = 0; i < testList.length; ++i) {
0709: if (!testList[i][0].equals(testList[i][1])) {
0710: errln("FAIL: sets unequal; see source code (" + i + ")");
0711: }
0712: }
0713: }
0714:
0715: static final Integer I_ANY = new Integer(SortedSetRelation.ANY),
0716: I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
0717: I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
0718: I_NO_B = new Integer(SortedSetRelation.NO_B),
0719: I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
0720: I_EQUALS = new Integer(SortedSetRelation.EQUALS),
0721: I_NO_A = new Integer(SortedSetRelation.NO_A),
0722: I_NONE = new Integer(SortedSetRelation.NONE);
0723:
0724: public void TestSetRelation() {
0725:
0726: String[] choices = { "a", "b", "cd", "ef" };
0727: int limit = 1 << choices.length;
0728:
0729: SortedSet iset = new TreeSet();
0730: SortedSet jset = new TreeSet();
0731:
0732: for (int i = 0; i < limit; ++i) {
0733: pick(i, choices, iset);
0734: for (int j = 0; j < limit; ++j) {
0735: pick(j, choices, jset);
0736: checkSetRelation(iset, jset, "(" + i + ")");
0737: }
0738: }
0739: }
0740:
0741: public void TestSetSpeed() {
0742: // skip unless verbose
0743: if (!isVerbose())
0744: return;
0745:
0746: SetSpeed2(100);
0747: SetSpeed2(1000);
0748: }
0749:
0750: public void SetSpeed2(int size) {
0751:
0752: SortedSet iset = new TreeSet();
0753: SortedSet jset = new TreeSet();
0754:
0755: for (int i = 0; i < size * 2; i += 2) { // only even values
0756: iset.add(new Integer(i));
0757: jset.add(new Integer(i));
0758: }
0759:
0760: int iterations = 1000000 / size;
0761:
0762: logln("Timing comparison of Java vs Utility");
0763: logln("For about " + size
0764: + " objects that are almost all the same.");
0765:
0766: CheckSpeed(iset, jset, "when a = b", iterations);
0767:
0768: iset.add(new Integer(size + 1)); // add odd value in middle
0769:
0770: CheckSpeed(iset, jset, "when a contains b", iterations);
0771: CheckSpeed(jset, iset, "when b contains a", iterations);
0772:
0773: jset.add(new Integer(size - 1)); // add different odd value in middle
0774:
0775: CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
0776: }
0777:
0778: void CheckSpeed(SortedSet iset, SortedSet jset, String message,
0779: int iterations) {
0780: CheckSpeed2(iset, jset, message, iterations);
0781: CheckSpeed3(iset, jset, message, iterations);
0782: }
0783:
0784: void CheckSpeed2(SortedSet iset, SortedSet jset, String message,
0785: int iterations) {
0786: boolean x;
0787: boolean y;
0788:
0789: // make sure code is loaded:
0790: x = iset.containsAll(jset);
0791: y = SortedSetRelation.hasRelation(iset,
0792: SortedSetRelation.CONTAINS, jset);
0793: if (x != y)
0794: errln("FAIL contains comparison");
0795:
0796: double start = System.currentTimeMillis();
0797: for (int i = 0; i < iterations; ++i) {
0798: x |= iset.containsAll(jset);
0799: }
0800: double middle = System.currentTimeMillis();
0801: for (int i = 0; i < iterations; ++i) {
0802: y |= SortedSetRelation.hasRelation(iset,
0803: SortedSetRelation.CONTAINS, jset);
0804: }
0805: double end = System.currentTimeMillis();
0806:
0807: double jtime = (middle - start) / iterations;
0808: double utime = (end - middle) / iterations;
0809:
0810: java.text.NumberFormat nf = java.text.NumberFormat
0811: .getPercentInstance();
0812: logln("Test contains: " + message + ": Java: " + jtime
0813: + ", Utility: " + utime + ", u:j: "
0814: + nf.format(utime / jtime));
0815: }
0816:
0817: void CheckSpeed3(SortedSet iset, SortedSet jset, String message,
0818: int iterations) {
0819: boolean x;
0820: boolean y;
0821:
0822: // make sure code is loaded:
0823: x = iset.equals(jset);
0824: y = SortedSetRelation.hasRelation(iset,
0825: SortedSetRelation.EQUALS, jset);
0826: if (x != y)
0827: errln("FAIL equality comparison");
0828:
0829: double start = System.currentTimeMillis();
0830: for (int i = 0; i < iterations; ++i) {
0831: x |= iset.equals(jset);
0832: }
0833: double middle = System.currentTimeMillis();
0834: for (int i = 0; i < iterations; ++i) {
0835: y |= SortedSetRelation.hasRelation(iset,
0836: SortedSetRelation.EQUALS, jset);
0837: }
0838: double end = System.currentTimeMillis();
0839:
0840: double jtime = (middle - start) / iterations;
0841: double utime = (end - middle) / iterations;
0842:
0843: java.text.NumberFormat nf = java.text.NumberFormat
0844: .getPercentInstance();
0845: logln("Test equals: " + message + ": Java: " + jtime
0846: + ", Utility: " + utime + ", u:j: "
0847: + nf.format(utime / jtime));
0848: }
0849:
0850: void pick(int bits, Object[] examples, SortedSet output) {
0851: output.clear();
0852: for (int k = 0; k < 32; ++k) {
0853: if (((1 << k) & bits) != 0)
0854: output.add(examples[k]);
0855: }
0856: }
0857:
0858: public static final String[] RELATION_NAME = { "both-are-null",
0859: "a-is-null", "equals", "is-contained-in", "b-is-null",
0860: "is-disjoint_with", "contains", "any", };
0861:
0862: boolean dumbHasRelation(Collection A, int filter, Collection B) {
0863: Collection ab = new TreeSet(A);
0864: ab.retainAll(B);
0865: if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0)
0866: return false;
0867:
0868: // A - B size == A.size - A&B.size
0869: if (A.size() > ab.size()
0870: && (filter & SortedSetRelation.A_NOT_B) == 0)
0871: return false;
0872:
0873: // B - A size == B.size - A&B.size
0874: if (B.size() > ab.size()
0875: && (filter & SortedSetRelation.B_NOT_A) == 0)
0876: return false;
0877:
0878: return true;
0879: }
0880:
0881: void checkSetRelation(SortedSet a, SortedSet b, String message) {
0882: for (int i = 0; i < 8; ++i) {
0883:
0884: boolean hasRelation = SortedSetRelation
0885: .hasRelation(a, i, b);
0886: boolean dumbHasRelation = dumbHasRelation(a, i, b);
0887:
0888: logln(message + " " + hasRelation + ":\t" + a + "\t"
0889: + RELATION_NAME[i] + "\t" + b);
0890:
0891: if (hasRelation != dumbHasRelation) {
0892: errln("FAIL: " + message + " " + dumbHasRelation
0893: + ":\t" + a + "\t" + RELATION_NAME[i] + "\t"
0894: + b);
0895: }
0896: }
0897: logln("");
0898: }
0899:
0900: /**
0901: * Test the [:Latin:] syntax.
0902: */
0903: public void TestScriptSet() {
0904:
0905: expectContainment("[:Latin:]", "aA",
0906: CharsToUnicodeString("\\u0391\\u03B1"));
0907:
0908: expectContainment("[:Greek:]",
0909: CharsToUnicodeString("\\u0391\\u03B1"), "aA");
0910:
0911: /* Jitterbug 1423 */
0912: expectContainment("[[:Common:][:Inherited:]]",
0913: CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"),
0914: "aA");
0915:
0916: }
0917:
0918: /**
0919: * Test the [:Latin:] syntax.
0920: */
0921: public void TestPropertySet() {
0922: String[] DATA = {
0923: // Pattern, Chars IN, Chars NOT in
0924:
0925: "[:Latin:]",
0926: "aA",
0927: "\u0391\u03B1",
0928:
0929: "[\\p{Greek}]",
0930: "\u0391\u03B1",
0931: "aA",
0932:
0933: "\\P{ GENERAL Category = upper case letter }",
0934: "abc",
0935: "ABC",
0936:
0937: // Combining class: @since ICU 2.2
0938: // Check both symbolic and numeric
0939: "\\p{ccc=Nukta}",
0940: "\u0ABC",
0941: "abc",
0942:
0943: "\\p{Canonical Combining Class = 11}",
0944: "\u05B1",
0945: "\u05B2",
0946:
0947: "[:c c c = iota subscript :]",
0948: "\u0345",
0949: "xyz",
0950:
0951: // Bidi class: @since ICU 2.2
0952: "\\p{bidiclass=lefttoright}",
0953: "abc",
0954: "\u0671\u0672",
0955:
0956: // Binary properties: @since ICU 2.2
0957: "\\p{ideographic}",
0958: "\u4E0A",
0959: "x",
0960:
0961: "[:math=false:]",
0962: "q)*(", // )(and * were removed from math in Unicode 4.0.1
0963: "+<>^",
0964:
0965: // JB#1767 \N{}, \p{ASCII}
0966: "[:Ascii:]",
0967: "abc\u0000\u007F",
0968: "\u0080\u4E00",
0969:
0970: "[\\N{ latin small letter a }[:name= latin small letter z:]]",
0971: "az",
0972: "qrs",
0973:
0974: // JB#2015
0975: "[:any:]",
0976: "a\\U0010FFFF",
0977: "",
0978:
0979: "[:nv=0.5:]",
0980: "\u00BD\u0F2A",
0981: "\u00BC",
0982:
0983: // JB#2653: Age
0984: "[:Age=1.1:]",
0985: "\u03D6", // 1.1
0986: "\u03D8\u03D9", // 3.2
0987:
0988: "[:Age=3.1:]",
0989: "\\u1800\\u3400\\U0002f800",
0990: "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
0991:
0992: // JB#2350: Case_Sensitive
0993: "[:Case Sensitive:]",
0994: "A\u1FFC\\U00010410",
0995: ";\u00B4\\U00010500",
0996:
0997: // Regex compatibility test
0998: "[-b]", // leading '-' is literal
0999: "-b",
1000: "ac",
1001:
1002: "[^-b]", // leading '-' is literal
1003: "ac",
1004: "-b",
1005:
1006: "[b-]", // trailing '-' is literal
1007: "-b",
1008: "ac",
1009:
1010: "[^b-]", // trailing '-' is literal
1011: "ac",
1012: "-b",
1013:
1014: "[a-b-]", // trailing '-' is literal
1015: "ab-",
1016: "c=",
1017:
1018: "[[a-q]&[p-z]-]", // trailing '-' is literal
1019: "pq-",
1020: "or=",
1021:
1022: "[\\s|\\)|:|$|\\>]", // from regex tests
1023: "s|):$>",
1024: "\\abc",
1025:
1026: "[\uDC00cd]", // JB#2906: isolated trail at start
1027: "cd\uDC00",
1028: "ab\uD800\\U00010000",
1029:
1030: "[ab\uD800]", // JB#2906: isolated trail at start
1031: "ab\uD800",
1032: "cd\uDC00\\U00010000",
1033:
1034: "[ab\uD800cd]", // JB#2906: isolated lead in middle
1035: "abcd\uD800",
1036: "ef\uDC00\\U00010000",
1037:
1038: "[ab\uDC00cd]", // JB#2906: isolated trail in middle
1039: "abcd\uDC00",
1040: "ef\uD800\\U00010000",
1041:
1042: "[:^lccc=0:]", // Lead canonical class
1043: "\u0300\u0301",
1044: "abcd\u00c0\u00c5",
1045:
1046: "[:^tccc=0:]", // Trail canonical class
1047: "\u0300\u0301\u00c0\u00c5",
1048: "abcd",
1049:
1050: "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
1051: "\u0300\u0301\u00c0\u00c5",
1052: "abcd",
1053:
1054: "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
1055: "",
1056: "abcd\u0300\u0301\u00c0\u00c5",
1057:
1058: "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
1059: "\u0F73\u0F75\u0F81", "abcd\u0300\u0301\u00c0\u00c5",
1060:
1061: "[:Assigned:]",
1062: "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
1063: "\\u0888\\uFDD3\\uFFFE\\U00050005",
1064:
1065: };
1066:
1067: for (int i = 0; i < DATA.length; i += 3) {
1068: expectContainment(DATA[i], DATA[i + 1], DATA[i + 2]);
1069: }
1070: }
1071:
1072: public void TestUnicodeSetStrings() {
1073: UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
1074: logln(uset + " ~ " + uset.getRegexEquivalent());
1075: String[][] testStrings = { { "x", "none" }, { "bc", "all" },
1076: { "cdbca", "all" }, { "a", "all" }, { "bcx", "some" },
1077: { "ab", "some" }, { "acb", "some" },
1078: { "bcda", "some" }, { "dccbx", "none" }, };
1079: for (int i = 0; i < testStrings.length; ++i) {
1080: check(uset, testStrings[i][0], testStrings[i][1]);
1081: }
1082: }
1083:
1084: private void check(UnicodeSet uset, String string,
1085: String desiredStatus) {
1086: boolean shouldContainAll = desiredStatus.equals("all");
1087: boolean shouldContainNone = desiredStatus.equals("none");
1088: if (uset.containsAll(string) != shouldContainAll) {
1089: errln("containsAll " + string + " should be "
1090: + shouldContainAll);
1091: } else {
1092: logln("containsAll " + string + " = " + shouldContainAll);
1093: }
1094: if (uset.containsNone(string) != shouldContainNone) {
1095: errln("containsNone " + string + " should be "
1096: + shouldContainNone);
1097: } else {
1098: logln("containsNone " + string + " = " + shouldContainNone);
1099: }
1100: }
1101:
1102: /**
1103: * Test cloning of UnicodeSet
1104: */
1105: public void TestClone() {
1106: UnicodeSet s = new UnicodeSet("[abcxyz]");
1107: UnicodeSet t = (UnicodeSet) s.clone();
1108: expectContainment(t, "abc", "def");
1109: }
1110:
1111: /**
1112: * Test the indexOf() and charAt() methods.
1113: */
1114: public void TestIndexOf() {
1115: UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
1116: for (int i = 0; i < set.size(); ++i) {
1117: int c = set.charAt(i);
1118: if (set.indexOf(c) != i) {
1119: errln("FAIL: charAt(" + i + ") = " + c
1120: + " => indexOf() => " + set.indexOf(c));
1121: }
1122: }
1123: int c = set.charAt(set.size());
1124: if (c != -1) {
1125: errln("FAIL: charAt(<out of range>) = "
1126: + Utility.escape(String.valueOf(c)));
1127: }
1128: int j = set.indexOf('q');
1129: if (j != -1) {
1130: errln("FAIL: indexOf('q') = " + j);
1131: }
1132: }
1133:
1134: public void TestContainsString() {
1135: UnicodeSet x = new UnicodeSet("[a{bc}]");
1136: if (x.contains("abc"))
1137: errln("FAIL");
1138: }
1139:
1140: public void TestExhaustive() {
1141: // exhaustive tests. Simulate UnicodeSets with integers.
1142: // That gives us very solid tests (except for large memory tests).
1143:
1144: char limit = (char) 128;
1145:
1146: for (char i = 0; i < limit; ++i) {
1147: logln("Testing " + i + ", " + bitsToSet(i));
1148: _testComplement(i);
1149:
1150: // AS LONG AS WE ARE HERE, check roundtrip
1151: checkRoundTrip(bitsToSet(i));
1152:
1153: for (char j = 0; j < limit; ++j) {
1154: _testAdd(i, j);
1155: _testXor(i, j);
1156: _testRetain(i, j);
1157: _testRemove(i, j);
1158: }
1159: }
1160: }
1161:
1162: /**
1163: * Make sure each script name and abbreviated name can be used
1164: * to construct a UnicodeSet.
1165: */
1166: public void TestScriptNames() {
1167: for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
1168: for (int j = 0; j < 2; ++j) {
1169: String pat = "";
1170: try {
1171: String name = (j == 0) ? UScript.getName(i)
1172: : UScript.getShortName(i);
1173: pat = "[:" + name + ":]";
1174: UnicodeSet set = new UnicodeSet(pat);
1175: logln("Ok: " + pat);
1176: } catch (IllegalArgumentException e) {
1177: if (pat.length() == 0) {
1178: errln("FAIL (in UScript): No name for script "
1179: + i);
1180: } else {
1181: errln("FAIL: Couldn't create " + pat);
1182: }
1183: }
1184: }
1185: }
1186: }
1187:
1188: /**
1189: * Test closure API.
1190: */
1191: public void TestCloseOver() {
1192: String CASE = String.valueOf(UnicodeSet.CASE);
1193: String[] DATA = {
1194: // selector, input, output
1195: CASE, "[aq\u00DF{Bc}{bC}{Fi}]",
1196: "[aAqQ\u00DF\uFB01{ss}{bc}{fi}]",
1197:
1198: CASE,
1199: "[\u01F1]", // 'DZ'
1200: "[\u01F1\u01F2\u01F3]",
1201:
1202: CASE, "[\u1FB4]", "[\u1FB4{\u03AC\u03B9}]",
1203:
1204: CASE, "[{F\uFB01}]", "[\uFB03{ffi}]",
1205:
1206: CASE, "[a-z]", "[A-Za-z\u017F\u212A]", CASE, "[abc]",
1207: "[A-Ca-c]", CASE, "[ABC]", "[A-Ca-c]", };
1208:
1209: UnicodeSet s = new UnicodeSet();
1210: UnicodeSet t = new UnicodeSet();
1211: for (int i = 0; i < DATA.length; i += 3) {
1212: int selector = Integer.parseInt(DATA[i]);
1213: String pat = DATA[i + 1];
1214: String exp = DATA[i + 2];
1215: s.applyPattern(pat);
1216: s.closeOver(selector);
1217: t.applyPattern(exp);
1218: if (s.equals(t)) {
1219: logln("Ok: " + pat + ".closeOver(" + selector + ") => "
1220: + exp);
1221: } else {
1222: errln("FAIL: " + pat + ".closeOver(" + selector
1223: + ") => " + s.toPattern(true) + ", expected "
1224: + exp);
1225: }
1226: }
1227:
1228: // Test the pattern API
1229: s.applyPattern("[abc]", UnicodeSet.CASE);
1230: expectContainment(s, "abcABC", "defDEF");
1231: s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
1232: expectContainment(s, "defDEF", "abcABC");
1233: }
1234:
1235: public void TestEscapePattern() {
1236: // The following pattern must contain at least one range "c-d"
1237: // for which isRuleWhiteSpace(c) or isRuleWhiteSpace(d) is true.
1238: String pattern = "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1239: String exp = "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1240: // We test this with two passes; in the second pass we
1241: // pre-unescape the pattern. Since U+200E is rule whitespace,
1242: // this fails -- which is what we expect.
1243: for (int pass = 1; pass <= 2; ++pass) {
1244: String pat = pattern;
1245: if (pass == 2) {
1246: pat = Utility.unescape(pat);
1247: }
1248: // Pattern is only good for pass 1
1249: boolean isPatternValid = (pass == 1);
1250:
1251: UnicodeSet set = null;
1252: try {
1253: set = new UnicodeSet(pat);
1254: } catch (IllegalArgumentException e) {
1255: set = null;
1256: }
1257: if ((set != null) != isPatternValid) {
1258: errln("FAIL: applyPattern(" + Utility.escape(pat)
1259: + ") => " + set);
1260: continue;
1261: }
1262: if (set == null) {
1263: continue;
1264: }
1265: if (set.contains((char) 0x0644)) {
1266: errln("FAIL: " + Utility.escape(pat)
1267: + " contains(U+0664)");
1268: }
1269:
1270: String newpat = set.toPattern(true);
1271: if (newpat.equals(exp)) {
1272: logln(Utility.escape(pat) + " => " + newpat);
1273: } else {
1274: errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
1275: }
1276:
1277: for (int i = 0; i < set.getRangeCount(); ++i) {
1278: StringBuffer str = new StringBuffer("Range ");
1279: str.append((char) (0x30 + i)).append(": ");
1280: UTF16.append(str, set.getRangeStart(i));
1281: str.append(" - ");
1282: UTF16.append(str, set.getRangeEnd(i));
1283: String s = Utility.escape(str.toString() + " ("
1284: + set.getRangeStart(i) + " - "
1285: + set.getRangeEnd(i) + ")");
1286: if (set.getRangeStart(i) < 0) {
1287: errln("FAIL: " + s);
1288: } else {
1289: logln(s);
1290: }
1291: }
1292: }
1293: }
1294:
1295: public void TestSymbolTable() {
1296: // Multiple test cases can be set up here. Each test case
1297: // is terminated by null:
1298: // var, value, var, value,..., input pat., exp. output pat., null
1299: String DATA[] = { "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
1300: "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null, "us",
1301: "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null };
1302:
1303: for (int i = 0; i < DATA.length; ++i) {
1304: TokenSymbolTable sym = new TokenSymbolTable();
1305:
1306: // Set up variables
1307: while (DATA[i + 2] != null) {
1308: sym.add(DATA[i], DATA[i + 1]);
1309: i += 2;
1310: }
1311:
1312: // Input pattern and expected output pattern
1313: String inpat = DATA[i], exppat = DATA[i + 1];
1314: i += 2;
1315:
1316: ParsePosition pos = new ParsePosition(0);
1317: UnicodeSet us = new UnicodeSet(inpat, pos, sym);
1318:
1319: // results
1320: if (pos.getIndex() != inpat.length()) {
1321: errln("Failed to read to end of string \"" + inpat
1322: + "\": read to " + pos.getIndex()
1323: + ", length is " + inpat.length());
1324: }
1325:
1326: UnicodeSet us2 = new UnicodeSet(exppat);
1327: if (!us.equals(us2)) {
1328: errln("Failed, got " + us + ", expected " + us2);
1329: } else {
1330: logln("Ok, got " + us);
1331: }
1332:
1333: //cover Unicode(String,ParsePosition,SymbolTable,int)
1334: ParsePosition inpos = new ParsePosition(0);
1335: UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym,
1336: UnicodeSet.IGNORE_SPACE);
1337: UnicodeSet expSet = new UnicodeSet(exppat);
1338: if (!inSet.equals(expSet)) {
1339: errln("FAIL: Failed, got " + inSet + ", expected "
1340: + expSet);
1341: } else {
1342: logln("OK: got " + inSet);
1343: }
1344: }
1345: }
1346:
1347: /**
1348: * Test that Posix style character classes [:digit:], etc.
1349: * have the Unicode definitions from TR 18.
1350: */
1351: public void TestPosixClasses() {
1352: expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
1353: expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
1354: expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
1355: expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
1356: expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
1357: expectEqual("POSIX xdigit", "[:xdigit:]",
1358: "[\\p{DecimalNumber}\\p{HexDigit}]");
1359: expectEqual("POSIX alnum", "[:alnum:]",
1360: "[\\p{Alphabetic}\\p{DecimalNumber}]");
1361: expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
1362: expectEqual(
1363: "POSIX blank",
1364: "[:blank:]",
1365: "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
1366: expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
1367: expectEqual("POSIX graph", "[:graph:]",
1368: "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
1369: expectEqual("POSIX print", "[:print:]",
1370: "[[:graph:][:blank:]-[\\p{Control}]]");
1371: }
1372:
1373: /**
1374: * Test that frozen classes disallow changes. For 4217
1375: */
1376: public void TestFrozen() {
1377: UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
1378: test.freeze();
1379: checkModification(test, true);
1380: checkModification(test, false);
1381: }
1382:
1383: public void checkModification(UnicodeSet original, boolean isFrozen) {
1384: main: for (int i = 0;; ++i) {
1385: UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone()
1386: : original.cloneAsThawed());
1387: boolean gotException = true;
1388: boolean checkEquals = true;
1389: try {
1390: switch (i) {
1391: case 0:
1392: test.add(0);
1393: break;
1394: case 1:
1395: test.add(0, 1);
1396: break;
1397: case 2:
1398: test.add("a");
1399: break;
1400: case 3:
1401: List a = new ArrayList();
1402: a.add("a");
1403: test.addAll(a);
1404: break;
1405: case 4:
1406: test.addAll("ab");
1407: break;
1408: case 5:
1409: test.addAll(new UnicodeSet("[ab]"));
1410: break;
1411: case 6:
1412: test.applyIntPropertyValue(0, 0);
1413: break;
1414: case 7:
1415: test.applyPattern("[ab]");
1416: break;
1417: case 8:
1418: test.applyPattern("[ab]", true);
1419: break;
1420: case 9:
1421: test.applyPattern("[ab]", 0);
1422: break;
1423: case 10:
1424: test.applyPropertyAlias("hex", "true");
1425: break;
1426: case 11:
1427: test.applyPropertyAlias("hex", "true", null);
1428: break;
1429: case 12:
1430: test.closeOver(UnicodeSet.CASE);
1431: break;
1432: case 13:
1433: test.compact();
1434: checkEquals = false;
1435: break;
1436: case 14:
1437: test.complement(0);
1438: break;
1439: case 15:
1440: test.complement(0, 0);
1441: break;
1442: case 16:
1443: test.complement("ab");
1444: break;
1445: case 17:
1446: test.complementAll("ab");
1447: break;
1448: case 18:
1449: test.complementAll(new UnicodeSet("[ab]"));
1450: break;
1451: case 19:
1452: test.remove(' ');
1453: break;
1454: case 20:
1455: test.remove(' ', 'a');
1456: break;
1457: case 21:
1458: test.remove(" ");
1459: break;
1460: case 22:
1461: test.removeAll(" a");
1462: break;
1463: case 23:
1464: test.removeAll(new UnicodeSet("[\\ a]"));
1465: break;
1466: case 24:
1467: test.retain(' ');
1468: break;
1469: case 25:
1470: test.retain(' ', 'a');
1471: break;
1472: case 26:
1473: test.retain(" ");
1474: break;
1475: case 27:
1476: test.retainAll(" a");
1477: break;
1478: case 28:
1479: test.retainAll(new UnicodeSet("[\\ a]"));
1480: break;
1481: case 29:
1482: test.set(0, 1);
1483: break;
1484: case 30:
1485: test.set(new UnicodeSet("[ab]"));
1486: break;
1487:
1488: default:
1489: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
1490: case 35:
1491: return;
1492: }
1493: gotException = false;
1494: } catch (UnsupportedOperationException e) {
1495: // do nothing
1496: }
1497: if (isFrozen && !gotException)
1498: errln(i
1499: + ") attempt to modify frozen object didn't result in an exception");
1500: if (!isFrozen && gotException)
1501: errln(i
1502: + ") attempt to modify thawed object did result in an exception");
1503: if (checkEquals) {
1504: if (test.equals(original)) {
1505: if (!isFrozen)
1506: errln(i
1507: + ") attempt to modify thawed object didn't change the object");
1508: } else { // unequal
1509: if (isFrozen)
1510: errln(i
1511: + ") attempt to modify frozen object changed the object");
1512: }
1513: }
1514: }
1515: }
1516:
1517: String[] prettyData = { "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
1518: "[:any:]", "[:whitespace:]", "[:linebreak=AL:]", };
1519:
1520: public void TestPrettyPrinting() {
1521: try {
1522: PrettyPrinter pp = new PrettyPrinter();
1523:
1524: int i = 0;
1525: for (; i < prettyData.length; ++i) {
1526: UnicodeSet test = new UnicodeSet(prettyData[i]);
1527: checkPrettySet(pp, i, test);
1528: }
1529: Random random = new Random(0);
1530: UnicodeSet test = new UnicodeSet();
1531: for (; i < 1000; ++i) {
1532: double start = random.nextGaussian() * 0x10000;
1533: if (start < 0)
1534: start = -start;
1535: if (start > 0x10FFFF) {
1536: start = 0x10FFFF;
1537: }
1538: double end = random.nextGaussian() * 0x100;
1539: if (end < 0)
1540: end = -end;
1541: end = start + end;
1542: if (end > 0x10FFFF) {
1543: end = 0x10FFFF;
1544: }
1545: test.complement((int) start, (int) end);
1546: checkPrettySet(pp, i, test);
1547: }
1548: } catch (RuntimeException ex) {
1549: warnln("Could not load Collator");
1550: }
1551: }
1552:
1553: private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
1554: String pretty = pp.toPattern(test);
1555: UnicodeSet retry = new UnicodeSet(pretty);
1556: if (!test.equals(retry)) {
1557: errln(i + ". Failed test: " + test + " != " + pretty);
1558: } else {
1559: logln(i + ". Worked for " + truncate(test.toString())
1560: + " => " + truncate(pretty));
1561: }
1562: }
1563:
1564: private String truncate(String string) {
1565: if (string.length() <= 100)
1566: return string;
1567: return string.substring(0, 97) + "...";
1568: }
1569:
1570: public class TokenSymbolTable implements SymbolTable {
1571: HashMap contents = new HashMap();
1572:
1573: /**
1574: * (Non-SymbolTable API) Add the given variable and value to
1575: * the table. Variable should NOT contain leading '$'.
1576: */
1577: public void add(String var, String value) {
1578: char[] buffer = new char[value.length()];
1579: value.getChars(0, value.length(), buffer, 0);
1580: add(var, buffer);
1581: }
1582:
1583: /**
1584: * (Non-SymbolTable API) Add the given variable and value to
1585: * the table. Variable should NOT contain leading '$'.
1586: */
1587: public void add(String var, char[] body) {
1588: logln("TokenSymbolTable: add \"" + var + "\" => \""
1589: + new String(body) + "\"");
1590: contents.put(var, body);
1591: }
1592:
1593: /* (non-Javadoc)
1594: * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
1595: */
1596: public char[] lookup(String s) {
1597: logln("TokenSymbolTable: lookup \"" + s + "\" => \""
1598: + new String((char[]) contents.get(s)) + "\"");
1599: return (char[]) contents.get(s);
1600: }
1601:
1602: /* (non-Javadoc)
1603: * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
1604: */
1605: public UnicodeMatcher lookupMatcher(int ch) {
1606: return null;
1607: }
1608:
1609: /* (non-Javadoc)
1610: * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
1611: java.text.ParsePosition, int)
1612: */
1613: public String parseReference(String text, ParsePosition pos,
1614: int limit) {
1615: int cp;
1616: int start = pos.getIndex();
1617: int i;
1618: for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
1619: cp = UTF16.charAt(text, i);
1620: if (!com.ibm.icu.lang.UCharacter
1621: .isUnicodeIdentifierPart(cp)) {
1622: break;
1623: }
1624: }
1625: logln("TokenSymbolTable: parse \"" + text + "\" from "
1626: + start + " to " + i + " => \""
1627: + text.substring(start, i) + "\"");
1628: pos.setIndex(i);
1629: return text.substring(start, i);
1630: }
1631: }
1632:
1633: public void TestSurrogate() {
1634: String DATA[] = {
1635: // These should all behave identically
1636: "[abc\\uD800\\uDC00]", "[abc\uD800\uDC00]",
1637: "[abc\\U00010000]", };
1638: for (int i = 0; i < DATA.length; ++i) {
1639: logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
1640: UnicodeSet set = new UnicodeSet(DATA[i]);
1641: expectContainment(set,
1642: CharsToUnicodeString("abc\\U00010000"),
1643: "\uD800;\uDC00"); // split apart surrogate-pair
1644: if (set.size() != 4) {
1645: errln(Utility.escape("FAIL: " + DATA[i] + ".size() == "
1646: + set.size() + ", expected 4"));
1647: }
1648: }
1649: }
1650:
1651: void _testComplement(int a) {
1652: UnicodeSet x = bitsToSet(a);
1653: UnicodeSet z = bitsToSet(a);
1654: z.complement();
1655: int c = setToBits(z);
1656: if (c != (~a)) {
1657: errln("FAILED: add: ~" + x + " != " + z);
1658: errln("FAILED: add: ~" + a + " != " + c);
1659: }
1660: checkCanonicalRep(z, "complement " + a);
1661: }
1662:
1663: void _testAdd(int a, int b) {
1664: UnicodeSet x = bitsToSet(a);
1665: UnicodeSet y = bitsToSet(b);
1666: UnicodeSet z = bitsToSet(a);
1667: z.addAll(y);
1668: int c = setToBits(z);
1669: if (c != (a | b)) {
1670: errln(Utility.escape("FAILED: add: " + x + " | " + y
1671: + " != " + z));
1672: errln("FAILED: add: " + a + " | " + b + " != " + c);
1673: }
1674: checkCanonicalRep(z, "add " + a + "," + b);
1675: }
1676:
1677: void _testRetain(int a, int b) {
1678: UnicodeSet x = bitsToSet(a);
1679: UnicodeSet y = bitsToSet(b);
1680: UnicodeSet z = bitsToSet(a);
1681: z.retainAll(y);
1682: int c = setToBits(z);
1683: if (c != (a & b)) {
1684: errln("FAILED: retain: " + x + " & " + y + " != " + z);
1685: errln("FAILED: retain: " + a + " & " + b + " != " + c);
1686: }
1687: checkCanonicalRep(z, "retain " + a + "," + b);
1688: }
1689:
1690: void _testRemove(int a, int b) {
1691: UnicodeSet x = bitsToSet(a);
1692: UnicodeSet y = bitsToSet(b);
1693: UnicodeSet z = bitsToSet(a);
1694: z.removeAll(y);
1695: int c = setToBits(z);
1696: if (c != (a & ~b)) {
1697: errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
1698: errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
1699: }
1700: checkCanonicalRep(z, "remove " + a + "," + b);
1701: }
1702:
1703: void _testXor(int a, int b) {
1704: UnicodeSet x = bitsToSet(a);
1705: UnicodeSet y = bitsToSet(b);
1706: UnicodeSet z = bitsToSet(a);
1707: z.complementAll(y);
1708: int c = setToBits(z);
1709: if (c != (a ^ b)) {
1710: errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
1711: errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
1712: }
1713: checkCanonicalRep(z, "complement " + a + "," + b);
1714: }
1715:
1716: /**
1717: * Check that ranges are monotonically increasing and non-
1718: * overlapping.
1719: */
1720: void checkCanonicalRep(UnicodeSet set, String msg) {
1721: int n = set.getRangeCount();
1722: if (n < 0) {
1723: errln("FAIL result of " + msg
1724: + ": range count should be >= 0 but is " + n
1725: + " for " + Utility.escape(set.toString()));
1726: return;
1727: }
1728: int last = 0;
1729: for (int i = 0; i < n; ++i) {
1730: int start = set.getRangeStart(i);
1731: int end = set.getRangeEnd(i);
1732: if (start > end) {
1733: errln("FAIL result of " + msg + ": range " + (i + 1)
1734: + " start > end: " + start + ", " + end
1735: + " for " + Utility.escape(set.toString()));
1736: }
1737: if (i > 0 && start <= last) {
1738: errln("FAIL result of " + msg + ": range " + (i + 1)
1739: + " overlaps previous range: " + start + ", "
1740: + end + " for "
1741: + Utility.escape(set.toString()));
1742: }
1743: last = end;
1744: }
1745: }
1746:
1747: /**
1748: * Convert a bitmask to a UnicodeSet.
1749: */
1750: UnicodeSet bitsToSet(int a) {
1751: UnicodeSet result = new UnicodeSet();
1752: for (int i = 0; i < 32; ++i) {
1753: if ((a & (1 << i)) != 0) {
1754: result.add((char) i, (char) i);
1755: }
1756: }
1757:
1758: return result;
1759: }
1760:
1761: /**
1762: * Convert a UnicodeSet to a bitmask. Only the characters
1763: * U+0000 to U+0020 are represented in the bitmask.
1764: */
1765: static int setToBits(UnicodeSet x) {
1766: int result = 0;
1767: for (int i = 0; i < 32; ++i) {
1768: if (x.contains((char) i)) {
1769: result |= (1 << i);
1770: }
1771: }
1772: return result;
1773: }
1774:
1775: /**
1776: * Return the representation of an inversion list based UnicodeSet
1777: * as a pairs list. Ranges are listed in ascending Unicode order.
1778: * For example, the set [a-zA-M3] is represented as "33AMaz".
1779: */
1780: static String getPairs(UnicodeSet set) {
1781: StringBuffer pairs = new StringBuffer();
1782: for (int i = 0; i < set.getRangeCount(); ++i) {
1783: int start = set.getRangeStart(i);
1784: int end = set.getRangeEnd(i);
1785: if (end > 0xFFFF) {
1786: end = 0xFFFF;
1787: i = set.getRangeCount(); // Should be unnecessary
1788: }
1789: pairs.append((char) start).append((char) end);
1790: }
1791: return pairs.toString();
1792: }
1793:
1794: /**
1795: * Test function. Make sure that the sets have the right relation
1796: */
1797:
1798: void expectRelation(Object relationObj, Object set1Obj,
1799: Object set2Obj, String message) {
1800: int relation = ((Integer) relationObj).intValue();
1801: UnicodeSet set1 = (UnicodeSet) set1Obj;
1802: UnicodeSet set2 = (UnicodeSet) set2Obj;
1803:
1804: // by-the-by, check the iterator
1805: checkRoundTrip(set1);
1806: checkRoundTrip(set2);
1807:
1808: boolean contains = set1.containsAll(set2);
1809: boolean isContained = set2.containsAll(set1);
1810: boolean disjoint = set1.containsNone(set2);
1811: boolean equals = set1.equals(set2);
1812:
1813: UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
1814: UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
1815: UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
1816:
1817: // test basic properties
1818:
1819: if (contains != (intersection.size() == set2.size())) {
1820: errln("FAIL contains1" + set1.toPattern(true) + ", "
1821: + set2.toPattern(true));
1822: }
1823:
1824: if (contains != (intersection.equals(set2))) {
1825: errln("FAIL contains2" + set1.toPattern(true) + ", "
1826: + set2.toPattern(true));
1827: }
1828:
1829: if (isContained != (intersection.size() == set1.size())) {
1830: errln("FAIL isContained1" + set1.toPattern(true) + ", "
1831: + set2.toPattern(true));
1832: }
1833:
1834: if (isContained != (intersection.equals(set1))) {
1835: errln("FAIL isContained2" + set1.toPattern(true) + ", "
1836: + set2.toPattern(true));
1837: }
1838:
1839: if ((contains && isContained) != equals) {
1840: errln("FAIL equals" + set1.toPattern(true) + ", "
1841: + set2.toPattern(true));
1842: }
1843:
1844: if (disjoint != (intersection.size() == 0)) {
1845: errln("FAIL disjoint" + set1.toPattern(true) + ", "
1846: + set2.toPattern(true));
1847: }
1848:
1849: // Now see if the expected relation is true
1850: int status = (minus12.size() != 0 ? 4 : 0)
1851: | (intersection.size() != 0 ? 2 : 0)
1852: | (minus21.size() != 0 ? 1 : 0);
1853:
1854: if (status != relation) {
1855: errln("FAIL relation incorrect" + message + "; desired = "
1856: + RELATION_NAME[relation] + "; found = "
1857: + RELATION_NAME[status] + "; set1 = "
1858: + set1.toPattern(true) + "; set2 = "
1859: + set2.toPattern(true));
1860: }
1861: }
1862:
1863: /**
1864: * Basic consistency check for a few items.
1865: * That the iterator works, and that we can create a pattern and
1866: * get the same thing back
1867: */
1868:
1869: void checkRoundTrip(UnicodeSet s) {
1870: String pat = s.toPattern(false);
1871: UnicodeSet t = copyWithIterator(s, false);
1872: checkEqual(s, t, "iterator roundtrip");
1873:
1874: t = copyWithIterator(s, true); // try range
1875: checkEqual(s, t, "iterator roundtrip");
1876:
1877: t = new UnicodeSet(pat);
1878: checkEqual(s, t, "toPattern(false)");
1879:
1880: pat = s.toPattern(true);
1881: t = new UnicodeSet(pat);
1882: checkEqual(s, t, "toPattern(true)");
1883: }
1884:
1885: UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
1886: UnicodeSet t = new UnicodeSet();
1887: UnicodeSetIterator it = new UnicodeSetIterator(s);
1888: if (withRange) {
1889: while (it.nextRange()) {
1890: if (it.codepoint == UnicodeSetIterator.IS_STRING) {
1891: t.add(it.string);
1892: } else {
1893: t.add(it.codepoint, it.codepointEnd);
1894: }
1895: }
1896: } else {
1897: while (it.next()) {
1898: if (it.codepoint == UnicodeSetIterator.IS_STRING) {
1899: t.add(it.string);
1900: } else {
1901: t.add(it.codepoint);
1902: }
1903: }
1904: }
1905: return t;
1906: }
1907:
1908: boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
1909: if (!s.equals(t)) {
1910: errln("FAIL " + message + "; source = " + s.toPattern(true)
1911: + "; result = " + t.toPattern(true));
1912: return false;
1913: }
1914: return true;
1915: }
1916:
1917: void expectEqual(String name, String pat1, String pat2) {
1918: UnicodeSet set1, set2;
1919: try {
1920: set1 = new UnicodeSet(pat1);
1921: set2 = new UnicodeSet(pat2);
1922: } catch (IllegalArgumentException e) {
1923: errln("FAIL: Couldn't create UnicodeSet from pattern for \""
1924: + name + "\": " + e.getMessage());
1925: return;
1926: }
1927: if (!set1.equals(set2)) {
1928: errln("FAIL: Sets built from patterns differ for \"" + name
1929: + "\"");
1930: }
1931: }
1932:
1933: /**
1934: * Expect the given set to contain the characters in charsIn and
1935: * to not contain those in charsOut.
1936: */
1937: void expectContainment(String pat, String charsIn, String charsOut) {
1938: UnicodeSet set;
1939: try {
1940: set = new UnicodeSet(pat);
1941: } catch (IllegalArgumentException e) {
1942: errln("FAIL: Couldn't create UnicodeSet from pattern \""
1943: + pat + "\": " + e.getMessage());
1944: return;
1945: }
1946: expectContainment(set, charsIn, charsOut);
1947: }
1948:
1949: /**
1950: * Expect the given set to contain the characters in charsIn and
1951: * to not contain those in charsOut.
1952: */
1953: void expectContainment(UnicodeSet set, String charsIn,
1954: String charsOut) {
1955: StringBuffer bad = new StringBuffer();
1956: if (charsIn != null) {
1957: charsIn = Utility.unescape(charsIn);
1958: for (int i = 0; i < charsIn.length();) {
1959: int c = UTF16.charAt(charsIn, i);
1960: i += UTF16.getCharCount(c);
1961: if (!set.contains(c)) {
1962: UTF16.append(bad, c);
1963: }
1964: }
1965: if (bad.length() > 0) {
1966: errln(Utility.escape("FAIL: set " + set
1967: + " does not contain " + bad
1968: + ", expected containment of " + charsIn));
1969: } else {
1970: logln(Utility.escape("Ok: set " + set + " contains "
1971: + charsIn));
1972: }
1973: }
1974: if (charsOut != null) {
1975: charsOut = Utility.unescape(charsOut);
1976: bad.setLength(0);
1977: for (int i = 0; i < charsOut.length();) {
1978: int c = UTF16.charAt(charsOut, i);
1979: i += UTF16.getCharCount(c);
1980: if (set.contains(c)) {
1981: UTF16.append(bad, c);
1982: }
1983: }
1984: if (bad.length() > 0) {
1985: errln(Utility.escape("FAIL: set " + set + " contains "
1986: + bad + ", expected non-containment of "
1987: + charsOut));
1988: } else {
1989: logln(Utility.escape("Ok: set " + set
1990: + " does not contain " + charsOut));
1991: }
1992: }
1993: }
1994:
1995: void expectPattern(UnicodeSet set, String pattern,
1996: String expectedPairs) {
1997: set.applyPattern(pattern);
1998: if (!getPairs(set).equals(expectedPairs)) {
1999: errln("FAIL: applyPattern(\"" + pattern + "\") => pairs \""
2000: + Utility.escape(getPairs(set)) + "\", expected \""
2001: + Utility.escape(expectedPairs) + "\"");
2002: } else {
2003: logln("Ok: applyPattern(\"" + pattern + "\") => pairs \""
2004: + Utility.escape(getPairs(set)) + "\"");
2005: }
2006: }
2007:
2008: void expectToPattern(UnicodeSet set, String expPat,
2009: String[] expStrings) {
2010: String pat = set.toPattern(true);
2011: if (pat.equals(expPat)) {
2012: logln("Ok: toPattern() => \"" + pat + "\"");
2013: } else {
2014: errln("FAIL: toPattern() => \"" + pat + "\", expected \""
2015: + expPat + "\"");
2016: return;
2017: }
2018: if (expStrings == null) {
2019: return;
2020: }
2021: boolean in = true;
2022: for (int i = 0; i < expStrings.length; ++i) {
2023: if (expStrings[i] == NOT) { // sic; pointer comparison
2024: in = false;
2025: continue;
2026: }
2027: boolean contained = set.contains(expStrings[i]);
2028: if (contained == in) {
2029: logln("Ok: "
2030: + expPat
2031: + (contained ? " contains {"
2032: : " does not contain {")
2033: + Utility.escape(expStrings[i]) + "}");
2034: } else {
2035: errln("FAIL: "
2036: + expPat
2037: + (contained ? " contains {"
2038: : " does not contain {")
2039: + Utility.escape(expStrings[i]) + "}");
2040: }
2041: }
2042: }
2043:
2044: void expectPairs(UnicodeSet set, String expectedPairs) {
2045: if (!getPairs(set).equals(expectedPairs)) {
2046: errln("FAIL: Expected pair list \""
2047: + Utility.escape(expectedPairs) + "\", got \""
2048: + Utility.escape(getPairs(set)) + "\"");
2049: }
2050: }
2051:
2052: static final String CharsToUnicodeString(String s) {
2053: return Utility.unescape(s);
2054: }
2055:
2056: }
|