Source Code Cross Referenced for UnicodeSetTest.java in » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » translit » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.translit
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         *******************************************************************************
0003:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
0004:         * others. All Rights Reserved.                                                *
0005:         *******************************************************************************
0006:         */
0007:        package com.ibm.icu.dev.test.translit;
0008:
0009:        import com.ibm.icu.lang.*;
0010:        import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
0011:        import com.ibm.icu.text.*;
0012:        import com.ibm.icu.dev.test.*;
0013:        import com.ibm.icu.impl.PrettyPrinter;
0014:        import com.ibm.icu.impl.Utility;
0015:        import com.ibm.icu.impl.SortedSetRelation;
0016:        import java.util.*;
0017:        import java.text.ParsePosition;
0018:
0019:        /**
0020:         * @test
0021:         * @summary General test of UnicodeSet
0022:         */
0023:        public class UnicodeSetTest extends TestFmwk {
0024:
0025:            static final String NOT = "%%%%"; // marker string placed inside the String[] arguments passed to expectToPattern()
                 // NOTE(review): presumably entries before NOT are expected to be in the set and
                 // entries after it are expected to be absent — confirm in expectToPattern(),
                 // which is outside the visible part of this file.
0026:
0027:            public static void main(String[] args) throws Exception {
                     // Command-line entry point: creates a UnicodeSetTest and passes the
                     // arguments to run(). run() is not defined in the visible part of this
                     // file (presumably inherited from TestFmwk — confirm).
0028:                new UnicodeSetTest().run(args);
0029:            }
0030:
0031:            /**
0032:             * Test toPattern().
                  * <p>
                  * First checks that patterns generated by toPattern() can be parsed back
                  * into an equal UnicodeSet (through checkPat() and toPatternAux()). Then
                  * checks, through expectToPattern(), the pattern text produced for sets
                  * holding multi-character strings, a two-character range, and sets built
                  * with applyPattern(String, boolean) and applyPropertyAlias().
                  *
                  * @throws Exception declared broadly; propagated from whatever the called
                  *         helpers throw
0033:             */
0034:            public void TestToPattern() throws Exception {
0035:                // Test that toPattern() round trips with syntax characters
0036:                // and whitespace.
0037:                for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
0038:                    checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(
0039:                            OTHER_TOPATTERN_TESTS[i]));
0040:                }
                     // Walk every code point, but only exercise those that are either
                     // non-letters in U+0000..U+00FF or whitespace.
0041:                for (int i = 0; i <= 0x10FFFF; ++i) {
0042:                    if ((i <= 0xFF && !UCharacter.isLetter(i))
0043:                            || UCharacter.isWhitespace(i)) {
0044:                        // check various combinations to make sure they all work.
                             // Three ranges are tried: [i,i] (skipped for i == 0), [0,i] and
                             // [i,0xFFFF]. A failed check skips the remaining ones for this i.
0045:                        if (i != 0 && !toPatternAux(i, i))
0046:                            continue;
0047:                        if (!toPatternAux(0, i))
0048:                            continue;
0049:                        if (!toPatternAux(i, 0xFFFF))
0050:                            continue;
0051:                    }
0052:                }
0053:
0054:                // Test pattern behavior of multicharacter strings.
                     // NOTE(review): expectToPattern() is outside this view; the second argument
                     // looks like the expected toPattern() text and the array like a list of
                     // strings to probe, split by NOT — confirm.
0055:                UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
0056:                expectToPattern(s, "[a-z{aa}{ab}]", new String[] { "aa", "ab",
0057:                        NOT, "ac" });
0058:                s.add("ac");
0059:                expectToPattern(s, "[a-z{aa}{ab}{ac}]", new String[] { "aa",
0060:                        "ab", "ac", NOT, "xy" });
0061:
                     // Strings containing the syntax characters '{', '}', '[' and ']'.
0062:                s.applyPattern("[a-z {\\{l} {r\\}}]");
0063:                expectToPattern(s, "[a-z{r\\}}{\\{l}]", new String[] { "{l",
0064:                        "r}", NOT, "xy" });
0065:                s.add("[]");
0066:                expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]", new String[] {
0067:                        "{l", "r}", "[]", NOT, "xy" });
0068:
                     // Strings containing non-ASCII characters and the escapes \n and \r.
0069:                s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
0070:                expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
0071:                        new String[] { "\u4E01\u4E02", "\n\r" });
0072:
                     // Adding the same string twice: the expected pattern lists it once.
0073:                s.clear();
0074:                s.add("abc");
0075:                s.add("abc");
0076:                expectToPattern(s, "[{abc}]", new String[] { "abc", NOT, "ab" });
0077:
0078:                // JB#3400: For 2 character ranges prefer [ab] to [a-b]
0079:                s.clear();
0080:                s.add('a', 'b');
0081:                expectToPattern(s, "[ab]", null);
0082:
0083:                // Cover applyPattern, applyPropertyAlias
                     // With the boolean argument true the space in "[ab ]" is not expected in
                     // the result; with false it is expected as an escaped space.
0084:                s.clear();
0085:                s.applyPattern("[ab ]", true);
0086:                expectToPattern(s, "[ab]", new String[] { "a", NOT, "ab" });
0087:                s.clear();
0088:                s.applyPattern("[ab ]", false);
0089:                expectToPattern(s, "[\\\u0020ab]", new String[] { "a",
0090:                        "\u0020", NOT, "ab" });
0091:
                     // Property alias "nv" with value "0.5".
0092:                s.clear();
0093:                s.applyPropertyAlias("nv", "0.5");
0094:                expectToPattern(
0095:                        s,
0096:                        "[\\u00BD\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]",
0097:                        null);
0098:                // Unicode 4.1 adds \u2CFD\U00010141\U00010175\U00010176 with numeric value 1/2
0099:
                     // Property alias "gc" with value "Lu": only applied here, the resulting
                     // set is not checked (see the TODO below).
0100:                s.clear();
0101:                s.applyPropertyAlias("gc", "Lu");
0102:                // TODO expectToPattern(s, what?)
0103:            }
0104:
0105:            static String[] OTHER_TOPATTERN_TESTS = { "[[:latin:]&[:greek:]]",
                         // Patterns that TestToPattern() parses into a UnicodeSet and then
                         // round-trips through checkPat().
0106:                    "[[:latin:]-[:greek:]]", "[:nonspacing mark:]" };
0107:
0108:            public boolean toPatternAux(int start, int end) {
                     // Builds a set holding the range start..end and verifies, through
                     // checkPat(), that its generated patterns parse back to an equal set.
                     // The label passed to checkPat is "0xSTART" or "0xSTART..0xEND" in
                     // upper-case hex. Returns whatever checkPat returns.
0109:                // Integer.toString is used because Utility.hex doesn't handle ints
0110:                String startHex = Integer.toString(start, 16).toUpperCase();
0111:                String label = (start == end)
0112:                        ? "0x" + startHex
0113:                        : "0x" + startHex + "..0x" + Integer.toString(end, 16).toUpperCase();
0114:                UnicodeSet range = new UnicodeSet();
0115:                range.add(start, end);
0116:                return checkPat(label, range);
0117:            }
0118:
0119:            boolean checkPat(String source, UnicodeSet testSet) {
                     // Verifies that both forms of testSet's generated pattern (escaped
                     // and unescaped unprintables) parse back into a set equal to testSet.
                     //   source  - label describing the set, used only in log/error messages
                     //   testSet - the set whose toPattern() output is being checked
                     // Returns true when both patterns round-trip; returns false after
                     // reporting through errln() on a mismatch or an exception.
                     //
                     // "pat" tracks the pattern currently being checked so the exception
                     // message can show it. It stays "" if toPattern() itself throws.
0120:                String pat = "";
0121:                try {
0122:                    // What we want to make sure of is that a pattern generated
0123:                    // by toPattern(), with or without escaped unprintables, can
0124:                    // be passed back into the UnicodeSet constructor.
0125:                    String pat0 = pat = testSet.toPattern(true);
0126:                    if (!checkPat(source + " (escaped)", testSet, pat0))
0127:                        return false;
0128:
0129:                    //String pat1 = unescapeLeniently(pat0);
0130:                    //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
0131:
0132:                    String pat2 = pat = testSet.toPattern(false);
0133:                    if (!checkPat(source, testSet, pat2))
0134:                        return false;
0135:
0136:                    //String pat3 = unescapeLeniently(pat2);
0137:                    //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
0138:
0139:                    //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
0140:                    logln(source + " => " + pat0 + ", " + pat2);
0141:                } catch (Exception e) {
                         // Report the offending pattern and the exception itself.
0142:                    errln("EXCEPTION in toPattern: " + source + " => " + pat + ": " + e);
0143:                    return false;
0144:                }
0145:                return true;
0146:            }
0147:
0148:            boolean checkPat(String source, UnicodeSet testSet, String pat) {
                     // Parses pat into a new set and compares it with testSet.
                     // Returns true when they are equal; otherwise reports both patterns
                     // through errln() and returns false. source is only used as a label.
0149:                UnicodeSet reparsed = new UnicodeSet(pat);
0150:                if (reparsed.equals(testSet)) {
0151:                    return true;
0152:                }
0153:                errln("Fail toPattern: " + source + "; " + pat + " => "
0154:                        + reparsed.toPattern(false) + ", expected "
0155:                        + testSet.toPattern(false));
0156:                return false;
0157:            }
0158:
0159:            // NOTE: copied the following from Utility. There ought to be a version in there with a flag
0160:            // that does the Java stuff
0161:
0162:            public static int unescapeAt(String s, int[] offset16) {
                     // Decodes one escape sequence. offset16[0] must be the index in s of the
                     // character that follows the backslash.
                     // On success, returns the decoded value and advances offset16[0] past
                     // the sequence. Returns -1, leaving offset16[0] untouched, when the
                     // offset is out of range or a numeric escape has too few digits.
                     // Recognised forms:
                     //   u followed by exactly 4 hex digits
                     //   1 to 3 octal digits
                     //   the letters listed in UNESCAPE_MAP
                     // Any other character is returned as itself. The U and x forms are
                     // commented out below.
0163:                int c;
0164:                int result = 0;
0165:                int n = 0;
0166:                int minDig = 0;
0167:                int maxDig = 0;
0168:                int bitsPerDigit = 4;
0169:                int dig;
0170:                int i;
0171:
0172:                /* Check that offset is in range */
0173:                int offset = offset16[0];
0174:                int length = s.length();
0175:                if (offset < 0 || offset >= length) {
0176:                    return -1;
0177:                }
0178:
0179:                /* Fetch first UChar after '\\' */
0180:                c = UTF16.charAt(s, offset);
0181:                offset += UTF16.getCharCount(c);
0182:
0183:                /* Convert hexadecimal and octal escapes */
                     // minDig stays 0 unless a numeric escape is recognised; that is what
                     // the "minDig != 0" test below relies on.
0184:                switch (c) {
0185:                case 'u':
0186:                    minDig = maxDig = 4;
0187:                    break;
0188:                /*
0189:                case 'U':
0190:                    minDig = maxDig = 8;
0191:                    break;
0192:                case 'x':
0193:                    minDig = 1;
0194:                    maxDig = 2;
0195:                    break;
0196:                 */
0197:                default:
0198:                    dig = UCharacter.digit(c, 8);
0199:                    if (dig >= 0) {
0200:                        minDig = 1;
0201:                        maxDig = 3;
0202:                        n = 1; /* Already have first octal digit */
0203:                        bitsPerDigit = 3;
0204:                        result = dig;
0205:                    }
0206:                    break;
0207:                }
0208:                if (minDig != 0) {
                         // Accumulate up to maxDig digits, stopping at the first non-digit or
                         // at the end of the string.
0209:                    while (offset < length && n < maxDig) {
0210:                        // TEMPORARY
0211:                        // TODO: Restore the char32-based code when UCharacter.digit
0212:                        // is working (Bug 66).
0213:
0214:                        //c = UTF16.charAt(s, offset);
0215:                        //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
0216:                        c = s.charAt(offset);
0217:                        dig = Character.digit((char) c, (bitsPerDigit == 3) ? 8
0218:                                : 16);
0219:                        if (dig < 0) {
0220:                            break;
0221:                        }
0222:                        result = (result << bitsPerDigit) | dig;
0223:                        //offset += UTF16.getCharCount(c);
0224:                        ++offset;
0225:                        ++n;
0226:                    }
                         // Too few digits: fail without updating offset16[0].
0227:                    if (n < minDig) {
0228:                        return -1;
0229:                    }
0230:                    offset16[0] = offset;
0231:                    return result;
0232:                }
0233:
0234:                /* Convert C-style escapes in table */
                     // UNESCAPE_MAP holds (escape letter, replacement) pairs in ascending
                     // order of the escape letter, so the scan can stop once c is smaller
                     // than the current key.
0235:                for (i = 0; i < UNESCAPE_MAP.length; i += 2) {
0236:                    if (c == UNESCAPE_MAP[i]) {
0237:                        offset16[0] = offset;
0238:                        return UNESCAPE_MAP[i + 1];
0239:                    } else if (c < UNESCAPE_MAP[i]) {
0240:                        break;
0241:                    }
0242:                }
0243:
0244:                /* If no special forms are recognized, then consider
0245:                 * the backslash to generically escape the next character. */
0246:                offset16[0] = offset;
0247:                return c;
0248:            }
0249:
0250:            /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
                 /* Flat list of pairs: the character that follows the backslash, then the
                  * character it stands for. unescapeAt() scans it two entries at a time and
                  * stops early once the looked-up character is smaller than the current
                  * key, which is why the keys must stay sorted. The entries for the double
                  * quote, single quote, question mark and backslash are commented out. */
0251:            static private final char[] UNESCAPE_MAP = {
0252:            /*"   0x22, 0x22 */
0253:            /*'   0x27, 0x27 */
0254:            /*?   0x3F, 0x3F */
0255:            /*\   0x5C, 0x5C */
0256:            /*a*/0x61, 0x07,
0257:            /*b*/0x62, 0x08,
0258:            /*f*/0x66, 0x0c,
0259:            /*n*/0x6E, 0x0a,
0260:            /*r*/0x72, 0x0d,
0261:            /*t*/0x74, 0x09,
0262:            /*v*/0x76, 0x0b };
0263:
0264:            /**
0265:             * Replaces every backslash escape in the given string with the value
0266:             * produced by unescapeAt(). A backslash whose escape cannot be decoded
0267:             * (unescapeAt() returns a negative value) is copied through unchanged.
                  *
                  * @param s the text to unescape
                  * @return a new string with all decodable escapes replaced
                  */
0268:            public static String unescapeLeniently(String s) {
0269:                StringBuffer out = new StringBuffer();
0270:                int[] cursor = new int[1];
0271:                for (int index = 0; index < s.length();) {
0272:                    char ch = s.charAt(index++);
0273:                    if (ch != '\\') {
0274:                        out.append(ch);
0275:                        continue;
0276:                    }
0277:                    cursor[0] = index;
0278:                    int decoded = unescapeAt(s, cursor);
0279:                    if (decoded >= 0) {
0280:                        UTF16.append(out, decoded);
0281:                        index = cursor[0];
0282:                    } else {
0283:                        out.append(ch);
0284:                    }
0285:                }
0286:                return out.toString();
0287:            }
0288:
0289:            public void TestPatterns() {
                     // Checks set-pattern parsing: intersection (&), difference (-), a literal
                     // '-' (escaped, leading or trailing) and nested brackets, followed by
                     // one check of complement().
                     // NOTE(review): expectPattern() and expectPairs() are outside this view.
                     // The expected strings look like inclusive range endpoints written
                     // pairwise (e.g. "km" for k..m) — confirm.
0290:                UnicodeSet set = new UnicodeSet();
0291:                expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
0292:                expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
0293:                expectPattern(set, "[a\\-z]", "--aazz");
0294:                expectPattern(set, "[-az]", "--aazz");
0295:                expectPattern(set, "[az-]", "--aazz");
0296:                expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
0297:
0298:                // Throw in a test of complement
                     // The set still holds whatever the last expectPattern() call left in it.
0299:                set.complement();
0300:                String exp = '\u0000' + "aeeoouu" + (char) ('z' + 1) + '\uFFFF';
0301:                expectPairs(set, exp);
0302:            }
0303:
0304:            public void TestCategories() {
                     // Checks the general-category patterns [:Lu:] and [:L:] against
                     // UCharacter for the code points 0..0x1FF.
                     // "failures" is shared by both loops: the first loop stops once the
                     // count reaches 10, the second once it reaches 20.
0305:                int failures = 0;
0306:                UnicodeSet set = new UnicodeSet("[:Lu:]");
0307:                expectContainment(set, "ABC", "abc");
0308:
0309:                // Make sure generation of L doesn't pollute cached Lu set
0310:                // First generate L, then Lu
0311:                // not used int TOP = 0x200; // Don't need to go over the whole range:
0312:                set = new UnicodeSet("[:L:]");
0313:                for (int i = 0; i < 0x200; ++i) {
0314:                    boolean l = UCharacter.isLetter(i);
0315:                    if (l != set.contains((char) i)) {
0316:                        errln("FAIL: L contains " + (char) i + " = "
0317:                                + set.contains((char) i));
0318:                        if (++failures == 10)
0319:                            break;
0320:                    }
0321:                }
0322:
                     // Build [:Lu:] again after [:L:] and compare it with
                     // UCharacter.getType() == UPPERCASE_LETTER.
0323:                set = new UnicodeSet("[:Lu:]");
0324:                for (int i = 0; i < 0x200; ++i) {
0325:                    boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
0326:                    if (lu != set.contains((char) i)) {
0327:                        errln("FAIL: Lu contains " + (char) i + " = "
0328:                                + set.contains((char) i));
0329:                        if (++failures == 20)
0330:                            break;
0331:                    }
0332:                }
0333:            }
0334:
0335:            public void TestAddRemove() {
                     // Exercises add(start, end), remove(start, end), removeAll(UnicodeSet)
                     // and addAll(UnicodeSet), checking the set contents after every step.
                     // NOTE(review): expectPairs() and expectPattern() are outside this view.
                     // The expected strings look like inclusive range endpoints written
                     // pairwise (e.g. "alqz" for a..l and q..z) — confirm.
0336:                UnicodeSet set = new UnicodeSet();
0337:                set.add('a', 'z');
0338:                expectPairs(set, "az");
0339:                set.remove('m', 'p');
0340:                expectPairs(set, "alqz");
0341:                set.remove('e', 'g');
0342:                expectPairs(set, "adhlqz");
0343:                set.remove('d', 'i');
0344:                expectPairs(set, "acjlqz");
0345:                set.remove('c', 'r');
0346:                expectPairs(set, "absz");
0347:                set.add('f', 'q');
0348:                expectPairs(set, "abfqsz");
0349:                set.remove('a', 'g');
0350:                expectPairs(set, "hqsz");
0351:                set.remove('a', 'z');
0352:                expectPairs(set, "");
0353:
0354:                // Try removing an entire set from another set
0355:                expectPattern(set, "[c-x]", "cx");
0356:                UnicodeSet set2 = new UnicodeSet();
0357:                expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
0358:                set.removeAll(set2);
0359:                expectPairs(set, "deluxx");
0360:
0361:                // Try adding an entire set to another set
0362:                expectPattern(set, "[jackiemclean]", "aacceein");
0363:                expectPattern(set2, "[hitoshinamekatajamesanderson]",
0364:                        "aadehkmort");
0365:                set.addAll(set2);
0366:                expectPairs(set, "aacehort");
0367:
0368:                // Test commutativity
                     // Same two patterns with the roles of set and set2 swapped; the union
                     // is expected to be the same.
0369:                expectPattern(set, "[hitoshinamekatajamesanderson]",
0370:                        "aadehkmort");
0371:                expectPattern(set2, "[jackiemclean]", "aacceein");
0372:                set.addAll(set2);
0373:                expectPairs(set, "aacehort");
0374:            }
0375:
0376:            /**
0377:             * Make sure minimal representation is maintained.
                  * <p>
                  * Each case unions two sets with overlapping ranges through addAll() and
                  * expects the result to be reported as a single merged range.
0378:             */
0379:            public void TestMinimalRep() {
0380:                // This is pretty thoroughly tested by checkCanonicalRep()
0381:                // run against the exhaustive operation results.  Use the code
0382:                // here for debugging specific spot problems.
0383:
0384:                // 1 overlap against 2
0385:                UnicodeSet set = new UnicodeSet("[h-km-q]");
0386:                UnicodeSet set2 = new UnicodeSet("[i-o]");
0387:                set.addAll(set2);
0388:                expectPairs(set, "hq");
0389:                // right
0390:                set.applyPattern("[a-m]");
0391:                set2.applyPattern("[e-o]");
0392:                set.addAll(set2);
0393:                expectPairs(set, "ao");
0394:                // left
0395:                set.applyPattern("[e-o]");
0396:                set2.applyPattern("[a-m]");
0397:                set.addAll(set2);
0398:                expectPairs(set, "ao");
0399:                // 1 overlap against 3
0400:                set.applyPattern("[a-eg-mo-w]");
0401:                set2.applyPattern("[d-q]");
0402:                set.addAll(set2);
0403:                expectPairs(set, "aw");
0404:            }
0405:
0406:            public void TestAPI() {
                     // Broad coverage test of the UnicodeSet API. The sections below run in
                     // order and share the local sets "set" and "exp", so each section depends
                     // on the state left by the previous one. Several checks return early on
                     // failure, skipping the rest of the method.
0407:                // default ct
0408:                UnicodeSet set = new UnicodeSet();
0409:                if (!set.isEmpty() || set.getRangeCount() != 0) {
0410:                    errln("FAIL, set should be empty but isn't: " + set);
0411:                }
0412:
0413:                // clear(), isEmpty()
0414:                set.add('a');
0415:                if (set.isEmpty()) {
0416:                    errln("FAIL, set shouldn't be empty but is: " + set);
0417:                }
0418:                set.clear();
0419:                if (!set.isEmpty()) {
0420:                    errln("FAIL, set should be empty but isn't: " + set);
0421:                }
0422:
0423:                // size()
0424:                set.clear();
0425:                if (set.size() != 0) {
0426:                    errln("FAIL, size should be 0, but is " + set.size() + ": "
0427:                            + set);
0428:                }
0429:                set.add('a');
0430:                if (set.size() != 1) {
0431:                    errln("FAIL, size should be 1, but is " + set.size() + ": "
0432:                            + set);
0433:                }
0434:                set.add('1', '9');
0435:                if (set.size() != 10) {
0436:                    errln("FAIL, size should be 10, but is " + set.size()
0437:                            + ": " + set);
0438:                }
                     // The complement of the empty set must hold all 0x110000 code points.
0439:                set.clear();
0440:                set.complement();
0441:                if (set.size() != 0x110000) {
                         // NOTE(review): this message has no space between "is" and the size.
0442:                    errln("FAIL, size should be 0x110000, but is" + set.size());
0443:                }
0444:
0445:                // contains(first, last)
                     // For every range of the set: the exact range must be contained, and
                     // the range widened by one at either end must not be.
0446:                set.clear();
0447:                set.applyPattern("[A-Y 1-8 b-d l-y]");
0448:                for (int i = 0; i < set.getRangeCount(); ++i) {
0449:                    int a = set.getRangeStart(i);
0450:                    int b = set.getRangeEnd(i);
0451:                    if (!set.contains(a, b)) {
0452:                        errln("FAIL, should contain " + (char) a + '-'
0453:                                + (char) b + " but doesn't: " + set);
0454:                    }
0455:                    if (set.contains((char) (a - 1), b)) {
0456:                        errln("FAIL, shouldn't contain " + (char) (a - 1) + '-'
0457:                                + (char) b + " but does: " + set);
0458:                    }
0459:                    if (set.contains(a, (char) (b + 1))) {
0460:                        errln("FAIL, shouldn't contain " + (char) a + '-'
0461:                                + (char) (b + 1) + " but does: " + set);
0462:                    }
0463:                }
0464:
0465:                // Ported InversionList test.
                     // a = [3-10], b = [7-15]; c is used as the working set and exp as the
                     // expected value for union, complement and complementAll.
0466:                UnicodeSet a = new UnicodeSet((char) 3, (char) 10);
0467:                UnicodeSet b = new UnicodeSet((char) 7, (char) 15);
0468:                UnicodeSet c = new UnicodeSet();
0469:
0470:                logln("a [3-10]: " + a);
0471:                logln("b [7-15]: " + b);
0472:                c.set(a);
0473:                c.addAll(b);
0474:                UnicodeSet exp = new UnicodeSet((char) 3, (char) 15);
0475:                if (c.equals(exp)) {
0476:                    logln("c.set(a).add(b): " + c);
0477:                } else {
0478:                    errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
0479:                }
0480:                c.complement();
0481:                exp.set((char) 0, (char) 2);
0482:                exp.add((char) 16, UnicodeSet.MAX_VALUE);
0483:                if (c.equals(exp)) {
0484:                    logln("c.complement(): " + c);
0485:                } else {
0486:                    errln(Utility.escape("FAIL: c.complement() = " + c
0487:                            + ", expect " + exp));
0488:                }
                     // Complementing a second time must restore [3-15].
0489:                c.complement();
0490:                exp.set((char) 3, (char) 15);
0491:                if (c.equals(exp)) {
0492:                    logln("c.complement(): " + c);
0493:                } else {
0494:                    errln("FAIL: c.complement() = " + c + ", expect " + exp);
0495:                }
                     // complementAll(b) on a copy of a: expected [3-6] plus [11-15].
0496:                c.set(a);
0497:                c.complementAll(b);
0498:                exp.set((char) 3, (char) 6);
0499:                exp.add((char) 11, (char) 15);
0500:                if (c.equals(exp)) {
0501:                    logln("c.set(a).complement(b): " + c);
0502:                } else {
0503:                    errln("FAIL: c.set(a).complement(b) = " + c + ", expect "
0504:                            + exp);
0505:                }
0506:
                     // Round trip through setToBits()/bitsToSet(), which are defined outside
                     // the visible part of this file.
0507:                exp.set(c);
0508:                c = bitsToSet(setToBits(c));
0509:                if (c.equals(exp)) {
0510:                    logln("bitsToSet(setToBits(c)): " + c);
0511:                } else {
0512:                    errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect "
0513:                            + exp);
0514:                }
0515:
0516:                // Additional tests for coverage JB#2118
0517:                //UnicodeSet::complement(class UnicodeString const &)
0518:                //UnicodeSet::complementAll(class UnicodeString const &)
0519:                //UnicodeSet::containsNone(class UnicodeSet const &)
0520:                //UnicodeSet::containsNone(long,long)
0521:                //UnicodeSet::containsSome(class UnicodeSet const &)
0522:                //UnicodeSet::containsSome(long,long)
0523:                //UnicodeSet::removeAll(class UnicodeString const &)
0524:                //UnicodeSet::retain(long)
0525:                //UnicodeSet::retainAll(class UnicodeString const &)
0526:                //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
0527:                //UnicodeSetIterator::getString(void)
                     // complement(String) on an empty set: expected to leave just the
                     // string "ab" in the set.
0528:                set.clear();
0529:                set.complement("ab");
0530:                exp.applyPattern("[{ab}]");
0531:                if (!set.equals(exp)) {
0532:                    errln("FAIL: complement(\"ab\")");
0533:                    return;
0534:                }
0535:
                     // The iterator's first element must be flagged as a string and be "ab".
0536:                UnicodeSetIterator iset = new UnicodeSetIterator(set);
0537:                if (!iset.next()
0538:                        || iset.codepoint != UnicodeSetIterator.IS_STRING) {
0539:                    errln("FAIL: UnicodeSetIterator.next/IS_STRING");
0540:                } else if (!iset.string.equals("ab")) {
0541:                    errln("FAIL: UnicodeSetIterator.string");
0542:                }
0543:
                     // Add a-z, then complementAll("alan"): expected to drop a, l and n.
0544:                set.add((char) 0x61, (char) 0x7A);
0545:                set.complementAll("alan");
0546:                exp.applyPattern("[{ab}b-kmo-z]");
0547:                if (!set.equals(exp)) {
0548:                    errln("FAIL: complementAll(\"alan\")");
0549:                    return;
0550:                }
0551:
                     // containsNone/containsSome against a set: [a-z] overlaps, [aln] does not.
0552:                exp.applyPattern("[a-z]");
0553:                if (set.containsNone(exp)) {
0554:                    errln("FAIL: containsNone(UnicodeSet)");
0555:                }
0556:                if (!set.containsSome(exp)) {
0557:                    errln("FAIL: containsSome(UnicodeSet)");
0558:                }
0559:                exp.applyPattern("[aln]");
0560:                if (!set.containsNone(exp)) {
0561:                    errln("FAIL: containsNone(UnicodeSet)");
0562:                }
0563:                if (set.containsSome(exp)) {
0564:                    errln("FAIL: containsSome(UnicodeSet)");
0565:                }
0566:
                     // containsNone/containsSome against a range: a-z (0x61-0x7A) overlaps,
                     // A-Z (0x41-0x5A) does not.
0567:                if (set.containsNone((char) 0x61, (char) 0x7A)) {
0568:                    errln("FAIL: containsNone(char, char)");
0569:                }
0570:                if (!set.containsSome((char) 0x61, (char) 0x7A)) {
0571:                    errln("FAIL: containsSome(char, char)");
0572:                }
0573:                if (!set.containsNone((char) 0x41, (char) 0x5A)) {
0574:                    errln("FAIL: containsNone(char, char)");
0575:                }
0576:                if (set.containsSome((char) 0x41, (char) 0x5A)) {
0577:                    errln("FAIL: containsSome(char, char)");
0578:                }
0579:
                     // removeAll(String), retainAll(String) and retain(char), each applied to
                     // the result of the previous step.
0580:                set.removeAll("liu");
0581:                exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
0582:                if (!set.equals(exp)) {
0583:                    errln("FAIL: removeAll(\"liu\")");
0584:                    return;
0585:                }
0586:
0587:                set.retainAll("star");
0588:                exp.applyPattern("[rst]");
0589:                if (!set.equals(exp)) {
0590:                    errln("FAIL: retainAll(\"star\")");
0591:                    return;
0592:                }
0593:
0594:                set.retain((char) 0x73);
0595:                exp.applyPattern("[s]");
0596:                if (!set.equals(exp)) {
0597:                    errln("FAIL: retain('s')");
0598:                    return;
0599:                }
0600:
0601:                // ICU 2.6 coverage tests
0602:                // public final UnicodeSet retain(String s);
0603:                // public final UnicodeSet remove(int c);
0604:                // public final UnicodeSet remove(String s);
0605:                // public int hashCode();
0606:                set.applyPattern("[a-z{ab}{cd}]");
0607:                set.retain("cd");
0608:                exp.applyPattern("[{cd}]");
0609:                if (!set.equals(exp)) {
0610:                    errln("FAIL: retain(\"cd\")");
0611:                    return;
0612:                }
0613:
0614:                set.applyPattern("[a-z{ab}{cd}]");
0615:                set.remove((char) 0x63);
0616:                exp.applyPattern("[abd-z{ab}{cd}]");
0617:                if (!set.equals(exp)) {
0618:                    errln("FAIL: remove('c')");
0619:                    return;
0620:                }
0621:
0622:                set.remove("cd");
0623:                exp.applyPattern("[abd-z{ab}]");
0624:                if (!set.equals(exp)) {
0625:                    errln("FAIL: remove(\"cd\")");
0626:                    return;
0627:                }
0628:
                     // Equal sets must hash equally; after clearing exp the hashes are
                     // expected to differ.
0629:                if (set.hashCode() != exp.hashCode()) {
0630:                    errln("FAIL: hashCode() unequal");
0631:                }
0632:                exp.clear();
0633:                if (set.hashCode() == exp.hashCode()) {
0634:                    errln("FAIL: hashCode() equal");
0635:                }
0636:
0637:                {
0638:                    //Cover addAll(Collection) and addAllTo(Collection)  
0639:                    //  Seems that there is a bug in addAll(Collection) operation
0640:                    //    Ram also add a similar test to UtilityTest.java
0641:                    logln("Testing addAll(Collection) ... ");
0642:                    String[] array = { "a", "b", "c", "de" };
0643:                    List list = Arrays.asList(array);
0644:                    Set aset = new HashSet(list);
0645:                    logln(" *** The source set's size is: " + aset.size());
0646:
                         // Only the element count is compared after addAll(Collection).
0647:                    set.clear();
0648:                    set.addAll(aset);
0649:                    if (set.size() != aset.size()) {
0650:                        errln("FAIL: After addAll, the UnicodeSet size expected "
0651:                                + aset.size()
0652:                                + ", "
0653:                                + set.size()
0654:                                + " seen instead!");
0655:                    } else {
0656:                        logln("OK: After addAll, the UnicodeSet size got "
0657:                                + set.size());
0658:                    }
0659:
                         // addAllTo(Collection) is only exercised and logged; the contents of
                         // list2 are not asserted.
0660:                    List list2 = new ArrayList();
0661:                    set.addAllTo(list2);
0662:
0663:                    //verify the result
0664:                    log(" *** The elements are: ");
0665:                    String s = set.toPattern(true);
0666:                    logln(s);
0667:                    Iterator myiter = list2.iterator();
0668:                    while (myiter.hasNext()) {
0669:                        log(myiter.next().toString() + "  ");
0670:                    }
0671:                    logln(""); // a new line
0672:                }
0673:
0674:            }
0675:
0676:            public void TestStrings() {
0677:                //        Object[][] testList = {
0678:                //            {I_EQUALS,  UnicodeSet.fromAll("abc"),
0679:                //                        new UnicodeSet("[a-c]")},
0680:                //                        
0681:                //            {I_EQUALS,  UnicodeSet.from("ch").add('a','z').add("ll"),
0682:                //                        new UnicodeSet("[{ll}{ch}a-z]")},
0683:                //                        
0684:                //            {I_EQUALS,  UnicodeSet.from("ab}c"),  
0685:                //                        new UnicodeSet("[{ab\\}c}]")},
0686:                //                        
0687:                //            {I_EQUALS,  new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'), 
0688:                //                        new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
0689:                //        };
0690:                //        
0691:                //        for (int i = 0; i < testList.length; ++i) {
0692:                //            expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
0693:                //        }        
0694:
0695:                UnicodeSet[][] testList = {
0696:                        { UnicodeSet.fromAll("abc"), new UnicodeSet("[a-c]") },
0697:
0698:                        { UnicodeSet.from("ch").add('a', 'z').add("ll"),
0699:                                new UnicodeSet("[{ll}{ch}a-z]") },
0700:
0701:                        { UnicodeSet.from("ab}c"), new UnicodeSet("[{ab\\}c}]") },
0702:
0703:                        {
0704:                                new UnicodeSet('a', 'z').add('A', 'Z').retain(
0705:                                        'M', 'm').complement('X'),
0706:                                new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]") }, };
0707:
0708:                for (int i = 0; i < testList.length; ++i) {
0709:                    if (!testList[i][0].equals(testList[i][1])) {
0710:                        errln("FAIL: sets unequal; see source code (" + i + ")");
0711:                    }
0712:                }
0713:            }
0714:
0715:            static final Integer I_ANY = new Integer(SortedSetRelation.ANY),
0716:                    I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
0717:                    I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
0718:                    I_NO_B = new Integer(SortedSetRelation.NO_B),
0719:                    I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
0720:                    I_EQUALS = new Integer(SortedSetRelation.EQUALS),
0721:                    I_NO_A = new Integer(SortedSetRelation.NO_A),
0722:                    I_NONE = new Integer(SortedSetRelation.NONE);
0723:
0724:            public void TestSetRelation() {
0725:
0726:                String[] choices = { "a", "b", "cd", "ef" };
0727:                int limit = 1 << choices.length;
0728:
0729:                SortedSet iset = new TreeSet();
0730:                SortedSet jset = new TreeSet();
0731:
0732:                for (int i = 0; i < limit; ++i) {
0733:                    pick(i, choices, iset);
0734:                    for (int j = 0; j < limit; ++j) {
0735:                        pick(j, choices, jset);
0736:                        checkSetRelation(iset, jset, "(" + i + ")");
0737:                    }
0738:                }
0739:            }
0740:
0741:            public void TestSetSpeed() {
0742:                // skip unless verbose
0743:                if (!isVerbose())
0744:                    return;
0745:
0746:                SetSpeed2(100);
0747:                SetSpeed2(1000);
0748:            }
0749:
0750:            public void SetSpeed2(int size) {
0751:
0752:                SortedSet iset = new TreeSet();
0753:                SortedSet jset = new TreeSet();
0754:
0755:                for (int i = 0; i < size * 2; i += 2) { // only even values
0756:                    iset.add(new Integer(i));
0757:                    jset.add(new Integer(i));
0758:                }
0759:
0760:                int iterations = 1000000 / size;
0761:
0762:                logln("Timing comparison of Java vs Utility");
0763:                logln("For about " + size
0764:                        + " objects that are almost all the same.");
0765:
0766:                CheckSpeed(iset, jset, "when a = b", iterations);
0767:
0768:                iset.add(new Integer(size + 1)); // add odd value in middle
0769:
0770:                CheckSpeed(iset, jset, "when a contains b", iterations);
0771:                CheckSpeed(jset, iset, "when b contains a", iterations);
0772:
0773:                jset.add(new Integer(size - 1)); // add different odd value in middle
0774:
0775:                CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
0776:            }
0777:
0778:            void CheckSpeed(SortedSet iset, SortedSet jset, String message,
0779:                    int iterations) {
0780:                CheckSpeed2(iset, jset, message, iterations);
0781:                CheckSpeed3(iset, jset, message, iterations);
0782:            }
0783:
0784:            void CheckSpeed2(SortedSet iset, SortedSet jset, String message,
0785:                    int iterations) {
0786:                boolean x;
0787:                boolean y;
0788:
0789:                // make sure code is loaded:
0790:                x = iset.containsAll(jset);
0791:                y = SortedSetRelation.hasRelation(iset,
0792:                        SortedSetRelation.CONTAINS, jset);
0793:                if (x != y)
0794:                    errln("FAIL contains comparison");
0795:
0796:                double start = System.currentTimeMillis();
0797:                for (int i = 0; i < iterations; ++i) {
0798:                    x |= iset.containsAll(jset);
0799:                }
0800:                double middle = System.currentTimeMillis();
0801:                for (int i = 0; i < iterations; ++i) {
0802:                    y |= SortedSetRelation.hasRelation(iset,
0803:                            SortedSetRelation.CONTAINS, jset);
0804:                }
0805:                double end = System.currentTimeMillis();
0806:
0807:                double jtime = (middle - start) / iterations;
0808:                double utime = (end - middle) / iterations;
0809:
0810:                java.text.NumberFormat nf = java.text.NumberFormat
0811:                        .getPercentInstance();
0812:                logln("Test contains: " + message + ": Java: " + jtime
0813:                        + ", Utility: " + utime + ", u:j: "
0814:                        + nf.format(utime / jtime));
0815:            }
0816:
0817:            void CheckSpeed3(SortedSet iset, SortedSet jset, String message,
0818:                    int iterations) {
0819:                boolean x;
0820:                boolean y;
0821:
0822:                // make sure code is loaded:
0823:                x = iset.equals(jset);
0824:                y = SortedSetRelation.hasRelation(iset,
0825:                        SortedSetRelation.EQUALS, jset);
0826:                if (x != y)
0827:                    errln("FAIL equality comparison");
0828:
0829:                double start = System.currentTimeMillis();
0830:                for (int i = 0; i < iterations; ++i) {
0831:                    x |= iset.equals(jset);
0832:                }
0833:                double middle = System.currentTimeMillis();
0834:                for (int i = 0; i < iterations; ++i) {
0835:                    y |= SortedSetRelation.hasRelation(iset,
0836:                            SortedSetRelation.EQUALS, jset);
0837:                }
0838:                double end = System.currentTimeMillis();
0839:
0840:                double jtime = (middle - start) / iterations;
0841:                double utime = (end - middle) / iterations;
0842:
0843:                java.text.NumberFormat nf = java.text.NumberFormat
0844:                        .getPercentInstance();
0845:                logln("Test equals:   " + message + ": Java: " + jtime
0846:                        + ", Utility: " + utime + ", u:j: "
0847:                        + nf.format(utime / jtime));
0848:            }
0849:
0850:            void pick(int bits, Object[] examples, SortedSet output) {
0851:                output.clear();
0852:                for (int k = 0; k < 32; ++k) {
0853:                    if (((1 << k) & bits) != 0)
0854:                        output.add(examples[k]);
0855:                }
0856:            }
0857:
0858:            public static final String[] RELATION_NAME = { "both-are-null",
0859:                    "a-is-null", "equals", "is-contained-in", "b-is-null",
0860:                    "is-disjoint_with", "contains", "any", };
0861:
0862:            boolean dumbHasRelation(Collection A, int filter, Collection B) {
0863:                Collection ab = new TreeSet(A);
0864:                ab.retainAll(B);
0865:                if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0)
0866:                    return false;
0867:
0868:                // A - B size == A.size - A&B.size
0869:                if (A.size() > ab.size()
0870:                        && (filter & SortedSetRelation.A_NOT_B) == 0)
0871:                    return false;
0872:
0873:                // B - A size == B.size - A&B.size
0874:                if (B.size() > ab.size()
0875:                        && (filter & SortedSetRelation.B_NOT_A) == 0)
0876:                    return false;
0877:
0878:                return true;
0879:            }
0880:
0881:            void checkSetRelation(SortedSet a, SortedSet b, String message) {
0882:                for (int i = 0; i < 8; ++i) {
0883:
0884:                    boolean hasRelation = SortedSetRelation
0885:                            .hasRelation(a, i, b);
0886:                    boolean dumbHasRelation = dumbHasRelation(a, i, b);
0887:
0888:                    logln(message + " " + hasRelation + ":\t" + a + "\t"
0889:                            + RELATION_NAME[i] + "\t" + b);
0890:
0891:                    if (hasRelation != dumbHasRelation) {
0892:                        errln("FAIL: " + message + " " + dumbHasRelation
0893:                                + ":\t" + a + "\t" + RELATION_NAME[i] + "\t"
0894:                                + b);
0895:                    }
0896:                }
0897:                logln("");
0898:            }
0899:
0900:            /**
0901:             * Test the [:Latin:] syntax.
0902:             */
0903:            public void TestScriptSet() {
0904:
0905:                expectContainment("[:Latin:]", "aA",
0906:                        CharsToUnicodeString("\\u0391\\u03B1"));
0907:
0908:                expectContainment("[:Greek:]",
0909:                        CharsToUnicodeString("\\u0391\\u03B1"), "aA");
0910:
0911:                /* Jitterbug 1423 */
0912:                expectContainment("[[:Common:][:Inherited:]]",
0913:                        CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"),
0914:                        "aA");
0915:
0916:            }
0917:
0918:            /**
0919:             * Test the [:Latin:] syntax.
0920:             */
0921:            public void TestPropertySet() {
0922:                String[] DATA = {
0923:                        // Pattern, Chars IN, Chars NOT in
0924:
0925:                        "[:Latin:]",
0926:                        "aA",
0927:                        "\u0391\u03B1",
0928:
0929:                        "[\\p{Greek}]",
0930:                        "\u0391\u03B1",
0931:                        "aA",
0932:
0933:                        "\\P{ GENERAL Category = upper case letter }",
0934:                        "abc",
0935:                        "ABC",
0936:
0937:                        // Combining class: @since ICU 2.2
0938:                        // Check both symbolic and numeric
0939:                        "\\p{ccc=Nukta}",
0940:                        "\u0ABC",
0941:                        "abc",
0942:
0943:                        "\\p{Canonical Combining Class = 11}",
0944:                        "\u05B1",
0945:                        "\u05B2",
0946:
0947:                        "[:c c c = iota subscript :]",
0948:                        "\u0345",
0949:                        "xyz",
0950:
0951:                        // Bidi class: @since ICU 2.2
0952:                        "\\p{bidiclass=lefttoright}",
0953:                        "abc",
0954:                        "\u0671\u0672",
0955:
0956:                        // Binary properties: @since ICU 2.2
0957:                        "\\p{ideographic}",
0958:                        "\u4E0A",
0959:                        "x",
0960:
0961:                        "[:math=false:]",
0962:                        "q)*(", // )(and * were removed from math in Unicode 4.0.1
0963:                        "+<>^",
0964:
0965:                        // JB#1767 \N{}, \p{ASCII}
0966:                        "[:Ascii:]",
0967:                        "abc\u0000\u007F",
0968:                        "\u0080\u4E00",
0969:
0970:                        "[\\N{ latin small letter  a  }[:name= latin small letter z:]]",
0971:                        "az",
0972:                        "qrs",
0973:
0974:                        // JB#2015
0975:                        "[:any:]",
0976:                        "a\\U0010FFFF",
0977:                        "",
0978:
0979:                        "[:nv=0.5:]",
0980:                        "\u00BD\u0F2A",
0981:                        "\u00BC",
0982:
0983:                        // JB#2653: Age
0984:                        "[:Age=1.1:]",
0985:                        "\u03D6", // 1.1
0986:                        "\u03D8\u03D9", // 3.2
0987:
0988:                        "[:Age=3.1:]",
0989:                        "\\u1800\\u3400\\U0002f800",
0990:                        "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
0991:
0992:                        // JB#2350: Case_Sensitive
0993:                        "[:Case Sensitive:]",
0994:                        "A\u1FFC\\U00010410",
0995:                        ";\u00B4\\U00010500",
0996:
0997:                        // Regex compatibility test
0998:                        "[-b]", // leading '-' is literal
0999:                        "-b",
1000:                        "ac",
1001:
1002:                        "[^-b]", // leading '-' is literal
1003:                        "ac",
1004:                        "-b",
1005:
1006:                        "[b-]", // trailing '-' is literal
1007:                        "-b",
1008:                        "ac",
1009:
1010:                        "[^b-]", // trailing '-' is literal
1011:                        "ac",
1012:                        "-b",
1013:
1014:                        "[a-b-]", // trailing '-' is literal
1015:                        "ab-",
1016:                        "c=",
1017:
1018:                        "[[a-q]&[p-z]-]", // trailing '-' is literal
1019:                        "pq-",
1020:                        "or=",
1021:
1022:                        "[\\s|\\)|:|$|\\>]", // from regex tests
1023:                        "s|):$>",
1024:                        "\\abc",
1025:
1026:                        "[\uDC00cd]", // JB#2906: isolated trail at start
1027:                        "cd\uDC00",
1028:                        "ab\uD800\\U00010000",
1029:
1030:                        "[ab\uD800]", // JB#2906: isolated trail at start
1031:                        "ab\uD800",
1032:                        "cd\uDC00\\U00010000",
1033:
1034:                        "[ab\uD800cd]", // JB#2906: isolated lead in middle
1035:                        "abcd\uD800",
1036:                        "ef\uDC00\\U00010000",
1037:
1038:                        "[ab\uDC00cd]", // JB#2906: isolated trail in middle
1039:                        "abcd\uDC00",
1040:                        "ef\uD800\\U00010000",
1041:
1042:                        "[:^lccc=0:]", // Lead canonical class
1043:                        "\u0300\u0301",
1044:                        "abcd\u00c0\u00c5",
1045:
1046:                        "[:^tccc=0:]", // Trail canonical class
1047:                        "\u0300\u0301\u00c0\u00c5",
1048:                        "abcd",
1049:
1050:                        "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
1051:                        "\u0300\u0301\u00c0\u00c5",
1052:                        "abcd",
1053:
1054:                        "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
1055:                        "",
1056:                        "abcd\u0300\u0301\u00c0\u00c5",
1057:
1058:                        "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
1059:                        "\u0F73\u0F75\u0F81", "abcd\u0300\u0301\u00c0\u00c5",
1060:
1061:                        "[:Assigned:]",
1062:                        "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
1063:                        "\\u0888\\uFDD3\\uFFFE\\U00050005",
1064:
1065:                };
1066:
1067:                for (int i = 0; i < DATA.length; i += 3) {
1068:                    expectContainment(DATA[i], DATA[i + 1], DATA[i + 2]);
1069:                }
1070:            }
1071:
1072:            public void TestUnicodeSetStrings() {
1073:                UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
1074:                logln(uset + " ~ " + uset.getRegexEquivalent());
1075:                String[][] testStrings = { { "x", "none" }, { "bc", "all" },
1076:                        { "cdbca", "all" }, { "a", "all" }, { "bcx", "some" },
1077:                        { "ab", "some" }, { "acb", "some" },
1078:                        { "bcda", "some" }, { "dccbx", "none" }, };
1079:                for (int i = 0; i < testStrings.length; ++i) {
1080:                    check(uset, testStrings[i][0], testStrings[i][1]);
1081:                }
1082:            }
1083:
1084:            private void check(UnicodeSet uset, String string,
1085:                    String desiredStatus) {
1086:                boolean shouldContainAll = desiredStatus.equals("all");
1087:                boolean shouldContainNone = desiredStatus.equals("none");
1088:                if (uset.containsAll(string) != shouldContainAll) {
1089:                    errln("containsAll " + string + " should be "
1090:                            + shouldContainAll);
1091:                } else {
1092:                    logln("containsAll " + string + " = " + shouldContainAll);
1093:                }
1094:                if (uset.containsNone(string) != shouldContainNone) {
1095:                    errln("containsNone " + string + " should be "
1096:                            + shouldContainNone);
1097:                } else {
1098:                    logln("containsNone " + string + " = " + shouldContainNone);
1099:                }
1100:            }
1101:
1102:            /**
1103:             * Test cloning of UnicodeSet
1104:             */
1105:            public void TestClone() {
1106:                UnicodeSet s = new UnicodeSet("[abcxyz]");
1107:                UnicodeSet t = (UnicodeSet) s.clone();
1108:                expectContainment(t, "abc", "def");
1109:            }
1110:
1111:            /**
1112:             * Test the indexOf() and charAt() methods.
1113:             */
1114:            public void TestIndexOf() {
1115:                UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
1116:                for (int i = 0; i < set.size(); ++i) {
1117:                    int c = set.charAt(i);
1118:                    if (set.indexOf(c) != i) {
1119:                        errln("FAIL: charAt(" + i + ") = " + c
1120:                                + " => indexOf() => " + set.indexOf(c));
1121:                    }
1122:                }
1123:                int c = set.charAt(set.size());
1124:                if (c != -1) {
1125:                    errln("FAIL: charAt(<out of range>) = "
1126:                            + Utility.escape(String.valueOf(c)));
1127:                }
1128:                int j = set.indexOf('q');
1129:                if (j != -1) {
1130:                    errln("FAIL: indexOf('q') = " + j);
1131:                }
1132:            }
1133:
1134:            public void TestContainsString() {
1135:                UnicodeSet x = new UnicodeSet("[a{bc}]");
1136:                if (x.contains("abc"))
1137:                    errln("FAIL");
1138:            }
1139:
1140:            public void TestExhaustive() {
1141:                // exhaustive tests. Simulate UnicodeSets with integers.
1142:                // That gives us very solid tests (except for large memory tests).
1143:
1144:                char limit = (char) 128;
1145:
1146:                for (char i = 0; i < limit; ++i) {
1147:                    logln("Testing " + i + ", " + bitsToSet(i));
1148:                    _testComplement(i);
1149:
1150:                    // AS LONG AS WE ARE HERE, check roundtrip
1151:                    checkRoundTrip(bitsToSet(i));
1152:
1153:                    for (char j = 0; j < limit; ++j) {
1154:                        _testAdd(i, j);
1155:                        _testXor(i, j);
1156:                        _testRetain(i, j);
1157:                        _testRemove(i, j);
1158:                    }
1159:                }
1160:            }
1161:
1162:            /**
1163:             * Make sure each script name and abbreviated name can be used
1164:             * to construct a UnicodeSet.
1165:             */
1166:            public void TestScriptNames() {
1167:                for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
1168:                    for (int j = 0; j < 2; ++j) {
1169:                        String pat = "";
1170:                        try {
1171:                            String name = (j == 0) ? UScript.getName(i)
1172:                                    : UScript.getShortName(i);
1173:                            pat = "[:" + name + ":]";
1174:                            UnicodeSet set = new UnicodeSet(pat);
1175:                            logln("Ok: " + pat);
1176:                        } catch (IllegalArgumentException e) {
1177:                            if (pat.length() == 0) {
1178:                                errln("FAIL (in UScript): No name for script "
1179:                                        + i);
1180:                            } else {
1181:                                errln("FAIL: Couldn't create " + pat);
1182:                            }
1183:                        }
1184:                    }
1185:                }
1186:            }
1187:
1188:            /**
1189:             * Test closure API.
1190:             */
1191:            public void TestCloseOver() {
1192:                String CASE = String.valueOf(UnicodeSet.CASE);
1193:                String[] DATA = {
1194:                        // selector, input, output
1195:                        CASE, "[aq\u00DF{Bc}{bC}{Fi}]",
1196:                        "[aAqQ\u00DF\uFB01{ss}{bc}{fi}]",
1197:
1198:                        CASE,
1199:                        "[\u01F1]", // 'DZ'
1200:                        "[\u01F1\u01F2\u01F3]",
1201:
1202:                        CASE, "[\u1FB4]", "[\u1FB4{\u03AC\u03B9}]",
1203:
1204:                        CASE, "[{F\uFB01}]", "[\uFB03{ffi}]",
1205:
1206:                        CASE, "[a-z]", "[A-Za-z\u017F\u212A]", CASE, "[abc]",
1207:                        "[A-Ca-c]", CASE, "[ABC]", "[A-Ca-c]", };
1208:
1209:                UnicodeSet s = new UnicodeSet();
1210:                UnicodeSet t = new UnicodeSet();
1211:                for (int i = 0; i < DATA.length; i += 3) {
1212:                    int selector = Integer.parseInt(DATA[i]);
1213:                    String pat = DATA[i + 1];
1214:                    String exp = DATA[i + 2];
1215:                    s.applyPattern(pat);
1216:                    s.closeOver(selector);
1217:                    t.applyPattern(exp);
1218:                    if (s.equals(t)) {
1219:                        logln("Ok: " + pat + ".closeOver(" + selector + ") => "
1220:                                + exp);
1221:                    } else {
1222:                        errln("FAIL: " + pat + ".closeOver(" + selector
1223:                                + ") => " + s.toPattern(true) + ", expected "
1224:                                + exp);
1225:                    }
1226:                }
1227:
1228:                // Test the pattern API
1229:                s.applyPattern("[abc]", UnicodeSet.CASE);
1230:                expectContainment(s, "abcABC", "defDEF");
1231:                s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
1232:                expectContainment(s, "defDEF", "abcABC");
1233:            }
1234:
1235:            public void TestEscapePattern() {
1236:                // The following pattern must contain at least one range "c-d"
1237:                // for which isRuleWhiteSpace(c) or isRuleWhiteSpace(d) is true.
1238:                String pattern = "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1239:                String exp = "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1240:                // We test this with two passes; in the second pass we
1241:                // pre-unescape the pattern.  Since U+200E is rule whitespace,
1242:                // this fails -- which is what we expect.
1243:                for (int pass = 1; pass <= 2; ++pass) {
1244:                    String pat = pattern;
1245:                    if (pass == 2) {
1246:                        pat = Utility.unescape(pat);
1247:                    }
1248:                    // Pattern is only good for pass 1
1249:                    boolean isPatternValid = (pass == 1);
1250:
1251:                    UnicodeSet set = null;
1252:                    try {
1253:                        set = new UnicodeSet(pat);
1254:                    } catch (IllegalArgumentException e) {
1255:                        set = null;
1256:                    }
1257:                    if ((set != null) != isPatternValid) {
1258:                        errln("FAIL: applyPattern(" + Utility.escape(pat)
1259:                                + ") => " + set);
1260:                        continue;
1261:                    }
1262:                    if (set == null) {
1263:                        continue;
1264:                    }
1265:                    if (set.contains((char) 0x0644)) {
1266:                        errln("FAIL: " + Utility.escape(pat)
1267:                                + " contains(U+0664)");
1268:                    }
1269:
1270:                    String newpat = set.toPattern(true);
1271:                    if (newpat.equals(exp)) {
1272:                        logln(Utility.escape(pat) + " => " + newpat);
1273:                    } else {
1274:                        errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
1275:                    }
1276:
1277:                    for (int i = 0; i < set.getRangeCount(); ++i) {
1278:                        StringBuffer str = new StringBuffer("Range ");
1279:                        str.append((char) (0x30 + i)).append(": ");
1280:                        UTF16.append(str, set.getRangeStart(i));
1281:                        str.append(" - ");
1282:                        UTF16.append(str, set.getRangeEnd(i));
1283:                        String s = Utility.escape(str.toString() + " ("
1284:                                + set.getRangeStart(i) + " - "
1285:                                + set.getRangeEnd(i) + ")");
1286:                        if (set.getRangeStart(i) < 0) {
1287:                            errln("FAIL: " + s);
1288:                        } else {
1289:                            logln(s);
1290:                        }
1291:                    }
1292:                }
1293:            }
1294:
1295:            public void TestSymbolTable() {
1296:                // Multiple test cases can be set up here.  Each test case
1297:                // is terminated by null:
1298:                // var, value, var, value,..., input pat., exp. output pat., null
1299:                String DATA[] = { "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
1300:                        "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null, "us",
1301:                        "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null };
1302:
1303:                for (int i = 0; i < DATA.length; ++i) {
1304:                    TokenSymbolTable sym = new TokenSymbolTable();
1305:
1306:                    // Set up variables
1307:                    while (DATA[i + 2] != null) {
1308:                        sym.add(DATA[i], DATA[i + 1]);
1309:                        i += 2;
1310:                    }
1311:
1312:                    // Input pattern and expected output pattern
1313:                    String inpat = DATA[i], exppat = DATA[i + 1];
1314:                    i += 2;
1315:
1316:                    ParsePosition pos = new ParsePosition(0);
1317:                    UnicodeSet us = new UnicodeSet(inpat, pos, sym);
1318:
1319:                    // results
1320:                    if (pos.getIndex() != inpat.length()) {
1321:                        errln("Failed to read to end of string \"" + inpat
1322:                                + "\": read to " + pos.getIndex()
1323:                                + ", length is " + inpat.length());
1324:                    }
1325:
1326:                    UnicodeSet us2 = new UnicodeSet(exppat);
1327:                    if (!us.equals(us2)) {
1328:                        errln("Failed, got " + us + ", expected " + us2);
1329:                    } else {
1330:                        logln("Ok, got " + us);
1331:                    }
1332:
1333:                    //cover Unicode(String,ParsePosition,SymbolTable,int)
1334:                    ParsePosition inpos = new ParsePosition(0);
1335:                    UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym,
1336:                            UnicodeSet.IGNORE_SPACE);
1337:                    UnicodeSet expSet = new UnicodeSet(exppat);
1338:                    if (!inSet.equals(expSet)) {
1339:                        errln("FAIL: Failed, got " + inSet + ", expected "
1340:                                + expSet);
1341:                    } else {
1342:                        logln("OK: got " + inSet);
1343:                    }
1344:                }
1345:            }
1346:
1347:            /**
1348:             * Test that Posix style character classes [:digit:], etc.
1349:             *   have the Unicode definitions from TR 18.
1350:             */
1351:            public void TestPosixClasses() {
1352:                expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
1353:                expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
1354:                expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
1355:                expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
1356:                expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
1357:                expectEqual("POSIX xdigit", "[:xdigit:]",
1358:                        "[\\p{DecimalNumber}\\p{HexDigit}]");
1359:                expectEqual("POSIX alnum", "[:alnum:]",
1360:                        "[\\p{Alphabetic}\\p{DecimalNumber}]");
1361:                expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
1362:                expectEqual(
1363:                        "POSIX blank",
1364:                        "[:blank:]",
1365:                        "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
1366:                expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
1367:                expectEqual("POSIX graph", "[:graph:]",
1368:                        "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
1369:                expectEqual("POSIX print", "[:print:]",
1370:                        "[[:graph:][:blank:]-[\\p{Control}]]");
1371:            }
1372:
1373:            /**
1374:             * Test that frozen classes disallow changes. For 4217
1375:             */
1376:            public void TestFrozen() {
1377:                UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
1378:                test.freeze();
1379:                checkModification(test, true);
1380:                checkModification(test, false);
1381:            }
1382:
1383:            public void checkModification(UnicodeSet original, boolean isFrozen) {
1384:                main: for (int i = 0;; ++i) {
1385:                    UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone()
1386:                            : original.cloneAsThawed());
1387:                    boolean gotException = true;
1388:                    boolean checkEquals = true;
1389:                    try {
1390:                        switch (i) {
1391:                        case 0:
1392:                            test.add(0);
1393:                            break;
1394:                        case 1:
1395:                            test.add(0, 1);
1396:                            break;
1397:                        case 2:
1398:                            test.add("a");
1399:                            break;
1400:                        case 3:
1401:                            List a = new ArrayList();
1402:                            a.add("a");
1403:                            test.addAll(a);
1404:                            break;
1405:                        case 4:
1406:                            test.addAll("ab");
1407:                            break;
1408:                        case 5:
1409:                            test.addAll(new UnicodeSet("[ab]"));
1410:                            break;
1411:                        case 6:
1412:                            test.applyIntPropertyValue(0, 0);
1413:                            break;
1414:                        case 7:
1415:                            test.applyPattern("[ab]");
1416:                            break;
1417:                        case 8:
1418:                            test.applyPattern("[ab]", true);
1419:                            break;
1420:                        case 9:
1421:                            test.applyPattern("[ab]", 0);
1422:                            break;
1423:                        case 10:
1424:                            test.applyPropertyAlias("hex", "true");
1425:                            break;
1426:                        case 11:
1427:                            test.applyPropertyAlias("hex", "true", null);
1428:                            break;
1429:                        case 12:
1430:                            test.closeOver(UnicodeSet.CASE);
1431:                            break;
1432:                        case 13:
1433:                            test.compact();
1434:                            checkEquals = false;
1435:                            break;
1436:                        case 14:
1437:                            test.complement(0);
1438:                            break;
1439:                        case 15:
1440:                            test.complement(0, 0);
1441:                            break;
1442:                        case 16:
1443:                            test.complement("ab");
1444:                            break;
1445:                        case 17:
1446:                            test.complementAll("ab");
1447:                            break;
1448:                        case 18:
1449:                            test.complementAll(new UnicodeSet("[ab]"));
1450:                            break;
1451:                        case 19:
1452:                            test.remove(' ');
1453:                            break;
1454:                        case 20:
1455:                            test.remove(' ', 'a');
1456:                            break;
1457:                        case 21:
1458:                            test.remove(" ");
1459:                            break;
1460:                        case 22:
1461:                            test.removeAll(" a");
1462:                            break;
1463:                        case 23:
1464:                            test.removeAll(new UnicodeSet("[\\ a]"));
1465:                            break;
1466:                        case 24:
1467:                            test.retain(' ');
1468:                            break;
1469:                        case 25:
1470:                            test.retain(' ', 'a');
1471:                            break;
1472:                        case 26:
1473:                            test.retain(" ");
1474:                            break;
1475:                        case 27:
1476:                            test.retainAll(" a");
1477:                            break;
1478:                        case 28:
1479:                            test.retainAll(new UnicodeSet("[\\ a]"));
1480:                            break;
1481:                        case 29:
1482:                            test.set(0, 1);
1483:                            break;
1484:                        case 30:
1485:                            test.set(new UnicodeSet("[ab]"));
1486:                            break;
1487:
1488:                        default:
1489:                            continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
1490:                        case 35:
1491:                            return;
1492:                        }
1493:                        gotException = false;
1494:                    } catch (UnsupportedOperationException e) {
1495:                        // do nothing
1496:                    }
1497:                    if (isFrozen && !gotException)
1498:                        errln(i
1499:                                + ") attempt to modify frozen object didn't result in an exception");
1500:                    if (!isFrozen && gotException)
1501:                        errln(i
1502:                                + ") attempt to modify thawed object did result in an exception");
1503:                    if (checkEquals) {
1504:                        if (test.equals(original)) {
1505:                            if (!isFrozen)
1506:                                errln(i
1507:                                        + ") attempt to modify thawed object didn't change the object");
1508:                        } else { // unequal
1509:                            if (isFrozen)
1510:                                errln(i
1511:                                        + ") attempt to modify frozen object changed the object");
1512:                        }
1513:                    }
1514:                }
1515:            }
1516:
1517:            String[] prettyData = { "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
1518:                    "[:any:]", "[:whitespace:]", "[:linebreak=AL:]", };
1519:
1520:            public void TestPrettyPrinting() {
1521:                try {
1522:                    PrettyPrinter pp = new PrettyPrinter();
1523:
1524:                    int i = 0;
1525:                    for (; i < prettyData.length; ++i) {
1526:                        UnicodeSet test = new UnicodeSet(prettyData[i]);
1527:                        checkPrettySet(pp, i, test);
1528:                    }
1529:                    Random random = new Random(0);
1530:                    UnicodeSet test = new UnicodeSet();
1531:                    for (; i < 1000; ++i) {
1532:                        double start = random.nextGaussian() * 0x10000;
1533:                        if (start < 0)
1534:                            start = -start;
1535:                        if (start > 0x10FFFF) {
1536:                            start = 0x10FFFF;
1537:                        }
1538:                        double end = random.nextGaussian() * 0x100;
1539:                        if (end < 0)
1540:                            end = -end;
1541:                        end = start + end;
1542:                        if (end > 0x10FFFF) {
1543:                            end = 0x10FFFF;
1544:                        }
1545:                        test.complement((int) start, (int) end);
1546:                        checkPrettySet(pp, i, test);
1547:                    }
1548:                } catch (RuntimeException ex) {
1549:                    warnln("Could not load Collator");
1550:                }
1551:            }
1552:
1553:            private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
1554:                String pretty = pp.toPattern(test);
1555:                UnicodeSet retry = new UnicodeSet(pretty);
1556:                if (!test.equals(retry)) {
1557:                    errln(i + ". Failed test: " + test + " != " + pretty);
1558:                } else {
1559:                    logln(i + ". Worked for " + truncate(test.toString())
1560:                            + " => " + truncate(pretty));
1561:                }
1562:            }
1563:
1564:            private String truncate(String string) {
1565:                if (string.length() <= 100)
1566:                    return string;
1567:                return string.substring(0, 97) + "...";
1568:            }
1569:
1570:            public class TokenSymbolTable implements  SymbolTable {
1571:                HashMap contents = new HashMap();
1572:
1573:                /**
1574:                 * (Non-SymbolTable API) Add the given variable and value to
1575:                 * the table.  Variable should NOT contain leading '$'.
1576:                 */
1577:                public void add(String var, String value) {
1578:                    char[] buffer = new char[value.length()];
1579:                    value.getChars(0, value.length(), buffer, 0);
1580:                    add(var, buffer);
1581:                }
1582:
1583:                /**
1584:                 * (Non-SymbolTable API) Add the given variable and value to
1585:                 * the table.  Variable should NOT contain leading '$'.
1586:                 */
1587:                public void add(String var, char[] body) {
1588:                    logln("TokenSymbolTable: add \"" + var + "\" => \""
1589:                            + new String(body) + "\"");
1590:                    contents.put(var, body);
1591:                }
1592:
1593:                /* (non-Javadoc)
1594:                 * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
1595:                 */
1596:                public char[] lookup(String s) {
1597:                    logln("TokenSymbolTable: lookup \"" + s + "\" => \""
1598:                            + new String((char[]) contents.get(s)) + "\"");
1599:                    return (char[]) contents.get(s);
1600:                }
1601:
1602:                /* (non-Javadoc)
1603:                 * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
1604:                 */
1605:                public UnicodeMatcher lookupMatcher(int ch) {
1606:                    return null;
1607:                }
1608:
1609:                /* (non-Javadoc)
1610:                 * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
1611:                 java.text.ParsePosition, int)
1612:                 */
1613:                public String parseReference(String text, ParsePosition pos,
1614:                        int limit) {
1615:                    int cp;
1616:                    int start = pos.getIndex();
1617:                    int i;
1618:                    for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
1619:                        cp = UTF16.charAt(text, i);
1620:                        if (!com.ibm.icu.lang.UCharacter
1621:                                .isUnicodeIdentifierPart(cp)) {
1622:                            break;
1623:                        }
1624:                    }
1625:                    logln("TokenSymbolTable: parse \"" + text + "\" from "
1626:                            + start + " to " + i + " => \""
1627:                            + text.substring(start, i) + "\"");
1628:                    pos.setIndex(i);
1629:                    return text.substring(start, i);
1630:                }
1631:            }
1632:
1633:            public void TestSurrogate() {
1634:                String DATA[] = {
1635:                        // These should all behave identically
1636:                        "[abc\\uD800\\uDC00]", "[abc\uD800\uDC00]",
1637:                        "[abc\\U00010000]", };
1638:                for (int i = 0; i < DATA.length; ++i) {
1639:                    logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
1640:                    UnicodeSet set = new UnicodeSet(DATA[i]);
1641:                    expectContainment(set,
1642:                            CharsToUnicodeString("abc\\U00010000"),
1643:                            "\uD800;\uDC00"); // split apart surrogate-pair
1644:                    if (set.size() != 4) {
1645:                        errln(Utility.escape("FAIL: " + DATA[i] + ".size() == "
1646:                                + set.size() + ", expected 4"));
1647:                    }
1648:                }
1649:            }
1650:
1651:            void _testComplement(int a) {
1652:                UnicodeSet x = bitsToSet(a);
1653:                UnicodeSet z = bitsToSet(a);
1654:                z.complement();
1655:                int c = setToBits(z);
1656:                if (c != (~a)) {
1657:                    errln("FAILED: add: ~" + x + " != " + z);
1658:                    errln("FAILED: add: ~" + a + " != " + c);
1659:                }
1660:                checkCanonicalRep(z, "complement " + a);
1661:            }
1662:
1663:            void _testAdd(int a, int b) {
1664:                UnicodeSet x = bitsToSet(a);
1665:                UnicodeSet y = bitsToSet(b);
1666:                UnicodeSet z = bitsToSet(a);
1667:                z.addAll(y);
1668:                int c = setToBits(z);
1669:                if (c != (a | b)) {
1670:                    errln(Utility.escape("FAILED: add: " + x + " | " + y
1671:                            + " != " + z));
1672:                    errln("FAILED: add: " + a + " | " + b + " != " + c);
1673:                }
1674:                checkCanonicalRep(z, "add " + a + "," + b);
1675:            }
1676:
1677:            void _testRetain(int a, int b) {
1678:                UnicodeSet x = bitsToSet(a);
1679:                UnicodeSet y = bitsToSet(b);
1680:                UnicodeSet z = bitsToSet(a);
1681:                z.retainAll(y);
1682:                int c = setToBits(z);
1683:                if (c != (a & b)) {
1684:                    errln("FAILED: retain: " + x + " & " + y + " != " + z);
1685:                    errln("FAILED: retain: " + a + " & " + b + " != " + c);
1686:                }
1687:                checkCanonicalRep(z, "retain " + a + "," + b);
1688:            }
1689:
1690:            void _testRemove(int a, int b) {
1691:                UnicodeSet x = bitsToSet(a);
1692:                UnicodeSet y = bitsToSet(b);
1693:                UnicodeSet z = bitsToSet(a);
1694:                z.removeAll(y);
1695:                int c = setToBits(z);
1696:                if (c != (a & ~b)) {
1697:                    errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
1698:                    errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
1699:                }
1700:                checkCanonicalRep(z, "remove " + a + "," + b);
1701:            }
1702:
1703:            void _testXor(int a, int b) {
1704:                UnicodeSet x = bitsToSet(a);
1705:                UnicodeSet y = bitsToSet(b);
1706:                UnicodeSet z = bitsToSet(a);
1707:                z.complementAll(y);
1708:                int c = setToBits(z);
1709:                if (c != (a ^ b)) {
1710:                    errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
1711:                    errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
1712:                }
1713:                checkCanonicalRep(z, "complement " + a + "," + b);
1714:            }
1715:
1716:            /**
1717:             * Check that ranges are monotonically increasing and non-
1718:             * overlapping.
1719:             */
1720:            void checkCanonicalRep(UnicodeSet set, String msg) {
1721:                int n = set.getRangeCount();
1722:                if (n < 0) {
1723:                    errln("FAIL result of " + msg
1724:                            + ": range count should be >= 0 but is " + n
1725:                            + " for " + Utility.escape(set.toString()));
1726:                    return;
1727:                }
1728:                int last = 0;
1729:                for (int i = 0; i < n; ++i) {
1730:                    int start = set.getRangeStart(i);
1731:                    int end = set.getRangeEnd(i);
1732:                    if (start > end) {
1733:                        errln("FAIL result of " + msg + ": range " + (i + 1)
1734:                                + " start > end: " + start + ", " + end
1735:                                + " for " + Utility.escape(set.toString()));
1736:                    }
1737:                    if (i > 0 && start <= last) {
1738:                        errln("FAIL result of " + msg + ": range " + (i + 1)
1739:                                + " overlaps previous range: " + start + ", "
1740:                                + end + " for "
1741:                                + Utility.escape(set.toString()));
1742:                    }
1743:                    last = end;
1744:                }
1745:            }
1746:
1747:            /**
1748:             * Convert a bitmask to a UnicodeSet.
1749:             */
1750:            UnicodeSet bitsToSet(int a) {
1751:                UnicodeSet result = new UnicodeSet();
1752:                for (int i = 0; i < 32; ++i) {
1753:                    if ((a & (1 << i)) != 0) {
1754:                        result.add((char) i, (char) i);
1755:                    }
1756:                }
1757:
1758:                return result;
1759:            }
1760:
1761:            /**
1762:             * Convert a UnicodeSet to a bitmask.  Only the characters
1763:             * U+0000 to U+0020 are represented in the bitmask.
1764:             */
1765:            static int setToBits(UnicodeSet x) {
1766:                int result = 0;
1767:                for (int i = 0; i < 32; ++i) {
1768:                    if (x.contains((char) i)) {
1769:                        result |= (1 << i);
1770:                    }
1771:                }
1772:                return result;
1773:            }
1774:
1775:            /**
1776:             * Return the representation of an inversion list based UnicodeSet
1777:             * as a pairs list.  Ranges are listed in ascending Unicode order.
1778:             * For example, the set [a-zA-M3] is represented as "33AMaz".
1779:             */
1780:            static String getPairs(UnicodeSet set) {
1781:                StringBuffer pairs = new StringBuffer();
1782:                for (int i = 0; i < set.getRangeCount(); ++i) {
1783:                    int start = set.getRangeStart(i);
1784:                    int end = set.getRangeEnd(i);
1785:                    if (end > 0xFFFF) {
1786:                        end = 0xFFFF;
1787:                        i = set.getRangeCount(); // Should be unnecessary
1788:                    }
1789:                    pairs.append((char) start).append((char) end);
1790:                }
1791:                return pairs.toString();
1792:            }
1793:
1794:            /**
1795:             * Test function. Make sure that the sets have the right relation
1796:             */
1797:
1798:            void expectRelation(Object relationObj, Object set1Obj,
1799:                    Object set2Obj, String message) {
1800:                int relation = ((Integer) relationObj).intValue();
1801:                UnicodeSet set1 = (UnicodeSet) set1Obj;
1802:                UnicodeSet set2 = (UnicodeSet) set2Obj;
1803:
1804:                // by-the-by, check the iterator
1805:                checkRoundTrip(set1);
1806:                checkRoundTrip(set2);
1807:
1808:                boolean contains = set1.containsAll(set2);
1809:                boolean isContained = set2.containsAll(set1);
1810:                boolean disjoint = set1.containsNone(set2);
1811:                boolean equals = set1.equals(set2);
1812:
1813:                UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
1814:                UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
1815:                UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
1816:
1817:                // test basic properties
1818:
1819:                if (contains != (intersection.size() == set2.size())) {
1820:                    errln("FAIL contains1" + set1.toPattern(true) + ", "
1821:                            + set2.toPattern(true));
1822:                }
1823:
1824:                if (contains != (intersection.equals(set2))) {
1825:                    errln("FAIL contains2" + set1.toPattern(true) + ", "
1826:                            + set2.toPattern(true));
1827:                }
1828:
1829:                if (isContained != (intersection.size() == set1.size())) {
1830:                    errln("FAIL isContained1" + set1.toPattern(true) + ", "
1831:                            + set2.toPattern(true));
1832:                }
1833:
1834:                if (isContained != (intersection.equals(set1))) {
1835:                    errln("FAIL isContained2" + set1.toPattern(true) + ", "
1836:                            + set2.toPattern(true));
1837:                }
1838:
1839:                if ((contains && isContained) != equals) {
1840:                    errln("FAIL equals" + set1.toPattern(true) + ", "
1841:                            + set2.toPattern(true));
1842:                }
1843:
1844:                if (disjoint != (intersection.size() == 0)) {
1845:                    errln("FAIL disjoint" + set1.toPattern(true) + ", "
1846:                            + set2.toPattern(true));
1847:                }
1848:
1849:                // Now see if the expected relation is true
1850:                int status = (minus12.size() != 0 ? 4 : 0)
1851:                        | (intersection.size() != 0 ? 2 : 0)
1852:                        | (minus21.size() != 0 ? 1 : 0);
1853:
1854:                if (status != relation) {
1855:                    errln("FAIL relation incorrect" + message + "; desired = "
1856:                            + RELATION_NAME[relation] + "; found = "
1857:                            + RELATION_NAME[status] + "; set1 = "
1858:                            + set1.toPattern(true) + "; set2 = "
1859:                            + set2.toPattern(true));
1860:                }
1861:            }
1862:
1863:            /**
1864:             * Basic consistency check for a few items.
1865:             * That the iterator works, and that we can create a pattern and
1866:             * get the same thing back
1867:             */
1868:
1869:            void checkRoundTrip(UnicodeSet s) {
1870:                String pat = s.toPattern(false);
1871:                UnicodeSet t = copyWithIterator(s, false);
1872:                checkEqual(s, t, "iterator roundtrip");
1873:
1874:                t = copyWithIterator(s, true); // try range
1875:                checkEqual(s, t, "iterator roundtrip");
1876:
1877:                t = new UnicodeSet(pat);
1878:                checkEqual(s, t, "toPattern(false)");
1879:
1880:                pat = s.toPattern(true);
1881:                t = new UnicodeSet(pat);
1882:                checkEqual(s, t, "toPattern(true)");
1883:            }
1884:
1885:            UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
1886:                UnicodeSet t = new UnicodeSet();
1887:                UnicodeSetIterator it = new UnicodeSetIterator(s);
1888:                if (withRange) {
1889:                    while (it.nextRange()) {
1890:                        if (it.codepoint == UnicodeSetIterator.IS_STRING) {
1891:                            t.add(it.string);
1892:                        } else {
1893:                            t.add(it.codepoint, it.codepointEnd);
1894:                        }
1895:                    }
1896:                } else {
1897:                    while (it.next()) {
1898:                        if (it.codepoint == UnicodeSetIterator.IS_STRING) {
1899:                            t.add(it.string);
1900:                        } else {
1901:                            t.add(it.codepoint);
1902:                        }
1903:                    }
1904:                }
1905:                return t;
1906:            }
1907:
1908:            boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
1909:                if (!s.equals(t)) {
1910:                    errln("FAIL " + message + "; source = " + s.toPattern(true)
1911:                            + "; result = " + t.toPattern(true));
1912:                    return false;
1913:                }
1914:                return true;
1915:            }
1916:
1917:            void expectEqual(String name, String pat1, String pat2) {
1918:                UnicodeSet set1, set2;
1919:                try {
1920:                    set1 = new UnicodeSet(pat1);
1921:                    set2 = new UnicodeSet(pat2);
1922:                } catch (IllegalArgumentException e) {
1923:                    errln("FAIL: Couldn't create UnicodeSet from pattern for \""
1924:                            + name + "\": " + e.getMessage());
1925:                    return;
1926:                }
1927:                if (!set1.equals(set2)) {
1928:                    errln("FAIL: Sets built from patterns differ for \"" + name
1929:                            + "\"");
1930:                }
1931:            }
1932:
1933:            /**
1934:             * Expect the given set to contain the characters in charsIn and
1935:             * to not contain those in charsOut.
1936:             */
1937:            void expectContainment(String pat, String charsIn, String charsOut) {
1938:                UnicodeSet set;
1939:                try {
1940:                    set = new UnicodeSet(pat);
1941:                } catch (IllegalArgumentException e) {
1942:                    errln("FAIL: Couldn't create UnicodeSet from pattern \""
1943:                            + pat + "\": " + e.getMessage());
1944:                    return;
1945:                }
1946:                expectContainment(set, charsIn, charsOut);
1947:            }
1948:
1949:            /**
1950:             * Expect the given set to contain the characters in charsIn and
1951:             * to not contain those in charsOut.
1952:             */
1953:            void expectContainment(UnicodeSet set, String charsIn,
1954:                    String charsOut) {
1955:                StringBuffer bad = new StringBuffer();
1956:                if (charsIn != null) {
1957:                    charsIn = Utility.unescape(charsIn);
1958:                    for (int i = 0; i < charsIn.length();) {
1959:                        int c = UTF16.charAt(charsIn, i);
1960:                        i += UTF16.getCharCount(c);
1961:                        if (!set.contains(c)) {
1962:                            UTF16.append(bad, c);
1963:                        }
1964:                    }
1965:                    if (bad.length() > 0) {
1966:                        errln(Utility.escape("FAIL: set " + set
1967:                                + " does not contain " + bad
1968:                                + ", expected containment of " + charsIn));
1969:                    } else {
1970:                        logln(Utility.escape("Ok: set " + set + " contains "
1971:                                + charsIn));
1972:                    }
1973:                }
1974:                if (charsOut != null) {
1975:                    charsOut = Utility.unescape(charsOut);
1976:                    bad.setLength(0);
1977:                    for (int i = 0; i < charsOut.length();) {
1978:                        int c = UTF16.charAt(charsOut, i);
1979:                        i += UTF16.getCharCount(c);
1980:                        if (set.contains(c)) {
1981:                            UTF16.append(bad, c);
1982:                        }
1983:                    }
1984:                    if (bad.length() > 0) {
1985:                        errln(Utility.escape("FAIL: set " + set + " contains "
1986:                                + bad + ", expected non-containment of "
1987:                                + charsOut));
1988:                    } else {
1989:                        logln(Utility.escape("Ok: set " + set
1990:                                + " does not contain " + charsOut));
1991:                    }
1992:                }
1993:            }
1994:
1995:            void expectPattern(UnicodeSet set, String pattern,
1996:                    String expectedPairs) {
1997:                set.applyPattern(pattern);
1998:                if (!getPairs(set).equals(expectedPairs)) {
1999:                    errln("FAIL: applyPattern(\"" + pattern + "\") => pairs \""
2000:                            + Utility.escape(getPairs(set)) + "\", expected \""
2001:                            + Utility.escape(expectedPairs) + "\"");
2002:                } else {
2003:                    logln("Ok:   applyPattern(\"" + pattern + "\") => pairs \""
2004:                            + Utility.escape(getPairs(set)) + "\"");
2005:                }
2006:            }
2007:
2008:            void expectToPattern(UnicodeSet set, String expPat,
2009:                    String[] expStrings) {
2010:                String pat = set.toPattern(true);
2011:                if (pat.equals(expPat)) {
2012:                    logln("Ok:   toPattern() => \"" + pat + "\"");
2013:                } else {
2014:                    errln("FAIL: toPattern() => \"" + pat + "\", expected \""
2015:                            + expPat + "\"");
2016:                    return;
2017:                }
2018:                if (expStrings == null) {
2019:                    return;
2020:                }
2021:                boolean in = true;
2022:                for (int i = 0; i < expStrings.length; ++i) {
2023:                    if (expStrings[i] == NOT) { // sic; pointer comparison
2024:                        in = false;
2025:                        continue;
2026:                    }
2027:                    boolean contained = set.contains(expStrings[i]);
2028:                    if (contained == in) {
2029:                        logln("Ok: "
2030:                                + expPat
2031:                                + (contained ? " contains {"
2032:                                        : " does not contain {")
2033:                                + Utility.escape(expStrings[i]) + "}");
2034:                    } else {
2035:                        errln("FAIL: "
2036:                                + expPat
2037:                                + (contained ? " contains {"
2038:                                        : " does not contain {")
2039:                                + Utility.escape(expStrings[i]) + "}");
2040:                    }
2041:                }
2042:            }
2043:
2044:            void expectPairs(UnicodeSet set, String expectedPairs) {
2045:                if (!getPairs(set).equals(expectedPairs)) {
2046:                    errln("FAIL: Expected pair list \""
2047:                            + Utility.escape(expectedPairs) + "\", got \""
2048:                            + Utility.escape(getPairs(set)) + "\"");
2049:                }
2050:            }
2051:
2052:            static final String CharsToUnicodeString(String s) {
2053:                return Utility.unescape(s);
2054:            }
2055:
2056:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.