Source Code Cross Referenced for BasicTest.java in  » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » normalizer » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.normalizer 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         *******************************************************************************
0003:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
0004:         * others. All Rights Reserved.                                                *
0005:         *******************************************************************************
0006:         */
0007:
0008:        package com.ibm.icu.dev.test.normalizer;
0009:
0010:        import java.text.StringCharacterIterator;
0011:        import java.util.Random;
0012:
0013:        import com.ibm.icu.dev.test.TestFmwk;
0014:        import com.ibm.icu.impl.NormalizerImpl;
0015:        import com.ibm.icu.impl.USerializedSet;
0016:        import com.ibm.icu.impl.Utility;
0017:        import com.ibm.icu.lang.*;
0018:        import com.ibm.icu.lang.UCharacter;
0019:        import com.ibm.icu.lang.UCharacterCategory;
0020:        import com.ibm.icu.text.Normalizer;
0021:        import com.ibm.icu.text.UCharacterIterator;
0022:        import com.ibm.icu.text.UTF16;
0023:        import com.ibm.icu.text.UnicodeSet;
0024:        import com.ibm.icu.text.UnicodeSetIterator;
0025:
0026:        public class BasicTest extends TestFmwk {
0027:            public static void main(String[] args) throws Exception {
0028:                new BasicTest().run(args);
0029:            }
0030:
0031:            String[][] canonTests = {
0032:                    // Input                Decomposed              Composed
0033:                    { "cat", "cat", "cat" },
0034:                    { "\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark", },
0035:
0036:                    { "\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above
0037:                    { "D\u0307", "D\u0307", "\u1e0a" }, // D dot_above
0038:
0039:                    { "\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above
0040:                    { "\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below
0041:                    { "D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above
0042:
0043:                    { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",
0044:                            "\u1e10\u0323\u0307" }, // D dot_below cedilla dot_above
0045:                    { "D\u0307\u0328\u0323", "D\u0328\u0323\u0307",
0046:                            "\u1e0c\u0328\u0307" }, // D dot_above ogonek dot_below
0047:
0048:                    { "\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave
0049:                    { "\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave
0050:                    { "\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron
0051:
0052:                    { "\u212b", "A\u030a", "\u00c5" }, // angstrom_sign
0053:                    { "\u00c5", "A\u030a", "\u00c5" }, // A-ring
0054:
0055:                    { "\u00c4ffin", "A\u0308ffin", "\u00c4ffin" },
0056:                    { "\u00c4\uFB03n", "A\u0308\uFB03n", "\u00c4\uFB03n" },
0057:
0058:                    { "\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0
0059:                    { "\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0
0060:
0061:                    { "Henry IV", "Henry IV", "Henry IV" },
0062:                    { "Henry \u2163", "Henry \u2163", "Henry \u2163" },
0063:
0064:                    { "\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
0065:                    { "\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
0066:                    { "\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten
0067:                    { "\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten
0068:                    { "\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten
0069:
0070:                    { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
0071:                    {
0072:                            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e",
0073:                            "\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165",
0074:                            "\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165" }, };
0075:
0076:            String[][] compatTests = {
0077:                    // Input                Decomposed              Composed
0078:                    { "cat", "cat", "cat" },
0079:                    { "\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC", }, // Alef-Lamed vs. Alef, Lamed
0080:
0081:                    { "\u00C4ffin", "A\u0308ffin", "\u00C4ffin" },
0082:                    { "\u00C4\uFB03n", "A\u0308ffin", "\u00C4ffin" }, // ffi ligature -> f + f + i
0083:
0084:                    { "\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0
0085:                    { "\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i
0086:
0087:                    { "Henry IV", "Henry IV", "Henry IV" },
0088:                    { "Henry \u2163", "Henry IV", "Henry IV" },
0089:
0090:                    { "\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
0091:                    { "\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
0092:
0093:                    { "\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten
0094:
0095:                    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
0096:                    { "\uFF76\uFF9E", "\u30AB\u3099", "\u30AC" }, // hw_ka + hw_ten
0097:                    { "\u30AB\uFF9E", "\u30AB\u3099", "\u30AC" }, // ka + hw_ten
0098:
0099:            };
0100:
0101:            // With Canonical decomposition, Hangul syllables should get decomposed
0102:            // into Jamo, but Jamo characters should not be decomposed into
0103:            // conjoining Jamo
0104:            String[][] hangulCanon = {
0105:                    // Input                Decomposed              Composed
0106:                    { "\ud4db", "\u1111\u1171\u11b6", "\ud4db" },
0107:                    { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6", "\ud4db" }, };
0108:
0109:            // With compatibility decomposition turned on,
0110:            // it should go all the way down to conjoining Jamo characters.
0111:            // THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
0112:            String[][] hangulCompat = {
0113:            // Input        Decomposed                          Composed
0114:            // { "\ud4db",     "\u1111\u116e\u1175\u11af\u11c2",   "\ud478\u1175\u11af\u11c2"  },
0115:            };
0116:
0117:            public void TestHangulCompose() throws Exception {
0118:                // Make sure that the static composition methods work
0119:                logln("Canonical composition...");
0120:                staticTest(Normalizer.NFC, hangulCanon, 2);
0121:                logln("Compatibility composition...");
0122:                staticTest(Normalizer.NFKC, hangulCompat, 2);
0123:                // Now try iterative composition....
0124:                logln("Iterative composition...");
0125:                Normalizer norm = new Normalizer("", Normalizer.NFC, 0);
0126:                iterateTest(norm, hangulCanon, 2);
0127:
0128:                norm.setMode(Normalizer.NFKD);
0129:                iterateTest(norm, hangulCompat, 2);
0130:
0131:                // And finally, make sure you can do it in reverse too
0132:                logln("Reverse iteration...");
0133:                norm.setMode(Normalizer.NFC);
0134:                backAndForth(norm, hangulCanon);
0135:            }
0136:
0137:            public void TestHangulDecomp() throws Exception {
0138:                // Make sure that the static decomposition methods work
0139:                logln("Canonical decomposition...");
0140:                staticTest(Normalizer.NFD, hangulCanon, 1);
0141:                logln("Compatibility decomposition...");
0142:                staticTest(Normalizer.NFKD, hangulCompat, 1);
0143:
0144:                // Now the iterative decomposition methods...
0145:                logln("Iterative decomposition...");
0146:                Normalizer norm = new Normalizer("", Normalizer.NFD, 0);
0147:                iterateTest(norm, hangulCanon, 1);
0148:
0149:                norm.setMode(Normalizer.NFKD);
0150:                iterateTest(norm, hangulCompat, 1);
0151:
0152:                // And finally, make sure you can do it in reverse too
0153:                logln("Reverse iteration...");
0154:                norm.setMode(Normalizer.NFD);
0155:                backAndForth(norm, hangulCanon);
0156:            }
0157:
0158:            public void TestNone() throws Exception {
0159:                Normalizer norm = new Normalizer("", Normalizer.NONE, 0);
0160:                iterateTest(norm, canonTests, 0);
0161:                staticTest(Normalizer.NONE, canonTests, 0);
0162:            }
0163:
0164:            public void TestDecomp() throws Exception {
0165:                Normalizer norm = new Normalizer("", Normalizer.NFD, 0);
0166:                iterateTest(norm, canonTests, 1);
0167:                staticTest(Normalizer.NFD, canonTests, 1);
0168:                decomposeTest(Normalizer.NFD, canonTests, 1);
0169:            }
0170:
0171:            public void TestCompatDecomp() throws Exception {
0172:                Normalizer norm = new Normalizer("", Normalizer.NFKD, 0);
0173:                iterateTest(norm, compatTests, 1);
0174:                staticTest(Normalizer.NFKD, compatTests, 1);
0175:                decomposeTest(Normalizer.NFKD, compatTests, 1);
0176:            }
0177:
0178:            public void TestCanonCompose() throws Exception {
0179:                Normalizer norm = new Normalizer("", Normalizer.NFC, 0);
0180:                iterateTest(norm, canonTests, 2);
0181:                staticTest(Normalizer.NFC, canonTests, 2);
0182:                composeTest(Normalizer.NFC, canonTests, 2);
0183:            }
0184:
0185:            public void TestCompatCompose() throws Exception {
0186:                Normalizer norm = new Normalizer("", Normalizer.NFKC, 0);
0187:                iterateTest(norm, compatTests, 2);
0188:                staticTest(Normalizer.NFKC, compatTests, 2);
0189:                composeTest(Normalizer.NFKC, compatTests, 2);
0190:            }
0191:
0192:            public void TestExplodingBase() throws Exception {
0193:                // \u017f - Latin small letter long s
0194:                // \u0307 - combining dot above
0195:                // \u1e61 - Latin small letter s with dot above
0196:                // \u1e9b - Latin small letter long s with dot above
0197:                String[][] canon = {
0198:                        // Input                Decomposed              Composed
0199:                        { "Tschu\u017f", "Tschu\u017f", "Tschu\u017f" },
0200:                        { "Tschu\u1e9b", "Tschu\u017f\u0307", "Tschu\u1e9b" }, };
0201:                String[][] compat = {
0202:                        // Input                Decomposed              Composed
0203:                        { "\u017f", "s", "s" },
0204:                        { "\u1e9b", "s\u0307", "\u1e61" }, };
0205:
0206:                staticTest(Normalizer.NFD, canon, 1);
0207:                staticTest(Normalizer.NFC, canon, 2);
0208:
0209:                staticTest(Normalizer.NFKD, compat, 1);
0210:                staticTest(Normalizer.NFKC, compat, 2);
0211:
0212:            }
0213:
0214:            /**
0215:             * The Tibetan vowel sign AA, 0f71, was messed up prior to
0216:             * Unicode version 2.1.9.
0217:             * Once 2.1.9 or 3.0 is released, uncomment this test.
0218:             */
0219:            public void TestTibetan() throws Exception {
0220:                String[][] decomp = { { "\u0f77", "\u0f77",
0221:                        "\u0fb2\u0f71\u0f80" } };
0222:                String[][] compose = { { "\u0fb2\u0f71\u0f80",
0223:                        "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" } };
0224:
0225:                staticTest(Normalizer.NFD, decomp, 1);
0226:                staticTest(Normalizer.NFKD, decomp, 2);
0227:                staticTest(Normalizer.NFC, compose, 1);
0228:                staticTest(Normalizer.NFKC, compose, 2);
0229:            }
0230:
0231:            /**
0232:             * Make sure characters in the CompositionExclusion.txt list do not get
0233:             * composed to.
0234:             */
0235:            public void TestCompositionExclusion() throws Exception {
0236:                // This list is generated from CompositionExclusion.txt.
0237:                // Update whenever the normalizer tables are updated.  Note
0238:                // that we test all characters listed, even those that can be
0239:                // derived from the Unicode DB and are therefore commented
0240:                // out.
0241:                String EXCLUDED = "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958"
0242:                        + "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC"
0243:                        + "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E"
0244:                        + "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69"
0245:                        + "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2"
0246:                        + "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79"
0247:                        + "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB"
0248:                        + "\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000"
0249:                        + "\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10"
0250:                        + "\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F"
0251:                        + "\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31"
0252:                        + "\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A"
0253:                        + "\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46"
0254:                        + "\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E";
0255:                for (int i = 0; i < EXCLUDED.length(); ++i) {
0256:                    String a = String.valueOf(EXCLUDED.charAt(i));
0257:                    String b = Normalizer.normalize(a, Normalizer.NFKD);
0258:                    String c = Normalizer.normalize(b, Normalizer.NFC);
0259:                    if (c.equals(a)) {
0260:                        errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => "
0261:                                + hex(b) + " x COMPOSE => " + hex(c));
0262:                    } else if (isVerbose()) {
0263:                        logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " + hex(b)
0264:                                + " x COMPOSE => " + hex(c));
0265:                    }
0266:                }
0267:                // The following method works too, but it is somewhat
0268:                // incestuous.  It uses UInfo, which is the same database that
0269:                // NormalizerBuilder uses, so if something is wrong with
0270:                // UInfo, the following test won't show it.  All it will show
0271:                // is that NormalizerBuilder has been run with whatever the
0272:                // current UInfo is.
0273:                //
0274:                // We comment this out in favor of the test above, which
0275:                // provides independent verification (but also requires
0276:                // independent updating).
0277:                //      logln("---");
0278:                //      UInfo uinfo = new UInfo();
0279:                //      for (int i=0; i<=0xFFFF; ++i) {
0280:                //          if (!uinfo.isExcludedComposition((char)i) ||
0281:                //              (!uinfo.hasCanonicalDecomposition((char)i) &&
0282:                //               !uinfo.hasCompatibilityDecomposition((char)i))) continue;
0283:                //          String a = String.valueOf((char)i);
0284:                //          String b = Normalizer.normalize(a,Normalizer.DECOMP_COMPAT,0);
0285:                //          String c = Normalizer.normalize(b,Normalizer.COMPOSE,0);
0286:                //          if (c.equals(a)) {
0287:                //              errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
0288:                //                    hex(b) + " x COMPOSE => " +
0289:                //                    hex(c));
0290:                //          } else if (isVerbose()) {
0291:                //              logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
0292:                //                    hex(b) + " x COMPOSE => " +
0293:                //                    hex(c));
0294:                //          }
0295:                //      }
0296:            }
0297:
0298:            /**
0299:             * Test for a problem that showed up just before ICU 1.6 release
0300:             * having to do with combining characters with an index of zero.
0301:             * Such characters do not participate in any canonical
0302:             * decompositions.  However, having an index of zero means that
0303:             * they all share one typeMask[] entry, that is, they all have to
0304:             * map to the same canonical class, which is not the case, in
0305:             * reality.
0306:             */
0307:            public void TestZeroIndex() throws Exception {
0308:                String[] DATA = {
0309:                        // Expect col1 x COMPOSE_COMPAT => col2
0310:                        // Expect col2 x DECOMP => col3
0311:                        "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
0312:                        "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
0313:                        "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
0314:                        "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
0315:                        "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321", };
0316:
0317:                for (int i = 0; i < DATA.length; i += 3) {
0318:                    String a = DATA[i];
0319:                    String b = Normalizer.normalize(a, Normalizer.NFKC);
0320:                    String exp = DATA[i + 1];
0321:                    if (b.equals(exp)) {
0322:                        logln("Ok: " + hex(a) + " x COMPOSE_COMPAT => "
0323:                                + hex(b));
0324:                    } else {
0325:                        errln("FAIL: " + hex(a) + " x COMPOSE_COMPAT => "
0326:                                + hex(b) + ", expect " + hex(exp));
0327:                    }
0328:                    a = Normalizer.normalize(b, Normalizer.NFD);
0329:                    exp = DATA[i + 2];
0330:                    if (a.equals(exp)) {
0331:                        logln("Ok: " + hex(b) + " x DECOMP => " + hex(a));
0332:                    } else {
0333:                        errln("FAIL: " + hex(b) + " x DECOMP => " + hex(a)
0334:                                + ", expect " + hex(exp));
0335:                    }
0336:                }
0337:            }
0338:
0339:            /**
0340:             * Test for a problem found by Verisign.  Problem is that
0341:             * characters at the start of a string are not put in canonical
0342:             * order correctly by compose() if there is no starter.
0343:             */
0344:            public void TestVerisign() throws Exception {
0345:                String[] inputs = {
0346:                        "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
0347:                        "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad" };
0348:                String[] outputs = {
0349:                        "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
0350:                        "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4" };
0351:
0352:                for (int i = 0; i < inputs.length; ++i) {
0353:                    String input = inputs[i];
0354:                    String output = outputs[i];
0355:                    String result = Normalizer.decompose(input, false);
0356:                    if (!result.equals(output)) {
0357:                        errln("FAIL input: " + hex(input));
0358:                        errln(" decompose: " + hex(result));
0359:                        errln("  expected: " + hex(output));
0360:                    }
0361:                    result = Normalizer.compose(input, false);
0362:                    if (!result.equals(output)) {
0363:                        errln("FAIL input: " + hex(input));
0364:                        errln("   compose: " + hex(result));
0365:                        errln("  expected: " + hex(output));
0366:                    }
0367:                }
0368:
0369:            }
0370:
0371:            public void TestQuickCheckResultNO() throws Exception {
0372:                final char CPNFD[] = { 0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
0373:                        0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E };
0374:                final char CPNFC[] = { 0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
0375:                        0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E };
0376:                final char CPNFKD[] = { 0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
0377:                        0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D };
0378:                final char CPNFKC[] = { 0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
0379:                        0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D };
0380:
0381:                final int SIZE = 10;
0382:
0383:                int count = 0;
0384:                for (; count < SIZE; count++) {
0385:                    if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
0386:                            Normalizer.NFD, 0) != Normalizer.NO) {
0387:                        errln("ERROR in NFD quick check at U+"
0388:                                + Integer.toHexString(CPNFD[count]));
0389:                        return;
0390:                    }
0391:                    if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
0392:                            Normalizer.NFC, 0) != Normalizer.NO) {
0393:                        errln("ERROR in NFC quick check at U+"
0394:                                + Integer.toHexString(CPNFC[count]));
0395:                        return;
0396:                    }
0397:                    if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
0398:                            Normalizer.NFKD, 0) != Normalizer.NO) {
0399:                        errln("ERROR in NFKD quick check at U+"
0400:                                + Integer.toHexString(CPNFKD[count]));
0401:                        return;
0402:                    }
0403:                    if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
0404:                            Normalizer.NFKC, 0) != Normalizer.NO) {
0405:                        errln("ERROR in NFKC quick check at U+"
0406:                                + Integer.toHexString(CPNFKC[count]));
0407:                        return;
0408:                    }
0409:                    // for improving coverage
0410:                    if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
0411:                            Normalizer.NFKC) != Normalizer.NO) {
0412:                        errln("ERROR in NFKC quick check at U+"
0413:                                + Integer.toHexString(CPNFKC[count]));
0414:                        return;
0415:                    }
0416:                }
0417:            }
0418:
0419:            public void TestQuickCheckResultYES() throws Exception {
0420:                final char CPNFD[] = { 0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
0421:                        0x2261, 0x3075, 0x4000, 0x5000, 0xF000 };
0422:                final char CPNFC[] = { 0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
0423:                        0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000 };
0424:                final char CPNFKD[] = { 0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
0425:                        0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27 };
0426:                final char CPNFKC[] = { 0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
0427:                        0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E };
0428:
0429:                final int SIZE = 10;
0430:                int count = 0;
0431:
0432:                char cp = 0;
0433:                while (cp < 0xA0) {
0434:                    if (Normalizer.quickCheck(String.valueOf(cp),
0435:                            Normalizer.NFD, 0) != Normalizer.YES) {
0436:                        errln("ERROR in NFD quick check at U+"
0437:                                + Integer.toHexString(cp));
0438:                        return;
0439:                    }
0440:                    if (Normalizer.quickCheck(String.valueOf(cp),
0441:                            Normalizer.NFC, 0) != Normalizer.YES) {
0442:                        errln("ERROR in NFC quick check at U+"
0443:                                + Integer.toHexString(cp));
0444:                        return;
0445:                    }
0446:                    if (Normalizer.quickCheck(String.valueOf(cp),
0447:                            Normalizer.NFKD, 0) != Normalizer.YES) {
0448:                        errln("ERROR in NFKD quick check at U+"
0449:                                + Integer.toHexString(cp));
0450:                        return;
0451:                    }
0452:                    if (Normalizer.quickCheck(String.valueOf(cp),
0453:                            Normalizer.NFKC, 0) != Normalizer.YES) {
0454:                        errln("ERROR in NFKC quick check at U+"
0455:                                + Integer.toHexString(cp));
0456:                        return;
0457:                    }
0458:                    // improve the coverage
0459:                    if (Normalizer.quickCheck(String.valueOf(cp),
0460:                            Normalizer.NFKC) != Normalizer.YES) {
0461:                        errln("ERROR in NFKC quick check at U+"
0462:                                + Integer.toHexString(cp));
0463:                        return;
0464:                    }
0465:                    cp++;
0466:                }
0467:
0468:                for (; count < SIZE; count++) {
0469:                    if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
0470:                            Normalizer.NFD, 0) != Normalizer.YES) {
0471:                        errln("ERROR in NFD quick check at U+"
0472:                                + Integer.toHexString(CPNFD[count]));
0473:                        return;
0474:                    }
0475:                    if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
0476:                            Normalizer.NFC, 0) != Normalizer.YES) {
0477:                        errln("ERROR in NFC quick check at U+"
0478:                                + Integer.toHexString(CPNFC[count]));
0479:                        return;
0480:                    }
0481:                    if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
0482:                            Normalizer.NFKD, 0) != Normalizer.YES) {
0483:                        errln("ERROR in NFKD quick check at U+"
0484:                                + Integer.toHexString(CPNFKD[count]));
0485:                        return;
0486:                    }
0487:                    if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
0488:                            Normalizer.NFKC, 0) != Normalizer.YES) {
0489:                        errln("ERROR in NFKC quick check at U+"
0490:                                + Integer.toHexString(CPNFKC[count]));
0491:                        return;
0492:                    }
0493:                    // improve the coverage
0494:                    if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
0495:                            Normalizer.NFKC) != Normalizer.YES) {
0496:                        errln("ERROR in NFKC quick check at U+"
0497:                                + Integer.toHexString(CPNFKC[count]));
0498:                        return;
0499:                    }
0500:                }
0501:            }
0502:
0503:            public void TestBengali() throws Exception {
0504:                String input = "\u09bc\u09be\u09cd\u09be";
0505:                String output = Normalizer.normalize(input, Normalizer.NFC);
0506:                if (!input.equals(output)) {
0507:                    errln("ERROR in NFC of string");
0508:                }
0509:            }
0510:
0511:            public void TestQuickCheckResultMAYBE() throws Exception {
0512:
0513:                final char[] CPNFC = { 0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
0514:                        0x116A, 0x1173, 0x1175, 0x3099, 0x309A };
0515:                final char[] CPNFKC = { 0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
0516:                        0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099 };
0517:
0518:                final int SIZE = 10;
0519:
0520:                int count = 0;
0521:
0522:                /* NFD and NFKD does not have any MAYBE codepoints */
0523:                for (; count < SIZE; count++) {
0524:                    if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
0525:                            Normalizer.NFC, 0) != Normalizer.MAYBE) {
0526:                        errln("ERROR in NFC quick check at U+"
0527:                                + Integer.toHexString(CPNFC[count]));
0528:                        return;
0529:                    }
0530:                    if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
0531:                            Normalizer.NFKC, 0) != Normalizer.MAYBE) {
0532:                        errln("ERROR in NFKC quick check at U+"
0533:                                + Integer.toHexString(CPNFKC[count]));
0534:                        return;
0535:                    }
0536:                    if (Normalizer.quickCheck(new char[] { CPNFC[count] },
0537:                            Normalizer.NFC, 0) != Normalizer.MAYBE) {
0538:                        errln("ERROR in NFC quick check at U+"
0539:                                + Integer.toHexString(CPNFC[count]));
0540:                        return;
0541:                    }
0542:                    if (Normalizer.quickCheck(new char[] { CPNFKC[count] },
0543:                            Normalizer.NFKC, 0) != Normalizer.MAYBE) {
0544:                        errln("ERROR in NFKC quick check at U+"
0545:                                + Integer.toHexString(CPNFKC[count]));
0546:                        return;
0547:                    }
0548:                    if (Normalizer.quickCheck(new char[] { CPNFKC[count] },
0549:                            Normalizer.NONE, 0) != Normalizer.MAYBE) {
0550:                        errln("ERROR in NFKC quick check at U+"
0551:                                + Integer.toHexString(CPNFKC[count]));
0552:                        return;
0553:                    }
0554:                }
0555:            }
0556:
0557:            public void TestQuickCheckStringResult() throws Exception {
0558:                int count;
0559:                String d;
0560:                String c;
0561:
0562:                for (count = 0; count < canonTests.length; count++) {
0563:                    d = canonTests[count][1];
0564:                    c = canonTests[count][2];
0565:                    if (Normalizer.quickCheck(d, Normalizer.NFD, 0) != Normalizer.YES) {
0566:                        errln("ERROR in NFD quick check for string at count "
0567:                                + count);
0568:                        return;
0569:                    }
0570:
0571:                    if (Normalizer.quickCheck(c, Normalizer.NFC, 0) == Normalizer.NO) {
0572:                        errln("ERROR in NFC quick check for string at count "
0573:                                + count);
0574:                        return;
0575:                    }
0576:                }
0577:
0578:                for (count = 0; count < compatTests.length; count++) {
0579:                    d = compatTests[count][1];
0580:                    c = compatTests[count][2];
0581:                    if (Normalizer.quickCheck(d, Normalizer.NFKD, 0) != Normalizer.YES) {
0582:                        errln("ERROR in NFKD quick check for string at count "
0583:                                + count);
0584:                        return;
0585:                    }
0586:
0587:                    if (Normalizer.quickCheck(c, Normalizer.NFKC, 0) != Normalizer.YES) {
0588:                        errln("ERROR in NFKC quick check for string at count "
0589:                                + count);
0590:                        return;
0591:                    }
0592:                }
0593:            }
0594:
0595:            static final int qcToInt(Normalizer.QuickCheckResult qc) {
0596:                if (qc == Normalizer.NO) {
0597:                    return 0;
0598:                } else if (qc == Normalizer.YES) {
0599:                    return 1;
0600:                } else /* Normalizer.MAYBE */{
0601:                    return 2;
0602:                }
0603:            }
0604:
0605:            public void TestQuickCheckPerCP() {
0606:                int c, lead, trail;
0607:                String s, nfd;
0608:                int lccc1, lccc2, tccc1, tccc2;
0609:                int qc1, qc2;
0610:
0611:                if (UCharacter
0612:                        .getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK) != 1
0613:                        || // YES
0614:                        UCharacter
0615:                                .getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK) != 1
0616:                        || UCharacter
0617:                                .getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK) != 2
0618:                        || // MAYBE
0619:                        UCharacter
0620:                                .getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK) != 2
0621:                        || UCharacter
0622:                                .getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS) != UCharacter
0623:                                .getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)
0624:                        || UCharacter
0625:                                .getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS) != UCharacter
0626:                                .getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)) {
0627:                    errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
0628:                }
0629:
0630:                /*
0631:                 * compare the quick check property values for some code points
0632:                 * to the quick check results for checking same-code point strings
0633:                 */
0634:                c = 0;
0635:                while (c < 0x110000) {
0636:                    s = UTF16.valueOf(c);
0637:
0638:                    qc1 = UCharacter.getIntPropertyValue(c,
0639:                            UProperty.NFC_QUICK_CHECK);
0640:                    qc2 = qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
0641:                    if (qc1 != qc2) {
0642:                        errln("getIntPropertyValue(NFC)=" + qc1 + " != " + qc2
0643:                                + "=quickCheck(NFC) for U+"
0644:                                + Integer.toHexString(c));
0645:                    }
0646:
0647:                    qc1 = UCharacter.getIntPropertyValue(c,
0648:                            UProperty.NFD_QUICK_CHECK);
0649:                    qc2 = qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
0650:                    if (qc1 != qc2) {
0651:                        errln("getIntPropertyValue(NFD)=" + qc1 + " != " + qc2
0652:                                + "=quickCheck(NFD) for U+"
0653:                                + Integer.toHexString(c));
0654:                    }
0655:
0656:                    qc1 = UCharacter.getIntPropertyValue(c,
0657:                            UProperty.NFKC_QUICK_CHECK);
0658:                    qc2 = qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
0659:                    if (qc1 != qc2) {
0660:                        errln("getIntPropertyValue(NFKC)=" + qc1 + " != " + qc2
0661:                                + "=quickCheck(NFKC) for U+"
0662:                                + Integer.toHexString(c));
0663:                    }
0664:
0665:                    qc1 = UCharacter.getIntPropertyValue(c,
0666:                            UProperty.NFKD_QUICK_CHECK);
0667:                    qc2 = qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
0668:                    if (qc1 != qc2) {
0669:                        errln("getIntPropertyValue(NFKD)=" + qc1 + " != " + qc2
0670:                                + "=quickCheck(NFKD) for U+"
0671:                                + Integer.toHexString(c));
0672:                    }
0673:
0674:                    nfd = Normalizer.normalize(s, Normalizer.NFD);
0675:                    lead = UTF16.charAt(nfd, 0);
0676:                    trail = UTF16.charAt(nfd, nfd.length() - 1);
0677:
0678:                    lccc1 = UCharacter.getIntPropertyValue(c,
0679:                            UProperty.LEAD_CANONICAL_COMBINING_CLASS);
0680:                    lccc2 = UCharacter.getCombiningClass(lead);
0681:                    tccc1 = UCharacter.getIntPropertyValue(c,
0682:                            UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
0683:                    tccc2 = UCharacter.getCombiningClass(trail);
0684:
0685:                    if (lccc1 != lccc2) {
0686:                        errln("getIntPropertyValue(lccc)=" + lccc1 + " != "
0687:                                + lccc2 + "=getCombiningClass(lead) for U+"
0688:                                + Integer.toHexString(c));
0689:                    }
0690:                    if (tccc1 != tccc2) {
0691:                        errln("getIntPropertyValue(tccc)=" + tccc1 + " != "
0692:                                + tccc2 + "=getCombiningClass(trail) for U+"
0693:                                + Integer.toHexString(c));
0694:                    }
0695:
0696:                    /* skip some code points */
0697:                    c = (20 * c) / 19 + 1;
0698:                }
0699:            }
0700:
//------------------------------------------------------------------------
// Internal utilities
//
0707:
0708:            private void backAndForth(Normalizer iter, String input) {
0709:                iter.setText(input);
0710:
0711:                // Run through the iterator forwards and stick it into a StringBuffer
0712:                StringBuffer forward = new StringBuffer();
0713:                for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter
0714:                        .next()) {
0715:                    forward.append(ch);
0716:                }
0717:
0718:                // Now do it backwards
0719:                StringBuffer reverse = new StringBuffer();
0720:                for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter
0721:                        .previous()) {
0722:                    reverse.insert(0, ch);
0723:                }
0724:
0725:                if (!forward.toString().equals(reverse.toString())) {
0726:                    errln("FAIL: Forward/reverse mismatch for input "
0727:                            + hex(input) + ", forward: " + hex(forward)
0728:                            + ", backward: " + hex(reverse));
0729:                } else if (isVerbose()) {
0730:                    logln("Ok: Forward/reverse for input " + hex(input)
0731:                            + ", forward: " + hex(forward) + ", backward: "
0732:                            + hex(reverse));
0733:                }
0734:            }
0735:
0736:            private void backAndForth(Normalizer iter, String[][] tests) {
0737:                for (int i = 0; i < tests.length; i++) {
0738:                    iter.setText(tests[i][0]);
0739:
0740:                    // Run through the iterator forwards and stick it into a
0741:                    // StringBuffer
0742:                    StringBuffer forward = new StringBuffer();
0743:                    for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter
0744:                            .next()) {
0745:                        forward.append(ch);
0746:                    }
0747:
0748:                    // Now do it backwards
0749:                    StringBuffer reverse = new StringBuffer();
0750:                    for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter
0751:                            .previous()) {
0752:                        reverse.insert(0, ch);
0753:                    }
0754:
0755:                    if (!forward.toString().equals(reverse.toString())) {
0756:                        errln("FAIL: Forward/reverse mismatch for input "
0757:                                + hex(tests[i][0]) + ", forward: "
0758:                                + hex(forward) + ", backward: " + hex(reverse));
0759:                    } else if (isVerbose()) {
0760:                        logln("Ok: Forward/reverse for input "
0761:                                + hex(tests[i][0]) + ", forward: "
0762:                                + hex(forward) + ", backward: " + hex(reverse));
0763:                    }
0764:                }
0765:            }
0766:
0767:            private void staticTest(Normalizer.Mode mode, String[][] tests,
0768:                    int outCol) throws Exception {
0769:                for (int i = 0; i < tests.length; i++) {
0770:                    String input = Utility.unescape(tests[i][0]);
0771:                    String expect = Utility.unescape(tests[i][outCol]);
0772:
0773:                    logln("Normalizing '" + input + "' (" + hex(input) + ")");
0774:
0775:                    String output = Normalizer.normalize(input, mode);
0776:
0777:                    if (!output.equals(expect)) {
0778:                        errln("FAIL: case " + i + " expected '" + expect
0779:                                + "' (" + hex(expect) + ")" + " but got '"
0780:                                + output + "' (" + hex(output) + ")");
0781:                    }
0782:                }
0783:                char[] output = new char[1];
0784:                for (int i = 0; i < tests.length; i++) {
0785:                    char[] input = Utility.unescape(tests[i][0]).toCharArray();
0786:                    String expect = Utility.unescape(tests[i][outCol]);
0787:
0788:                    logln("Normalizing '" + new String(input) + "' ("
0789:                            + hex(new String(input)) + ")");
0790:                    int reqLength = 0;
0791:                    while (true) {
0792:                        try {
0793:                            reqLength = Normalizer.normalize(input, output,
0794:                                    mode, 0);
0795:                            if (reqLength <= output.length) {
0796:                                break;
0797:                            }
0798:                        } catch (IndexOutOfBoundsException e) {
0799:                            output = new char[Integer.parseInt(e.getMessage())];
0800:                            continue;
0801:                        }
0802:                    }
0803:                    if (!expect.equals(new String(output, 0, reqLength))) {
0804:                        errln("FAIL: case " + i + " expected '" + expect
0805:                                + "' (" + hex(expect) + ")" + " but got '"
0806:                                + new String(output) + "' ("
0807:                                + hex(new String(output)) + ")");
0808:                    }
0809:                }
0810:            }
0811:
0812:            private void decomposeTest(Normalizer.Mode mode, String[][] tests,
0813:                    int outCol) throws Exception {
0814:                for (int i = 0; i < tests.length; i++) {
0815:                    String input = Utility.unescape(tests[i][0]);
0816:                    String expect = Utility.unescape(tests[i][outCol]);
0817:
0818:                    logln("Normalizing '" + input + "' (" + hex(input) + ")");
0819:
0820:                    String output = Normalizer.decompose(input,
0821:                            mode == Normalizer.NFKD);
0822:
0823:                    if (!output.equals(expect)) {
0824:                        errln("FAIL: case " + i + " expected '" + expect
0825:                                + "' (" + hex(expect) + ")" + " but got '"
0826:                                + output + "' (" + hex(output) + ")");
0827:                    }
0828:                }
0829:                char[] output = new char[1];
0830:                for (int i = 0; i < tests.length; i++) {
0831:                    char[] input = Utility.unescape(tests[i][0]).toCharArray();
0832:                    String expect = Utility.unescape(tests[i][outCol]);
0833:
0834:                    logln("Normalizing '" + new String(input) + "' ("
0835:                            + hex(new String(input)) + ")");
0836:                    int reqLength = 0;
0837:                    while (true) {
0838:                        try {
0839:                            reqLength = Normalizer.decompose(input, output,
0840:                                    mode == Normalizer.NFKD, 0);
0841:                            if (reqLength <= output.length) {
0842:                                break;
0843:                            }
0844:                        } catch (IndexOutOfBoundsException e) {
0845:                            output = new char[Integer.parseInt(e.getMessage())];
0846:                            continue;
0847:                        }
0848:                    }
0849:                    if (!expect.equals(new String(output, 0, reqLength))) {
0850:                        errln("FAIL: case " + i + " expected '" + expect
0851:                                + "' (" + hex(expect) + ")" + " but got '"
0852:                                + new String(output) + "' ("
0853:                                + hex(new String(output)) + ")");
0854:                    }
0855:                }
0856:                output = new char[1];
0857:                for (int i = 0; i < tests.length; i++) {
0858:                    char[] input = Utility.unescape(tests[i][0]).toCharArray();
0859:                    String expect = Utility.unescape(tests[i][outCol]);
0860:
0861:                    logln("Normalizing '" + new String(input) + "' ("
0862:                            + hex(new String(input)) + ")");
0863:                    int reqLength = 0;
0864:                    while (true) {
0865:                        try {
0866:                            reqLength = Normalizer.decompose(input, 0,
0867:                                    input.length, output, 0, output.length,
0868:                                    mode == Normalizer.NFKD, 0);
0869:                            if (reqLength <= output.length) {
0870:                                break;
0871:                            }
0872:                        } catch (IndexOutOfBoundsException e) {
0873:                            output = new char[Integer.parseInt(e.getMessage())];
0874:                            continue;
0875:                        }
0876:                    }
0877:                    if (!expect.equals(new String(output, 0, reqLength))) {
0878:                        errln("FAIL: case " + i + " expected '" + expect
0879:                                + "' (" + hex(expect) + ")" + " but got '"
0880:                                + new String(output) + "' ("
0881:                                + hex(new String(output)) + ")");
0882:                    }
0883:                    char[] output2 = new char[reqLength * 2];
0884:                    System.arraycopy(output, 0, output2, 0, reqLength);
0885:                    int retLength = Normalizer.decompose(input, 0,
0886:                            input.length, output2, reqLength, output2.length,
0887:                            mode == Normalizer.NFKC, 0);
0888:                    if (retLength != reqLength) {
0889:                        logln("FAIL: Normalizer.compose did not return the expected length. Expected: "
0890:                                + reqLength + " Got: " + retLength);
0891:                    }
0892:                }
0893:            }
0894:
0895:            private void composeTest(Normalizer.Mode mode, String[][] tests,
0896:                    int outCol) throws Exception {
0897:                for (int i = 0; i < tests.length; i++) {
0898:                    String input = Utility.unescape(tests[i][0]);
0899:                    String expect = Utility.unescape(tests[i][outCol]);
0900:
0901:                    logln("Normalizing '" + input + "' (" + hex(input) + ")");
0902:
0903:                    String output = Normalizer.compose(input,
0904:                            mode == Normalizer.NFKC);
0905:
0906:                    if (!output.equals(expect)) {
0907:                        errln("FAIL: case " + i + " expected '" + expect
0908:                                + "' (" + hex(expect) + ")" + " but got '"
0909:                                + output + "' (" + hex(output) + ")");
0910:                    }
0911:                }
0912:                char[] output = new char[1];
0913:                for (int i = 0; i < tests.length; i++) {
0914:                    char[] input = Utility.unescape(tests[i][0]).toCharArray();
0915:                    String expect = Utility.unescape(tests[i][outCol]);
0916:
0917:                    logln("Normalizing '" + new String(input) + "' ("
0918:                            + hex(new String(input)) + ")");
0919:                    int reqLength = 0;
0920:                    while (true) {
0921:                        try {
0922:                            reqLength = Normalizer.compose(input, output,
0923:                                    mode == Normalizer.NFKC, 0);
0924:                            if (reqLength <= output.length) {
0925:                                break;
0926:                            }
0927:                        } catch (IndexOutOfBoundsException e) {
0928:                            output = new char[Integer.parseInt(e.getMessage())];
0929:                            continue;
0930:                        }
0931:                    }
0932:                    if (!expect.equals(new String(output, 0, reqLength))) {
0933:                        errln("FAIL: case " + i + " expected '" + expect
0934:                                + "' (" + hex(expect) + ")" + " but got '"
0935:                                + new String(output) + "' ("
0936:                                + hex(new String(output)) + ")");
0937:                    }
0938:                }
0939:                output = new char[1];
0940:                for (int i = 0; i < tests.length; i++) {
0941:                    char[] input = Utility.unescape(tests[i][0]).toCharArray();
0942:                    String expect = Utility.unescape(tests[i][outCol]);
0943:
0944:                    logln("Normalizing '" + new String(input) + "' ("
0945:                            + hex(new String(input)) + ")");
0946:                    int reqLength = 0;
0947:                    while (true) {
0948:                        try {
0949:                            reqLength = Normalizer.compose(input, 0,
0950:                                    input.length, output, 0, output.length,
0951:                                    mode == Normalizer.NFKC, 0);
0952:                            if (reqLength <= output.length) {
0953:                                break;
0954:                            }
0955:                        } catch (IndexOutOfBoundsException e) {
0956:                            output = new char[Integer.parseInt(e.getMessage())];
0957:                            continue;
0958:                        }
0959:                    }
0960:                    if (!expect.equals(new String(output, 0, reqLength))) {
0961:                        errln("FAIL: case " + i + " expected '" + expect
0962:                                + "' (" + hex(expect) + ")" + " but got '"
0963:                                + new String(output) + "' ("
0964:                                + hex(new String(output)) + ")");
0965:                    }
0966:
0967:                    char[] output2 = new char[reqLength * 2];
0968:                    System.arraycopy(output, 0, output2, 0, reqLength);
0969:                    int retLength = Normalizer.compose(input, 0, input.length,
0970:                            output2, reqLength, output2.length,
0971:                            mode == Normalizer.NFKC, 0);
0972:                    if (retLength != reqLength) {
0973:                        logln("FAIL: Normalizer.compose did not return the expected length. Expected: "
0974:                                + reqLength + " Got: " + retLength);
0975:                    }
0976:                }
0977:            }
0978:
0979:            private void iterateTest(Normalizer iter, String[][] tests,
0980:                    int outCol) {
0981:                for (int i = 0; i < tests.length; i++) {
0982:                    String input = Utility.unescape(tests[i][0]);
0983:                    String expect = Utility.unescape(tests[i][outCol]);
0984:
0985:                    logln("Normalizing '" + input + "' (" + hex(input) + ")");
0986:
0987:                    iter.setText(input);
0988:                    assertEqual(expect, iter, "case " + i + " ");
0989:                }
0990:            }
0991:
0992:            private void assertEqual(String expected, Normalizer iter,
0993:                    String msg) {
0994:                int index = 0;
0995:                int ch;
0996:                UCharacterIterator cIter = UCharacterIterator
0997:                        .getInstance(expected);
0998:
0999:                while ((ch = iter.next()) != Normalizer.DONE) {
1000:                    if (index >= expected.length()) {
1001:                        errln("FAIL: " + msg + "Unexpected character '"
1002:                                + (char) ch + "' (" + hex(ch) + ")"
1003:                                + " at index " + index);
1004:                        break;
1005:                    }
1006:                    int want = UTF16.charAt(expected, index);
1007:                    if (ch != want) {
1008:                        errln("FAIL: " + msg + "got '" + (char) ch + "' ("
1009:                                + hex(ch) + ")" + " but expected '" + want
1010:                                + "' (" + hex(want) + ")" + " at index "
1011:                                + index);
1012:                    }
1013:                    index += UTF16.getCharCount(ch);
1014:                }
1015:                if (index < expected.length()) {
1016:                    errln("FAIL: " + msg + "Only got " + index
1017:                            + " chars, expected " + expected.length());
1018:                }
1019:
1020:                cIter.setToLimit();
1021:                while ((ch = iter.previous()) != Normalizer.DONE) {
1022:                    int want = cIter.previousCodePoint();
1023:                    if (ch != want) {
1024:                        errln("FAIL: " + msg + "got '" + (char) ch + "' ("
1025:                                + hex(ch) + ")" + " but expected '" + want
1026:                                + "' (" + hex(want) + ")" + " at index "
1027:                                + index);
1028:                    }
1029:                }
1030:            }
1031:
1032:            //--------------------------------------------------------------------------
1033:
1034:            // NOTE: These tests are used for quick debugging so are not ported
1035:            // to ICU4C tsnorm.cpp in intltest
1036:            //
1037:
1038:            public void TestDebugStatic() {
1039:                String in = Utility.unescape("\\U0001D157\\U0001D165");
1040:                if (!Normalizer.isNormalized(in, Normalizer.NFC, 0)) {
1041:                    errln("isNormalized failed");
1042:                }
1043:
1044:                String input = "\uAD8B\uAD8B\uAD8B\uAD8B"
1045:                        + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1046:                        + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1047:                        + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1048:                        + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1049:                        + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1050:                        + "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
1051:                        + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
1052:                        + "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
1053:                        + "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"
1054:                        + "\uAD8B\uAD8B\uAD8B\uAD8B" + "d\u031B\u0307\u0323";
1055:                String expect = "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"
1056:                        + "\u11AA\u1100\u116F\u11AA\uD834\uDD57\uD834\uDD65"
1057:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1058:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1059:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1060:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1061:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1062:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1063:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1064:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1065:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1066:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1067:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1068:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1069:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1070:                        + "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"
1071:                        + "\uD834\uDD57\uD834\uDD65aaaaaaaaaaaaaaaaaazzzzzz"
1072:                        + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
1073:                        + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
1074:                        + "bbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccc"
1075:                        + "cccccccccccccccccccccccccccccccccccccccccccccccc"
1076:                        + "ddddddddddddddddddddddddddddddddddddddddddddddddddddd"
1077:                        + "dddddddddddddddddddddddd"
1078:                        + "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"
1079:                        + "\u11AA\u1100\u116F\u11AA\u0064\u031B\u0323\u0307";
1080:                String output = Normalizer.normalize(Utility.unescape(input),
1081:                        Normalizer.NFD);
1082:                if (!expect.equals(output)) {
1083:                    errln("FAIL expected: " + hex(expect) + " got: "
1084:                            + hex(output));
1085:                }
1086:
1087:            }
1088:
1089:            public void TestDebugIter() {
1090:                String src = Utility
1091:                        .unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1092:                String expected = Utility
1093:                        .unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1094:                Normalizer iter = new Normalizer(new StringCharacterIterator(
1095:                        Utility.unescape(src)), Normalizer.NONE, 0);
1096:                int index = 0;
1097:                int ch;
1098:                UCharacterIterator cIter = UCharacterIterator
1099:                        .getInstance(expected);
1100:
1101:                while ((ch = iter.next()) != Normalizer.DONE) {
1102:                    if (index >= expected.length()) {
1103:                        errln("FAIL: " + "Unexpected character '" + (char) ch
1104:                                + "' (" + hex(ch) + ")" + " at index " + index);
1105:                        break;
1106:                    }
1107:                    int want = UTF16.charAt(expected, index);
1108:                    if (ch != want) {
1109:                        errln("FAIL: " + "got '" + (char) ch + "' (" + hex(ch)
1110:                                + ")" + " but expected '" + want + "' ("
1111:                                + hex(want) + ")" + " at index " + index);
1112:                    }
1113:                    index += UTF16.getCharCount(ch);
1114:                }
1115:                if (index < expected.length()) {
1116:                    errln("FAIL: " + "Only got " + index + " chars, expected "
1117:                            + expected.length());
1118:                }
1119:
1120:                cIter.setToLimit();
1121:                while ((ch = iter.previous()) != Normalizer.DONE) {
1122:                    int want = cIter.previousCodePoint();
1123:                    if (ch != want) {
1124:                        errln("FAIL: " + "got '" + (char) ch + "' (" + hex(ch)
1125:                                + ")" + " but expected '" + want + "' ("
1126:                                + hex(want) + ")" + " at index " + index);
1127:                    }
1128:                }
1129:            }
1130:
1131:            public void TestDebugIterOld() {
1132:                String input = "\\U0001D15E";
1133:                String expected = "\uD834\uDD57\uD834\uDD65";
1134:                String expectedReverse = "\uD834\uDD65\uD834\uDD57";
1135:                int index = 0;
1136:                int ch;
1137:                Normalizer iter = new Normalizer(new StringCharacterIterator(
1138:                        Utility.unescape(input)), Normalizer.NFKC, 0);
1139:                StringBuffer got = new StringBuffer();
1140:                for (ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
1141:                    if (index >= expected.length()) {
1142:                        errln("FAIL: " + "Unexpected character '" + (char) ch
1143:                                + "' (" + hex(ch) + ")" + " at index " + index);
1144:                        break;
1145:                    }
1146:                    got.append(UCharacter.toString(ch));
1147:                    index++;
1148:                }
1149:                if (!expected.equals(got.toString())) {
1150:                    errln("FAIL: " + "got '" + got + "' (" + hex(got) + ")"
1151:                            + " but expected '" + expected + "' ("
1152:                            + hex(expected) + ")");
1153:                }
1154:                if (got.length() < expected.length()) {
1155:                    errln("FAIL: " + "Only got " + index + " chars, expected "
1156:                            + expected.length());
1157:                }
1158:
1159:                logln("Reverse Iteration\n");
1160:                iter.setIndexOnly(iter.endIndex());
1161:                got.setLength(0);
1162:                for (ch = iter.previous(); ch != Normalizer.DONE; ch = iter
1163:                        .previous()) {
1164:                    if (index >= expected.length()) {
1165:                        errln("FAIL: " + "Unexpected character '" + (char) ch
1166:                                + "' (" + hex(ch) + ")" + " at index " + index);
1167:                        break;
1168:                    }
1169:                    got.append(UCharacter.toString(ch));
1170:                }
1171:                if (!expectedReverse.equals(got.toString())) {
1172:                    errln("FAIL: " + "got '" + got + "' (" + hex(got) + ")"
1173:                            + " but expected '" + expected + "' ("
1174:                            + hex(expected) + ")");
1175:                }
1176:                if (got.length() < expected.length()) {
1177:                    errln("FAIL: " + "Only got " + index + " chars, expected "
1178:                            + expected.length());
1179:                }
1180:
1181:            }
1182:
1183:            //--------------------------------------------------------------------------
1184:            // helper class for TestPreviousNext()
1185:            // simple UTF-32 character iterator
/**
 * Bidirectional cursor over the first {@code len} entries of an
 * {@code int} array of code points. Every read method returns -1
 * when there is nothing to deliver in the requested direction.
 */
class UCharIterator {

    private int[] codePoints;
    private int limit, pos;

    /**
     * @param src   array holding the code points (kept by reference)
     * @param len   number of leading entries of {@code src} to iterate
     * @param index initial cursor position
     */
    public UCharIterator(int[] src, int len, int index) {
        this.codePoints = src;
        this.limit = len;
        this.pos = index;
    }

    /** Returns the code point at the cursor without moving, or -1 at the end. */
    public int current() {
        return (pos < limit) ? codePoints[pos] : -1;
    }

    /** Returns the code point at the cursor and advances, or -1 at the end. */
    public int next() {
        return (pos < limit) ? codePoints[pos++] : -1;
    }

    /** Steps back one position and returns that code point, or -1 at the start. */
    public int previous() {
        return (pos > 0) ? codePoints[--pos] : -1;
    }

    /** Returns the current cursor position. */
    public int getIndex() {
        return pos;
    }
}
1226:
1227:            public void TestPreviousNext() {
1228:                // src and expect strings
1229:                char src[] = { UTF16.getLeadSurrogate(0x2f999),
1230:                        UTF16.getTrailSurrogate(0x2f999),
1231:                        UTF16.getLeadSurrogate(0x1d15f),
1232:                        UTF16.getTrailSurrogate(0x1d15f), 0xc4, 0x1ed0 };
1233:                int expect[] = { 0x831d, 0x1d158, 0x1d165, 0x41, 0x308, 0x4f,
1234:                        0x302, 0x301 };
1235:
1236:                // expected src indexes corresponding to expect indexes
1237:                int expectIndex[] = { 0, 2, 2, 4, 4, 5, 5, 5, 6 // behind last character
1238:                };
1239:
1240:                // initial indexes into the src and expect strings
1241:
1242:                final int SRC_MIDDLE = 4;
1243:                final int EXPECT_MIDDLE = 3;
1244:
1245:                // movement vector
1246:                // - for previous(), 0 for current(), + for next()
1247:                // not const so that we can terminate it below for the error message
1248:                String moves = "0+0+0--0-0-+++0--+++++++0--------";
1249:
1250:                // iterators
1251:                Normalizer iter = new Normalizer(new String(src),
1252:                        Normalizer.NFD, 0);
1253:                UCharIterator iter32 = new UCharIterator(expect, expect.length,
1254:                        EXPECT_MIDDLE);
1255:
1256:                int c1, c2;
1257:                char m;
1258:
1259:                // initially set the indexes into the middle of the strings
1260:                iter.setIndexOnly(SRC_MIDDLE);
1261:
1262:                // move around and compare the iteration code points with
1263:                // the expected ones
1264:                int movesIndex = 0;
1265:                while (movesIndex < moves.length()) {
1266:                    m = moves.charAt(movesIndex++);
1267:                    if (m == '-') {
1268:                        c1 = iter.previous();
1269:                        c2 = iter32.previous();
1270:                    } else if (m == '0') {
1271:                        c1 = iter.current();
1272:                        c2 = iter32.current();
1273:                    } else /* m=='+' */{
1274:                        c1 = iter.next();
1275:                        c2 = iter32.next();
1276:                    }
1277:
1278:                    // compare results
1279:                    if (c1 != c2) {
1280:                        // copy the moves until the current (m) move, and terminate
1281:                        String history = moves.substring(0, movesIndex);
1282:                        errln("error: mismatch in Normalizer iteration at "
1283:                                + history + ": " + "got c1= " + hex(c1)
1284:                                + " != expected c2= " + hex(c2));
1285:                        break;
1286:                    }
1287:
1288:                    // compare indexes
1289:                    if (iter.getIndex() != expectIndex[iter32.getIndex()]) {
1290:                        // copy the moves until the current (m) move, and terminate
1291:                        String history = moves.substring(0, movesIndex);
1292:                        errln("error: index mismatch in Normalizer iteration at "
1293:                                + history
1294:                                + " : "
1295:                                + "Normalizer index "
1296:                                + iter.getIndex()
1297:                                + " expected "
1298:                                + expectIndex[iter32.getIndex()]);
1299:                        break;
1300:                    }
1301:                }
1302:            }
1303:
1304:            // Only in ICU4j
1305:            public void TestPreviousNextJCI() {
1306:                // src and expect strings
1307:                char src[] = { UTF16.getLeadSurrogate(0x2f999),
1308:                        UTF16.getTrailSurrogate(0x2f999),
1309:                        UTF16.getLeadSurrogate(0x1d15f),
1310:                        UTF16.getTrailSurrogate(0x1d15f), 0xc4, 0x1ed0 };
1311:                int expect[] = { 0x831d, 0x1d158, 0x1d165, 0x41, 0x308, 0x4f,
1312:                        0x302, 0x301 };
1313:
1314:                // expected src indexes corresponding to expect indexes
1315:                int expectIndex[] = { 0, 2, 2, 4, 4, 5, 5, 5, 6 // behind last character
1316:                };
1317:
1318:                // initial indexes into the src and expect strings
1319:
1320:                final int SRC_MIDDLE = 4;
1321:                final int EXPECT_MIDDLE = 3;
1322:
1323:                // movement vector
1324:                // - for previous(), 0 for current(), + for next()
1325:                // not const so that we can terminate it below for the error message
1326:                String moves = "0+0+0--0-0-+++0--+++++++0--------";
1327:
1328:                // iterators
1329:                StringCharacterIterator text = new StringCharacterIterator(
1330:                        new String(src));
1331:                Normalizer iter = new Normalizer(text, Normalizer.NFD, 0);
1332:                UCharIterator iter32 = new UCharIterator(expect, expect.length,
1333:                        EXPECT_MIDDLE);
1334:
1335:                int c1, c2;
1336:                char m;
1337:
1338:                // initially set the indexes into the middle of the strings
1339:                iter.setIndexOnly(SRC_MIDDLE);
1340:
1341:                // move around and compare the iteration code points with
1342:                // the expected ones
1343:                int movesIndex = 0;
1344:                while (movesIndex < moves.length()) {
1345:                    m = moves.charAt(movesIndex++);
1346:                    if (m == '-') {
1347:                        c1 = iter.previous();
1348:                        c2 = iter32.previous();
1349:                    } else if (m == '0') {
1350:                        c1 = iter.current();
1351:                        c2 = iter32.current();
1352:                    } else /* m=='+' */{
1353:                        c1 = iter.next();
1354:                        c2 = iter32.next();
1355:                    }
1356:
1357:                    // compare results
1358:                    if (c1 != c2) {
1359:                        // copy the moves until the current (m) move, and terminate
1360:                        String history = moves.substring(0, movesIndex);
1361:                        errln("error: mismatch in Normalizer iteration at "
1362:                                + history + ": " + "got c1= " + hex(c1)
1363:                                + " != expected c2= " + hex(c2));
1364:                        break;
1365:                    }
1366:
1367:                    // compare indexes
1368:                    if (iter.getIndex() != expectIndex[iter32.getIndex()]) {
1369:                        // copy the moves until the current (m) move, and terminate
1370:                        String history = moves.substring(0, movesIndex);
1371:                        errln("error: index mismatch in Normalizer iteration at "
1372:                                + history
1373:                                + " : "
1374:                                + "Normalizer index "
1375:                                + iter.getIndex()
1376:                                + " expected "
1377:                                + expectIndex[iter32.getIndex()]);
1378:                        break;
1379:                    }
1380:                }
1381:            }
1382:
1383:            // test APIs that are not otherwise used - improve test coverage
1384:            public void TestNormalizerAPI() throws Exception {
1385:                try {
1386:                    // instantiate a Normalizer from a CharacterIterator
1387:                    String s = Utility.unescape("a\u0308\uac00\\U0002f800");
1388:                    // make s a bit longer and more interesting
1389:                    UCharacterIterator iter = UCharacterIterator.getInstance(s
1390:                            + s);
1391:                    Normalizer norm = new Normalizer(iter, Normalizer.NFC, 0);
1392:                    if (norm.next() != 0xe4) {
1393:                        errln("error in Normalizer(CharacterIterator).next()");
1394:                    }
1395:
1396:                    // test clone(), ==, and hashCode()
1397:                    Normalizer clone = (Normalizer) norm.clone();
1398:                    if (clone.equals(norm)) {
1399:                        errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=norm");
1400:                    }
1401:
1402:                    if (clone.getLength() != norm.getLength()) {
1403:                        errln("error in Normalizer.getBeginIndex()");
1404:                    }
1405:                    // clone must have the same hashCode()
1406:                    //if(clone.hashCode()!=norm.hashCode()) {
1407:                    //    errln("error in Normalizer(Normalizer(CharacterIterator)).clone().hashCode()!=copy.hashCode()");
1408:                    //}
1409:                    if (clone.next() != 0xac00) {
1410:                        errln("error in Normalizer(Normalizer(CharacterIterator)).next()");
1411:                    }
1412:                    int ch = clone.next();
1413:                    if (ch != 0x4e3d) {
1414:                        errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next()");
1415:                    }
1416:                    // position changed, must change hashCode()
1417:                    if (clone.hashCode() == norm.hashCode()) {
1418:                        errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next().hashCode()==copy.hashCode()");
1419:                    }
1420:
1421:                    // test compose() and decompose()
1422:                    StringBuffer tel;
1423:                    String nfkc, nfkd;
1424:                    tel = new StringBuffer(
1425:                            "\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121");
1426:                    tel.insert(1, (char) 0x0301);
1427:
1428:                    nfkc = Normalizer.compose(tel.toString(), true);
1429:                    nfkd = Normalizer.decompose(tel.toString(), true);
1430:                    if (!nfkc.equals(Utility
1431:                            .unescape("TE\u0139TELTELTELTELTELTELTELTELTEL"))
1432:                            || !nfkd
1433:                                    .equals(Utility
1434:                                            .unescape("TEL\u0301TELTELTELTELTELTELTELTELTEL"))) {
1435:                        errln("error in Normalizer::(de)compose(): wrong result(s)");
1436:                    }
1437:
1438:                    // test setIndex()
1439:                    //            ch=norm.setIndex(3);
1440:                    //            if(ch!=0x4e3d) {
1441:                    //                errln("error in Normalizer(CharacterIterator).setIndex(3)");
1442:                    //            }
1443:
1444:                    // test setText(CharacterIterator) and getText()
1445:                    String out, out2;
1446:                    clone.setText(iter);
1447:
1448:                    out = clone.getText();
1449:                    out2 = iter.getText();
1450:                    if (!out.equals(out2) || clone.startIndex() != 0
1451:                            || clone.endIndex() != iter.getLength()) {
1452:                        errln("error in Normalizer::setText() or Normalizer::getText()");
1453:                    }
1454:
1455:                    char[] fillIn1 = new char[clone.getLength()];
1456:                    char[] fillIn2 = new char[iter.getLength()];
1457:                    int len = clone.getText(fillIn1);
1458:                    iter.getText(fillIn2, 0);
1459:                    if (!Utility
1460:                            .arrayRegionMatches(fillIn1, 0, fillIn2, 0, len)) {
1461:                        errln("error in Normalizer.getText(). Normalizer: "
1462:                                + Utility.hex(new String(fillIn1)) + " Iter: "
1463:                                + Utility.hex(new String(fillIn2)));
1464:                    }
1465:
1466:                    clone.setText(fillIn1);
1467:                    len = clone.getText(fillIn2);
1468:                    if (!Utility
1469:                            .arrayRegionMatches(fillIn1, 0, fillIn2, 0, len)) {
1470:                        errln("error in Normalizer.setText() or Normalizer.getText()"
1471:                                + Utility.hex(new String(fillIn1))
1472:                                + " Iter: "
1473:                                + Utility.hex(new String(fillIn2)));
1474:                    }
1475:
1476:                    // test setText(UChar *), getUMode() and setMode()
1477:                    clone.setText(s);
1478:                    clone.setIndexOnly(1);
1479:                    clone.setMode(Normalizer.NFD);
1480:                    if (clone.getMode() != Normalizer.NFD) {
1481:                        errln("error in Normalizer::setMode() or Normalizer::getMode()");
1482:                    }
1483:                    if (clone.next() != 0x308 || clone.next() != 0x1100) {
1484:                        errln("error in Normalizer::setText() or Normalizer::setMode()");
1485:                    }
1486:
1487:                    // test last()/previous() with an internal buffer overflow
1488:                    StringBuffer buf = new StringBuffer("aaaaaaaaaa");
1489:                    buf.setCharAt(10 - 1, '\u0308');
1490:                    clone.setText(buf);
1491:                    if (clone.last() != 0x308) {
1492:                        errln("error in Normalizer(10*U+0308).last()");
1493:                    }
1494:
1495:                    // test UNORM_NONE
1496:                    norm.setMode(Normalizer.NONE);
1497:                    if (norm.first() != 0x61 || norm.next() != 0x308
1498:                            || norm.last() != 0x2f800) {
1499:                        errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
1500:                    }
1501:                    out = Normalizer.normalize(s, Normalizer.NONE);
1502:                    if (!out.equals(s)) {
1503:                        errln("error in Normalizer::normalize(UNORM_NONE)");
1504:                    }
1505:                    ch = 0x1D15E;
1506:                    String exp = "\\U0001D157\\U0001D165";
1507:                    String ns = Normalizer.normalize(ch, Normalizer.NFC);
1508:                    if (!ns.equals(Utility.unescape(exp))) {
1509:                        errln("error in Normalizer.normalize(int,Mode)");
1510:                    }
1511:                    ns = Normalizer.normalize(ch, Normalizer.NFC, 0);
1512:                    if (!ns.equals(Utility.unescape(exp))) {
1513:                        errln("error in Normalizer.normalize(int,Mode,int)");
1514:                    }
1515:
1516:                } catch (Exception e) {
1517:                    throw e;
1518:                }
1519:            }
1520:
1521:            public void TestConcatenate() {
1522:
1523:                Object[][] cases = new Object[][] {
1524:                        /* mode, left, right, result */
1525:                        { Normalizer.NFC, "re", "\u0301sum\u00e9",
1526:                                "r\u00e9sum\u00e9" },
1527:                        { Normalizer.NFC, "a\u1100", "\u1161bcdefghijk",
1528:                                "a\uac00bcdefghijk" },
1529:                        /* ### TODO: add more interesting cases */
1530:                        {
1531:                                Normalizer.NFD,
1532:                                "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958"
1533:                                        + "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC"
1534:                                        + "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E"
1535:                                        + "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69"
1536:                                        + "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2"
1537:                                        + "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79"
1538:                                        + "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB",
1539:
1540:                                "\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000"
1541:                                        + "\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10"
1542:                                        + "\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F"
1543:                                        + "\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31"
1544:                                        + "\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A"
1545:                                        + "\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46"
1546:                                        + "\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E",
1547:
1548:                                "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958"
1549:                                        + "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC"
1550:                                        + "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E"
1551:                                        + "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69"
1552:                                        + "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2"
1553:                                        + "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79"
1554:                                        + "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u0399"
1555:                                        + "\u0301\u03C5\u0308\u0301\u1FEB\u1FEE\u1FEF\u1FF9"
1556:                                        + "\u1FFB\u1FFD\u2000\u2001\u2126\u212A\u212B\u2329"
1557:                                        + "\u232A\uF900\uFA10\uFA12\uFA15\uFA20\uFA22\uFA25"
1558:                                        + "\uFA26\uFA2A\uFB1F\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E"
1559:                                        + "\uFB2F\uFB30\uFB31\uFB32\uFB33\uFB34\uFB35\uFB36"
1560:                                        + "\uFB38\uFB39\uFB3A\uFB3B\uFB3C\uFB3E\uFB40\uFB41"
1561:                                        + "\uFB43\uFB44\uFB46\uFB47\uFB48\uFB49\uFB4A\uFB4B"
1562:                                        + "\uFB4C\uFB4D\uFB4E" } };
1563:
1564:                String left, right, expect, result;
1565:                Normalizer.Mode mode;
1566:                int i;
1567:
1568:                /* test concatenation */
1569:                for (i = 0; i < cases.length; ++i) {
1570:                    mode = (Normalizer.Mode) cases[i][0];
1571:
1572:                    left = (String) cases[i][1];
1573:                    right = (String) cases[i][2];
1574:                    expect = (String) cases[i][3];
1575:                    {
1576:                        result = Normalizer.concatenate(left, right, mode, 0);
1577:                        if (result.equals(expect)) {
1578:                            errln("error in Normalizer.concatenate(), cases[] failed"
1579:                                    + ", result==expect: expected: "
1580:                                    + hex(expect)
1581:                                    + " =========> got: "
1582:                                    + hex(result));
1583:                        }
1584:                    }
1585:                    {
1586:                        result = Normalizer.concatenate(left.toCharArray(),
1587:                                right.toCharArray(), mode, 0);
1588:                        if (result.equals(expect)) {
1589:                            errln("error in Normalizer.concatenate(), cases[] failed"
1590:                                    + ", result==expect: expected: "
1591:                                    + hex(expect)
1592:                                    + " =========> got: "
1593:                                    + hex(result));
1594:                        }
1595:                    }
1596:                }
1597:            }
1598:
1599:            private final int RAND_MAX = 0x7fff;
1600:
1601:            public void TestCheckFCD() {
1602:                char[] FAST = { 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006,
1603:                        0x0007, 0x0008, 0x0009, 0x000A };
1604:
1605:                char[] FALSE = { 0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300,
1606:                        0x0301, 0x02B9, 0x0314, 0x0315, 0x0316 };
1607:
1608:                char[] TRUE = { 0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
1609:                        0x0050, 0x0730, 0x09EE, 0x1E10 };
1610:
1611:                char[][] datastr = { { 0x0061, 0x030A, 0x1E05, 0x0302, 0 },
1612:                        { 0x0061, 0x030A, 0x00E2, 0x0323, 0 },
1613:                        { 0x0061, 0x0323, 0x00E2, 0x0323, 0 },
1614:                        { 0x0061, 0x0323, 0x1E05, 0x0302, 0 } };
1615:                Normalizer.QuickCheckResult result[] = { Normalizer.YES,
1616:                        Normalizer.NO, Normalizer.NO, Normalizer.YES };
1617:
1618:                char[] datachar = { 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
1619:                        0x67, 0x68, 0x69, 0x6a, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4,
1620:                        0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0x0300, 0x0301,
1621:                        0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308,
1622:                        0x0309, 0x030a, 0x0320, 0x0321, 0x0322, 0x0323, 0x0324,
1623:                        0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032a, 0x1e00,
1624:                        0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 0x1e07,
1625:                        0x1e08, 0x1e09, 0x1e0a };
1626:
1627:                int count = 0;
1628:
1629:                if (Normalizer.quickCheck(FAST, 0, FAST.length, Normalizer.FCD,
1630:                        0) != Normalizer.YES)
1631:                    errln("Normalizer.quickCheck(FCD) failed: expected value for fast Normalizer.quickCheck is Normalizer.YES\n");
1632:                if (Normalizer.quickCheck(FALSE, 0, FALSE.length,
1633:                        Normalizer.FCD, 0) != Normalizer.NO)
1634:                    errln("Normalizer.quickCheck(FCD) failed: expected value for error Normalizer.quickCheck is Normalizer.NO\n");
1635:                if (Normalizer.quickCheck(TRUE, 0, TRUE.length, Normalizer.FCD,
1636:                        0) != Normalizer.YES)
1637:                    errln("Normalizer.quickCheck(FCD) failed: expected value for correct Normalizer.quickCheck is Normalizer.YES\n");
1638:
1639:                while (count < 4) {
1640:                    Normalizer.QuickCheckResult fcdresult = Normalizer
1641:                            .quickCheck(datastr[count], 0,
1642:                                    datastr[count].length, Normalizer.FCD, 0);
1643:                    if (result[count] != fcdresult) {
1644:                        errln("Normalizer.quickCheck(FCD) failed: Data set "
1645:                                + count + " expected value " + result[count]);
1646:                    }
1647:                    count++;
1648:                }
1649:
1650:                /* random checks of long strings */
1651:                //srand((unsigned)time( NULL ));
1652:                Random rand = createRandom(); // use test framework's random
1653:
1654:                for (count = 0; count < 50; count++) {
1655:                    int size = 0;
1656:                    Normalizer.QuickCheckResult testresult = Normalizer.YES;
1657:                    char[] data = new char[20];
1658:                    char[] norm = new char[100];
1659:                    char[] nfd = new char[100];
1660:                    int normStart = 0;
1661:                    int nfdsize = 0;
1662:                    while (size != 19) {
1663:                        data[size] = datachar[rand.nextInt(RAND_MAX) * 50
1664:                                / RAND_MAX];
1665:                        logln("0x" + data[size]);
1666:                        normStart += Normalizer.normalize(data, size, size + 1,
1667:                                norm, normStart, 100, Normalizer.NFD, 0);
1668:                        size++;
1669:                    }
1670:                    logln("\n");
1671:
1672:                    nfdsize = Normalizer.normalize(data, 0, size, nfd, 0,
1673:                            nfd.length, Normalizer.NFD, 0);
1674:                    //    nfdsize = unorm_normalize(data, size, UNORM_NFD, UCOL_IGNORE_HANGUL,
1675:                    //                      nfd, 100, &status);
1676:                    if (nfdsize != normStart
1677:                            || Utility.arrayRegionMatches(nfd, 0, norm, 0,
1678:                                    nfdsize) == false) {
1679:                        testresult = Normalizer.NO;
1680:                    }
1681:                    if (testresult == Normalizer.YES) {
1682:                        logln("result Normalizer.YES\n");
1683:                    } else {
1684:                        logln("result Normalizer.NO\n");
1685:                    }
1686:
1687:                    if (Normalizer.quickCheck(data, 0, data.length,
1688:                            Normalizer.FCD, 0) != testresult) {
1689:                        errln("Normalizer.quickCheck(FCD) failed: expected "
1690:                                + testresult + " for random data: "
1691:                                + hex(new String(data)));
1692:                    }
1693:                }
1694:            }
1695:
1696:            // reference implementation of Normalizer::compare
1697:            private int ref_norm_compare(String s1, String s2, int options) {
1698:                String t1, t2, r1, r2;
1699:
1700:                int normOptions = (int) (options >> Normalizer.COMPARE_NORM_OPTIONS_SHIFT);
1701:
1702:                if ((options & Normalizer.COMPARE_IGNORE_CASE) != 0) {
1703:                    // NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
1704:                    r1 = Normalizer.decompose(s1, false, normOptions);
1705:                    r2 = Normalizer.decompose(s2, false, normOptions);
1706:                    r1 = UCharacter.foldCase(r1, options);
1707:                    r2 = UCharacter.foldCase(r2, options);
1708:                } else {
1709:                    r1 = s1;
1710:                    r2 = s2;
1711:                }
1712:
1713:                t1 = Normalizer.decompose(r1, false, normOptions);
1714:                t2 = Normalizer.decompose(r2, false, normOptions);
1715:
1716:                if ((options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0) {
1717:                    UTF16.StringComparator comp = new UTF16.StringComparator(
1718:                            true, false,
1719:                            UTF16.StringComparator.FOLD_CASE_DEFAULT);
1720:                    return comp.compare(t1, t2);
1721:                } else {
1722:                    return t1.compareTo(t2);
1723:                }
1724:
1725:            }
1726:
1727:            // test wrapper for Normalizer::compare, sets UNORM_INPUT_IS_FCD appropriately
1728:            private int norm_compare(String s1, String s2, int options) {
1729:                int normOptions = (int) (options >> Normalizer.COMPARE_NORM_OPTIONS_SHIFT);
1730:
1731:                if (Normalizer.YES == Normalizer.quickCheck(s1, Normalizer.FCD,
1732:                        normOptions)
1733:                        && Normalizer.YES == Normalizer.quickCheck(s2,
1734:                                Normalizer.FCD, normOptions)) {
1735:                    options |= Normalizer.INPUT_IS_FCD;
1736:                }
1737:
1738:                return Normalizer.compare(s1, s2, options);
1739:            }
1740:
1741:            // reference implementation of UnicodeString::caseCompare
1742:            private int ref_case_compare(String s1, String s2, int options) {
1743:                String t1, t2;
1744:
1745:                t1 = s1;
1746:                t2 = s2;
1747:
1748:                t1 = UCharacter
1749:                        .foldCase(
1750:                                t1,
1751:                                ((options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0));
1752:                t2 = UCharacter
1753:                        .foldCase(
1754:                                t2,
1755:                                ((options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0));
1756:
1757:                if ((options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0) {
1758:                    UTF16.StringComparator comp = new UTF16.StringComparator(
1759:                            true, false,
1760:                            UTF16.StringComparator.FOLD_CASE_DEFAULT);
1761:                    return comp.compare(t1, t2);
1762:                } else {
1763:                    return t1.compareTo(t2);
1764:                }
1765:
1766:            }
1767:
1768:            // reduce an integer to -1/0/1
1769:            private static int sign(int value) {
1770:                if (value == 0) {
1771:                    return 0;
1772:                } else {
1773:                    return (value >> 31) | 1;
1774:                }
1775:            }
1776:
1777:            private static String signString(int value) {
1778:                if (value < 0) {
1779:                    return "<0";
1780:                } else if (value == 0) {
1781:                    return "=0";
1782:                } else /* value>0 */{
1783:                    return ">0";
1784:                }
1785:            }
1786:
1787:            // test Normalizer::compare and unorm_compare (thinly wrapped by the former)
1788:            // by comparing it with its semantic equivalent
1789:            // since we trust the pieces, this is sufficient
1790:
1791:            // test each string with itself and each other
1792:            // each time with all options
1793:            private String strings[] = new String[] {
                    // Input table for TestCompare() and TestCompareDebug(). Both tests run every
                    // entry through Utility.unescape() before use; the sequences spelled below with
                    // a doubled backslash and a capital U stay literal in the compiled string,
                    // presumably so that unescape() can expand them into supplementary code points.
                    // The index ranges in the comments matter: TestCompareDebug() addresses
                    // entries 42 and 43 directly.
1794:                    // some cases from NormalizationTest.txt
1795:                    // 0..3
1796:                    "D\u031B\u0307\u0323",
1797:                    "\u1E0C\u031B\u0307",
1798:                    "D\u031B\u0323\u0307",
1799:                    "d\u031B\u0323\u0307",
1800:
1801:                    // 4..6
1802:                    "\u00E4",
1803:                    "a\u0308",
1804:                    "A\u0308",
1805:
1806:                    // Angstrom sign = A ring
1807:                    // 7..10
1808:                    "\u212B",
1809:                    "\u00C5",
1810:                    "A\u030A",
1811:                    "a\u030A",
1812:
1813:                    // 11..14
1814:                    "a\u059A\u0316\u302A\u032Fb",
1815:                    "a\u302A\u0316\u032F\u059Ab",
1816:                    "a\u302A\u0316\u032F\u059Ab",
1817:                    "A\u059A\u0316\u302A\u032Fb",
1818:
1819:                    // from ICU case folding tests
1820:                    // 15..20
1821:                    "A\u00df\u00b5\ufb03\\U0001040c\u0131",
1822:                    "ass\u03bcffi\\U00010434i",
1823:                    "\u0061\u0042\u0131\u03a3\u00df\ufb03\ud93f\udfff",
1824:                    "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udfff",
1825:                    "\u0041\u0062\u0131\u03c3\u0053\u0073\u0066\u0046\u0069\ud93f\udfff",
1826:                    "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udffd",
1827:
1828:                    //     U+d800 U+10001   see implementation comment in unorm_cmpEquivFold
1829:                    // vs. U+10000          at bottom - code point order
1830:                    // 21..22
1831:                    "\ud800\ud800\udc01",
1832:                    "\ud800\udc00",
1833:
1834:                    // other code point order tests from ustrtest.cpp
1835:                    // 23..31
1836:                    "\u20ac\ud801",
1837:                    "\u20ac\ud800\udc00",
1838:                    "\ud800",
1839:                    "\ud800\uff61",
1840:                    "\udfff",
1841:                    "\uff61\udfff",
1842:                    "\uff61\ud800\udc02",
1843:                    "\ud800\udc02",
1844:                    "\ud84d\udc56",
1845:
1846:                    // long strings, see cnormtst.c/TestNormCoverage()
1847:                    // equivalent if case-insensitive
1848:                    // 32..33
1849:                    "\uAD8B\uAD8B\uAD8B\uAD8B"
1850:                            + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1851:                            + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1852:                            + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1853:                            + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1854:                            + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1855:                            + "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
1856:                            + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
1857:                            + "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
1858:                            + "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"
1859:                            + "\uAD8B\uAD8B\uAD8B\uAD8B"
1860:                            + "d\u031B\u0307\u0323",
1861:
1862:                    "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"
1863:                            + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1864:                            + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1865:                            + "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1866:                            + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1867:                            + "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"
1868:                            + "aaaaaaaaaaAAAAAAAAZZZZZZZZZZZZZZZZzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
1869:                            + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
1870:                            + "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
1871:                            + "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"
1872:                            + "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"
1873:                            + "\u1E0C\u031B\u0307",
1874:
1875:                    // some strings that may make a difference whether the compare function
1876:                    // case-folds or decomposes first
1877:                    // 34..41
1878:                    "\u0360\u0345\u0334", "\u0360\u03b9\u0334",
1879:
1880:                    "\u0360\u1f80\u0334", "\u0360\u03b1\u0313\u03b9\u0334",
1881:
1882:                    "\u0360\u1ffc\u0334", "\u0360\u03c9\u03b9\u0334",
1883:
1884:                    "a\u0360\u0345\u0360\u0345b", "a\u0345\u0360\u0345\u0360b",
1885:
1886:                    // interesting cases for canonical caseless match with turkic i handling
1887:                    // 42..43
1888:                    "\u00cc", "\u0069\u0300",
1889:
1890:                    // strings with post-Unicode 3.2 normalization or normalization corrections
1891:                    // 44..45
1892:                    "\u00e4\u193b\\U0002f868", "\u0061\u193b\u0308\u36fc",
1893:
1894:            };
1895:
1896:            // all combinations of options
1897:            // UNORM_INPUT_IS_FCD is set automatically if both input strings fulfill FCD conditions
/**
 * Pairs a set of comparison option bits with the label that the compare
 * tests print for it in failure messages.
 */
final class Temp {
    /** Option bits handed to the compare helpers. */
    int options;

    /** Label shown for this option set in failure messages. */
    String name;

    public Temp(int options, String name) {
        this.options = options;
        this.name = name;
    }
}
1908:
1909:            // set UNORM_UNICODE_3_2 in one additional combination
1910:
1911:            private Temp[] opt = new Temp[] {
1912:                    new Temp(0, "default"),
1913:                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER,
1914:                            "code point order"),
1915:                    new Temp(Normalizer.COMPARE_IGNORE_CASE, "ignore case"),
1916:                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER
1917:                            | Normalizer.COMPARE_IGNORE_CASE,
1918:                            "code point order & ignore case"),
1919:                    new Temp(Normalizer.COMPARE_IGNORE_CASE
1920:                            | Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I,
1921:                            "ignore case & special i"),
1922:                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER
1923:                            | Normalizer.COMPARE_IGNORE_CASE
1924:                            | Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I,
1925:                            "code point order & ignore case & special i"),
1926:                    new Temp(
1927:                            Normalizer.UNICODE_3_2 << Normalizer.COMPARE_NORM_OPTIONS_SHIFT,
1928:                            "Unicode 3.2") };
1929:
1930:            public void TestCompareDebug() {
1931:
1932:                String[] s = new String[100]; // at least as many items as in strings[] !
1933:
1934:                int i, j, k, count = strings.length;
1935:                int result, refResult;
1936:
1937:                // create the UnicodeStrings
1938:                for (i = 0; i < count; ++i) {
1939:                    s[i] = Utility.unescape(strings[i]);
1940:                }
1941:                UTF16.StringComparator comp = new UTF16.StringComparator(true,
1942:                        false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
1943:                // test them each with each other
1944:
1945:                i = 42;
1946:                j = 43;
1947:                k = 2;
1948:                // test Normalizer::compare
1949:                result = norm_compare(s[i], s[j], opt[k].options);
1950:                refResult = ref_norm_compare(s[i], s[j], opt[k].options);
1951:                if (sign(result) != sign(refResult)) {
1952:                    errln("Normalizer::compare( " + i + ", " + j + ", " + k
1953:                            + "( " + opt[k].name + "))=" + result
1954:                            + " should be same sign as " + refResult);
1955:                }
1956:
1957:                // test UnicodeString::caseCompare - same internal implementation function
1958:                if (0 != (opt[k].options & Normalizer.COMPARE_IGNORE_CASE)) {
1959:                    //    result=s[i]. (s[j], opt[k].options);
1960:                    if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
1961:                        comp.setIgnoreCase(true,
1962:                                UTF16.StringComparator.FOLD_CASE_DEFAULT);
1963:                    } else {
1964:                        comp
1965:                                .setIgnoreCase(
1966:                                        true,
1967:                                        UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
1968:                    }
1969:
1970:                    result = comp.compare(s[i], s[j]);
1971:                    refResult = ref_case_compare(s[i], s[j], opt[k].options);
1972:                    if (sign(result) != sign(refResult)) {
1973:                        errln("Normalizer::compare( " + i + ", " + j + ", " + k
1974:                                + "( " + opt[k].name + "))=" + result
1975:                                + " should be same sign as " + refResult);
1976:                    }
1977:                }
1978:                String value1 = "\u00dater\u00fd";
1979:                String value2 = "\u00fater\u00fd";
1980:                if (Normalizer.compare(value1, value2, 0) != 0) {
1981:                    if (Normalizer.compare(value1, value2,
1982:                            Normalizer.COMPARE_IGNORE_CASE) == 0) {
1983:
1984:                    }
1985:                }
1986:            }
1987:
1988:            public void TestCompare() {
1989:
1990:                String[] s = new String[100]; // at least as many items as in strings[] !
1991:
1992:                int i, j, k, count = strings.length;
1993:                int result, refResult;
1994:
1995:                // create the UnicodeStrings
1996:                for (i = 0; i < count; ++i) {
1997:                    s[i] = Utility.unescape(strings[i]);
1998:                }
1999:                UTF16.StringComparator comp = new UTF16.StringComparator();
2000:                // test them each with each other
2001:                for (i = 0; i < count; ++i) {
2002:                    for (j = i; j < count; ++j) {
2003:                        for (k = 0; k < opt.length; ++k) {
2004:                            // test Normalizer::compare
2005:                            result = norm_compare(s[i], s[j], opt[k].options);
2006:                            refResult = ref_norm_compare(s[i], s[j],
2007:                                    opt[k].options);
2008:                            if (sign(result) != sign(refResult)) {
2009:                                errln("Normalizer::compare( " + i + ", " + j
2010:                                        + ", " + k + "( " + opt[k].name + "))="
2011:                                        + result + " should be same sign as "
2012:                                        + refResult);
2013:                            }
2014:
2015:                            // test UnicodeString::caseCompare - same internal implementation function
2016:                            if (0 != (opt[k].options & Normalizer.COMPARE_IGNORE_CASE)) {
2017:                                //    result=s[i]. (s[j], opt[k].options);
2018:                                if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
2019:                                    comp
2020:                                            .setIgnoreCase(
2021:                                                    true,
2022:                                                    UTF16.StringComparator.FOLD_CASE_DEFAULT);
2023:                                } else {
2024:                                    comp
2025:                                            .setIgnoreCase(
2026:                                                    true,
2027:                                                    UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2028:                                }
2029:
2030:                                comp
2031:                                        .setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2032:                                // result=comp.caseCompare(s[i],s[j], opt[k].options);
2033:                                result = comp.compare(s[i], s[j]);
2034:                                refResult = ref_case_compare(s[i], s[j],
2035:                                        opt[k].options);
2036:                                if (sign(result) != sign(refResult)) {
2037:                                    errln("Normalizer::compare( " + i + ", "
2038:                                            + j + ", " + k + "( " + opt[k].name
2039:                                            + "))=" + result
2040:                                            + " should be same sign as "
2041:                                            + refResult);
2042:                                }
2043:                            }
2044:                        }
2045:                    }
2046:                }
2047:
2048:                // test cases with i and I to make sure Turkic works
2049:                char[] iI = new char[] { 0x49, 0x69, 0x130, 0x131 };
2050:                USerializedSet sset = new USerializedSet();
2051:                UnicodeSet set = new UnicodeSet();
2052:
2053:                String s1, s2;
2054:                int start, end;
2055:
2056:                // collect all sets into one for contiguous output
2057:                int[] startEnd = new int[2];
2058:                for (i = 0; i < iI.length; ++i) {
2059:                    if (NormalizerImpl.getCanonStartSet(iI[i], sset)) {
2060:                        count = sset.countRanges();
2061:                        for (j = 0; j < count; ++j) {
2062:                            sset.getRange(j, startEnd);
2063:                            set.add(startEnd[0], startEnd[1]);
2064:                        }
2065:                    }
2066:                }
2067:
2068:                // test all of these precomposed characters
2069:                UnicodeSetIterator it = new UnicodeSetIterator(set);
2070:                while (it.nextRange()
2071:                        && it.codepoint != UnicodeSetIterator.IS_STRING) {
2072:                    start = it.codepoint;
2073:                    end = it.codepointEnd;
2074:                    while (start <= end) {
2075:                        s1 = Integer.toString(start);
2076:                        s2 = Normalizer.decompose(s1, false, 0);
2077:                        //                if(U_FAILURE(errorCode)) {
2078:                        //                    errln("Normalizer::decompose(U+%04x) failed: %s", start, u_errorName(errorCode));
2079:                        //                    return;
2080:                        //                }
2081:                        for (k = 0; k < opt.length; ++k) {
2082:                            // test Normalizer::compare
2083:
2084:                            result = norm_compare(s1, s2, opt[k].options);
2085:                            refResult = ref_norm_compare(s1, s2, opt[k].options);
2086:                            if (sign(result) != sign(refResult)) {
2087:                                errln("Normalizer.compare(U+" + hex(start)
2088:                                        + " with its NFD, " + opt[k].name + ")"
2089:                                        + signString(result) + " should be "
2090:                                        + signString(refResult));
2091:                            }
2092:
2093:                            // test UnicodeString::caseCompare - same internal implementation function
2094:                            if ((opt[k].options & Normalizer.COMPARE_IGNORE_CASE) > 0) {
2095:                                if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0) {
2096:                                    comp
2097:                                            .setIgnoreCase(
2098:                                                    true,
2099:                                                    UTF16.StringComparator.FOLD_CASE_DEFAULT);
2100:                                } else {
2101:                                    comp
2102:                                            .setIgnoreCase(
2103:                                                    true,
2104:                                                    UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2105:                                }
2106:
2107:                                comp
2108:                                        .setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2109:
2110:                                result = comp.compare(s1, s2);
2111:                                refResult = ref_case_compare(s1, s2,
2112:                                        opt[k].options);
2113:                                if (sign(result) != sign(refResult)) {
2114:                                    errln("UTF16.compare(U+" + hex(start)
2115:                                            + " with its NFD, " + opt[k].name
2116:                                            + ")" + signString(result)
2117:                                            + " should be "
2118:                                            + signString(refResult));
2119:                                }
2120:                            }
2121:                        }
2122:
2123:                        ++start;
2124:                    }
2125:                }
2126:
2127:            }
2128:
2129:            // verify that case-folding does not un-FCD strings
2130:            int countFoldFCDExceptions(int foldingOptions) {
2131:                String s, d;
2132:                int c;
2133:                int count;
2134:                int/*unsigned*/cc, trailCC, foldCC, foldTrailCC;
2135:                Normalizer.QuickCheckResult qcResult;
2136:                int category;
2137:                boolean isNFD;
2138:
2139:                logln("Test if case folding may un-FCD a string (folding options 0x)"
2140:                        + hex(foldingOptions));
2141:
2142:                count = 0;
2143:                for (c = 0; c <= 0x10ffff; ++c) {
2144:                    category = UCharacter.getType(c);
2145:                    if (category == UCharacterCategory.UNASSIGNED) {
2146:                        continue; // skip unassigned code points
2147:                    }
2148:                    if (c == 0xac00) {
2149:                        c = 0xd7a3; // skip Hangul - no case folding there
2150:                        continue;
2151:                    }
2152:                    // skip Han blocks - no case folding there either
2153:                    if (c == 0x3400) {
2154:                        c = 0x4db5;
2155:                        continue;
2156:                    }
2157:                    if (c == 0x4e00) {
2158:                        c = 0x9fa5;
2159:                        continue;
2160:                    }
2161:                    if (c == 0x20000) {
2162:                        c = 0x2a6d6;
2163:                        continue;
2164:                    }
2165:
2166:                    s = UTF16.valueOf(c);
2167:
2168:                    // get leading and trailing cc for c
2169:                    d = Normalizer.decompose(s, false);
2170:                    isNFD = s == d;
2171:                    cc = UCharacter.getCombiningClass(UTF16.charAt(d, 0));
2172:                    trailCC = UCharacter.getCombiningClass(UTF16.charAt(d, d
2173:                            .length() - 1));
2174:
2175:                    // get leading and trailing cc for the case-folding of c
2176:                    UCharacter.foldCase(s, (foldingOptions == 0));
2177:                    d = Normalizer.decompose(s, false);
2178:                    foldCC = UCharacter.getCombiningClass(UTF16.charAt(d, 0));
2179:                    foldTrailCC = UCharacter.getCombiningClass(UTF16.charAt(d,
2180:                            d.length() - 1));
2181:
2182:                    qcResult = Normalizer.quickCheck(s, Normalizer.FCD, 0);
2183:
2184:                    // bad:
2185:                    // - character maps to empty string: adjacent characters may then need reordering
2186:                    // - folding has different leading/trailing cc's, and they don't become just 0
2187:                    // - folding itself is not FCD
2188:                    if (qcResult != Normalizer.YES || s.length() == 0
2189:                            || (cc != foldCC && foldCC != 0)
2190:                            || (trailCC != foldTrailCC && foldTrailCC != 0)) {
2191:                        ++count;
2192:                        errln("U+"
2193:                                + hex(c)
2194:                                + ": case-folding may un-FCD a string (folding options 0x"
2195:                                + hex(foldingOptions) + ")");
2196:                        //errln("  cc %02x trailCC %02x    foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x   quickCheck(folded)=%d", cc, trailCC, UTF16.charAt(d,0), foldCC, UTF16.charAt(d,d.length()-1), foldTrailCC, qcResult);
2197:                        continue;
2198:                    }
2199:
2200:                    // also bad:
2201:                    // if a code point is in NFD but its case folding is not, then
2202:                    // unorm_compare will also fail
2203:                    if (isNFD
2204:                            && Normalizer.YES != Normalizer.quickCheck(s,
2205:                                    Normalizer.NFD, 0)) {
2206:                        ++count;
2207:                        errln("U+"
2208:                                + hex(c)
2209:                                + ": case-folding may un-FCD a string (folding options 0x"
2210:                                + hex(foldingOptions) + ")");
2211:                    }
2212:                }
2213:
2214:                logln("There are "
2215:                        + hex(count)
2216:                        + " code points for which case-folding may un-FCD a string (folding options"
2217:                        + foldingOptions + "x)");
2218:                return count;
2219:            }
2220:
2221:            public void TestFindFoldFCDExceptions() {
2222:                int count;
2223:
2224:                count = countFoldFCDExceptions(0);
2225:                count += countFoldFCDExceptions(Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I);
2226:                if (count > 0) {
2227:                    //*
2228:                    //* If case-folding un-FCDs any strings, then unorm_compare() must be
2229:                    //* re-implemented.
2230:                    //* It currently assumes that one can check for FCD then case-fold
2231:                    //* and then still have FCD strings for raw decomposition without reordering.
2232:                    //*
2233:                    errln("error: There are "
2234:                            + count
2235:                            + " code points for which case-folding"
2236:                            + " may un-FCD a string for all folding options.\n See comment"
2237:                            + " in BasicNormalizerTest::FindFoldFCDExceptions()!");
2238:                }
2239:            }
2240:
2241:            public void TestCombiningMarks() {
2242:                String src = "\u0f71\u0f72\u0f73\u0f74\u0f75";
2243:                String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
2244:                String result = Normalizer.decompose(src, false);
2245:                if (!expected.equals(result)) {
2246:                    errln("Reordering of combining marks failed. Expected: "
2247:                            + Utility.hex(expected) + " Got: "
2248:                            + Utility.hex(result));
2249:                }
2250:            }
2251:
2252:            /*
2253:             * Re-enable this test when UTC fixes UAX 21
2254:            public void TestUAX21Failure(){
2255:                final String[][] cases = new String[][]{
2256:                        {"\u0061\u0345\u0360\u0345\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2257:                        {"\u0061\u0345\u0345\u0360\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2258:                        {"\u0061\u0345\u0360\u0362\u0360\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2259:                        {"\u0061\u0360\u0345\u0360\u0362\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2260:                        {"\u0061\u0345\u0360\u0362\u0361\u0062", "\u0061\u0362\u0360\u0361\u0345\u0062"},
2261:                        {"\u0061\u0361\u0345\u0360\u0362\u0062", "\u0061\u0362\u0361\u0360\u0345\u0062"},
2262:                };
2263:                for(int i = 0; i< cases.length; i++){
2264:                    String s1 =cases[0][0]; 
2265:                    String s2 = cases[0][1];
2266:                    if( (Normalizer.compare(s1,s2,Normalizer.FOLD_CASE_DEFAULT ==0)//case sensitive compare
2267:                        &&
2268:                        (Normalizer.compare(s1,s2,Normalizer.COMPARE_IGNORE_CASE)!=0)){
2269:                        errln("Normalizer.compare() failed for s1: " 
2270:                                + Utility.hex(s1) +" s2: " + Utility.hex(s2));
2271:                    }
2272:                }
2273:            }
2274:             */
2275:            public void TestFCNFKCClosure() {
2276:                final class TestStruct {
2277:                    int c;
2278:                    String s;
2279:
2280:                    TestStruct(int cp, String src) {
2281:                        c = cp;
2282:                        s = src;
2283:                    }
2284:                }
2285:
2286:                TestStruct[] tests = new TestStruct[] {
2287:                        new TestStruct(0x037A, "\u0020\u03B9"),
2288:                        new TestStruct(0x03D2, "\u03C5"),
2289:                        new TestStruct(0x20A8, "\u0072\u0073"),
2290:                        new TestStruct(0x210B, "\u0068"),
2291:                        new TestStruct(0x210C, "\u0068"),
2292:                        new TestStruct(0x2121, "\u0074\u0065\u006C"),
2293:                        new TestStruct(0x2122, "\u0074\u006D"),
2294:                        new TestStruct(0x2128, "\u007A"),
2295:                        new TestStruct(0x1D5DB, "\u0068"),
2296:                        new TestStruct(0x1D5ED, "\u007A"),
2297:                        new TestStruct(0x0061, "") };
2298:
2299:                for (int i = 0; i < tests.length; ++i) {
2300:                    String result = Normalizer.getFC_NFKC_Closure(tests[i].c);
2301:                    if (!result.equals(new String(tests[i].s))) {
2302:                        errln("getFC_NFKC_Closure(U+"
2303:                                + Integer.toHexString(tests[i].c)
2304:                                + ") is wrong");
2305:                    }
2306:                }
2307:
2308:                /* error handling */
2309:
2310:                int length = Normalizer.getFC_NFKC_Closure(0x5c, null);
2311:                if (length != 0) {
2312:                    errln("getFC_NFKC_Closure did not perform error handling correctly");
2313:                }
2314:            }
2315:
2316:            public void TestBugJ2324() {
2317:                /* String[] input = new String[]{
2318:                                     //"\u30FD\u3099",
2319:                                     "\u30FA\u309A",
2320:                                     "\u30FB\u309A",
2321:                                     "\u30FC\u309A",
2322:                                     "\u30FE\u309A",
2323:                                     "\u30FD\u309A",
2324:
2325:                 };*/
2326:                String troublesome = "\u309A";
2327:                for (int i = 0x3000; i < 0x3100; i++) {
2328:                    String input = ((char) i) + troublesome;
2329:                    try {
2330:                        /*  String result =*/Normalizer.compose(input, false);
2331:                    } catch (IndexOutOfBoundsException e) {
2332:                        errln("compose() failed for input: "
2333:                                + Utility.hex(input) + " Exception: "
2334:                                + e.toString());
2335:                    }
2336:                }
2337:
2338:            }
2339:
2340:            static final int D = 0, C = 1, KD = 2, KC = 3, FCD = 4, NONE = 5;
2341:
2342:            private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
2343:                if (skipSets.length < 4) {
2344:                    return null;
2345:                }
2346:                skipSets[D]
2347:                        .applyPattern(
2348:                                "[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
2349:                                        + "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
2350:                                        + "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
2351:                                        + "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
2352:                                        + "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC"
2353:                                        + "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B"
2354:                                        + "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F"
2355:                                        + "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-"
2356:                                        + "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407"
2357:                                        + "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
2358:                                        + "-\\u045E\\u0476\\u0477\\u0483-\\u0486\\u04C1\\u04C2\\u04D0-"
2359:                                        + "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
2360:                                        + "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
2361:                                        + "\\u05C5\\u05C7\\u0610-\\u0615\\u0622-\\u0626\\u064B-\\u065E"
2362:                                        + "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4"
2363:                                        + "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
2364:                                        + "\\u07F3\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958"
2365:                                        + "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33"
2366:                                        + "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C"
2367:                                        + "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD"
2368:                                        + "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA"
2369:                                        + "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE"
2370:                                        + "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
2371:                                        + "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
2372:                                        + "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
2373:                                        + "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
2374:                                        + "\\u0FC6\\u1026\\u1037\\u1039\\u135F\\u1714\\u1734\\u17D2\\u17DD"
2375:                                        + "\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08\\u1B0A\\u1B0C"
2376:                                        + "\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44"
2377:                                        + "\\u1B6B-\\u1B73\\u1DC0-\\u1DCA\\u1DFE-\\u1E99\\u1E9B\\u1EA0-"
2378:                                        + "\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-"
2379:                                        + "\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D"
2380:                                        + "\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-"
2381:                                        + "\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-"
2382:                                        + "\\u1FFD\\u2000\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF"
2383:                                        + "\\u2126\\u212A\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204"
2384:                                        + "\\u2209\\u220C\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260"
2385:                                        + "\\u2262\\u226D-\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281"
2386:                                        + "\\u2284\\u2285\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3"
2387:                                        + "\\u22EA-\\u22ED\\u2329\\u232A\\u2ADC\\u302A-\\u302F\\u304C"
2388:                                        + "\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A\\u305C\\u305E"
2389:                                        + "\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071\\u3073\\u3074"
2390:                                        + "\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094\\u3099\\u309A"
2391:                                        + "\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA"
2392:                                        + "\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1"
2393:                                        + "\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4"
2394:                                        + "\\u30F7-\\u30FA\\u30FE\\uA806\\uAC00-\\uD7A3\\uF900-\\uFA0D"
2395:                                        + "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
2396:                                        + "-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
2397:                                        + "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
2398:                                        + "-\\uFB4E\\uFE20-\\uFE23\\U00010A0D\\U00010A0F\\U00010A38-\\U0001"
2399:                                        + "0A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
2400:                                        + "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
2401:                                        + "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
2402:                                        + "F800-\\U0002FA1D]", false);
2403:
2404:                skipSets[C]
2405:                        .applyPattern(
2406:                                "[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
2407:                                        + "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
2408:                                        + "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124"
2409:                                        + "\\u0125\\u0128-\\u012D\\u0130\\u0139\\u013A\\u013D\\u013E\\u0143"
2410:                                        + "\\u0144\\u0147\\u0148\\u014C-\\u0151\\u0154\\u0155\\u0158-"
2411:                                        + "\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168-\\u0171\\u0174-"
2412:                                        + "\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7\\u01CD-\\u01DC\\u01DE"
2413:                                        + "-\\u01E1\\u01E6-\\u01EB\\u01F4\\u01F5\\u01F8-\\u01FB\\u0200-"
2414:                                        + "\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0292\\u0300-\\u034E"
2415:                                        + "\\u0350-\\u036F\\u0374\\u037E\\u0387\\u0391\\u0395\\u0397\\u0399"
2416:                                        + "\\u039F\\u03A1\\u03A5\\u03A9\\u03AC\\u03AE\\u03B1\\u03B5\\u03B7"
2417:                                        + "\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-\\u03CB\\u03CE\\u03D2\\u0406"
2418:                                        + "\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B"
2419:                                        + "\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447"
2420:                                        + "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486\\u04D8\\u04D9"
2421:                                        + "\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5"
2422:                                        + "\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627\\u0648\\u064A-"
2423:                                        + "\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4"
2424:                                        + "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-"
2425:                                        + "\\u07F3\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958"
2426:                                        + "-\\u095F\\u09BC\\u09BE\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF"
2427:                                        + "\\u0A33\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD"
2428:                                        + "\\u0B3C\\u0B3E\\u0B47\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92"
2429:                                        + "\\u0BBE\\u0BC6\\u0BC7\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56"
2430:                                        + "\\u0CBC\\u0CBF\\u0CC2\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E"
2431:                                        + "\\u0D46\\u0D47\\u0D4D\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF"
2432:                                        + "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB"
2433:                                        + "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57"
2434:                                        + "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-"
2435:                                        + "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
2436:                                        + "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u1100-\\u1112\\u1161-"
2437:                                        + "\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2\\u17DD\\u18A9"
2438:                                        + "\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D"
2439:                                        + "\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44"
2440:                                        + "\\u1B6B-\\u1B73\\u1DC0-\\u1DCA\\u1DFE-\\u1E03\\u1E0A-\\u1E0F"
2441:                                        + "\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53"
2442:                                        + "\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99"
2443:                                        + "\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19"
2444:                                        + "\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50"
2445:                                        + "\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79"
2446:                                        + "\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98"
2447:                                        + "\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC"
2448:                                        + "\\u1FBE\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB"
2449:                                        + "\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE"
2450:                                        + "\\u2000\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF\\u2126"
2451:                                        + "\\u212A\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203"
2452:                                        + "\\u2208\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D"
2453:                                        + "\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D"
2454:                                        + "\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9"
2455:                                        + "\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u302A-\\u302F"
2456:                                        + "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
2457:                                        + "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
2458:                                        + "\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D\\u30A6\\u30AB\\u30AD"
2459:                                        + "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
2460:                                        + "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
2461:                                        + "\\u30EF-\\u30F2\\u30FD\\uA806\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70"
2462:                                        + "\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C"
2463:                                        + "\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68"
2464:                                        + "\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64"
2465:                                        + "\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060"
2466:                                        + "\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C"
2467:                                        + "\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258"
2468:                                        + "\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354"
2469:                                        + "\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450"
2470:                                        + "\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C"
2471:                                        + "\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648"
2472:                                        + "\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744"
2473:                                        + "\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840"
2474:                                        + "\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C"
2475:                                        + "\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38"
2476:                                        + "\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34"
2477:                                        + "\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30"
2478:                                        + "\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C"
2479:                                        + "\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28"
2480:                                        + "\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24"
2481:                                        + "\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020"
2482:                                        + "\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C"
2483:                                        + "\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218"
2484:                                        + "\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314"
2485:                                        + "\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410"
2486:                                        + "\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C"
2487:                                        + "\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608"
2488:                                        + "\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704"
2489:                                        + "\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800"
2490:                                        + "\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC"
2491:                                        + "\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8"
2492:                                        + "\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4"
2493:                                        + "\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0"
2494:                                        + "\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC"
2495:                                        + "\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8"
2496:                                        + "\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4"
2497:                                        + "\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0"
2498:                                        + "\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC"
2499:                                        + "\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8"
2500:                                        + "\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4"
2501:                                        + "\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0"
2502:                                        + "\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC"
2503:                                        + "\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8"
2504:                                        + "\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4"
2505:                                        + "\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D"
2506:                                        + "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A"
2507:                                        + "-\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-"
2508:                                        + "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
2509:                                        + "-\\uFB4E\\uFE20-\\uFE23\\U00010A0D\\U00010A0F\\U00010A38-\\U0001"
2510:                                        + "0A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
2511:                                        + "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
2512:                                        + "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002"
2513:                                        + "F800-\\U0002FA1D]", false);
2514:
2515:                skipSets[KD]
2516:                        .applyPattern(
2517:                                "[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
2518:                                        + "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
2519:                                        + "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
2520:                                        + "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
2521:                                        + "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151"
2522:                                        + "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0"
2523:                                        + "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B"
2524:                                        + "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD"
2525:                                        + "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A"
2526:                                        + "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0"
2527:                                        + "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
2528:                                        + "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439"
2529:                                        + "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
2530:                                        + "-\\u0486\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
2531:                                        + "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
2532:                                        + "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610"
2533:                                        + "-\\u0615\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
2534:                                        + "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
2535:                                        + "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3"
2536:                                        + "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-"
2537:                                        + "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36"
2538:                                        + "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48"
2539:                                        + "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48"
2540:                                        + "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB"
2541:                                        + "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33"
2542:                                        + "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-"
2543:                                        + "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39"
2544:                                        + "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80"
2545:                                        + "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9"
2546:                                        + "\\u0FC6\\u1026\\u1037\\u1039\\u10FC\\u135F\\u1714\\u1734\\u17D2"
2547:                                        + "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1B06\\u1B08\\u1B0A"
2548:                                        + "\\u1B0C\\u1B0E\\u1B12\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43"
2549:                                        + "\\u1B44\\u1B6B-\\u1B73\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-"
2550:                                        + "\\u1D4D\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DCA\\u1DFE-\\u1E9B"
2551:                                        + "\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45"
2552:                                        + "\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-"
2553:                                        + "\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-"
2554:                                        + "\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-"
2555:                                        + "\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036"
2556:                                        + "\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071"
2557:                                        + "\\u2074-\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1"
2558:                                        + "\\u20E5-\\u20EF\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113"
2559:                                        + "\\u2115\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126"
2560:                                        + "\\u2128\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-"
2561:                                        + "\\u2140\\u2145-\\u2149\\u2153-\\u217F\\u219A\\u219B\\u21AE"
2562:                                        + "\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D"
2563:                                        + "\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-"
2564:                                        + "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285"
2565:                                        + "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED"
2566:                                        + "\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC"
2567:                                        + "\\u2D6F\\u2E9F\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F"
2568:                                        + "\\u3036\\u3038-\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056"
2569:                                        + "\\u3058\\u305A\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069"
2570:                                        + "\\u3070\\u3071\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C"
2571:                                        + "\\u307D\\u3094\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0"
2572:                                        + "\\u30B2\\u30B4\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2"
2573:                                        + "\\u30C5\\u30C7\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7"
2574:                                        + "\\u30D9\\u30DA\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF"
2575:                                        + "\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3243"
2576:                                        + "\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA806\\uAC00-"
2577:                                        + "\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20"
2578:                                        + "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6A\\uFA70-"
2579:                                        + "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-"
2580:                                        + "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3"
2581:                                        + "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-"
2582:                                        + "\\uFE19\\uFE20-\\uFE23\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-"
2583:                                        + "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC"
2584:                                        + "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7"
2585:                                        + "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U00010A0D\\U00010"
2586:                                        + "A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U0001D15E-\\U0001D169"
2587:                                        + "\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
2588:                                        + "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001"
2589:                                        + "D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C"
2590:                                        + "\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4"
2591:                                        + "A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
2592:                                        + "\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001"
2593:                                        + "D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
2594:                                        + "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
2595:                                        + "D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB"
2596:                                        + "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]",
2597:                                false);
2598:
2599:                skipSets[KC]
2600:                        .applyPattern(
2601:                                "[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
2602:                                        + "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
2603:                                        + "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
2604:                                        + "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
2605:                                        + "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A"
2606:                                        + "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151"
2607:                                        + "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168"
2608:                                        + "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7"
2609:                                        + "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5"
2610:                                        + "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233"
2611:                                        + "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-"
2612:                                        + "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387"
2613:                                        + "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC"
2614:                                        + "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-"
2615:                                        + "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5"
2616:                                        + "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
2617:                                        + "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
2618:                                        + "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486"
2619:                                        + "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
2620:                                        + "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627"
2621:                                        + "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2"
2622:                                        + "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED"
2623:                                        + "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0928\\u0930\\u0933"
2624:                                        + "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE"
2625:                                        + "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C"
2626:                                        + "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47"
2627:                                        + "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7"
2628:                                        + "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2"
2629:                                        + "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D"
2630:                                        + "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A"
2631:                                        + "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC"
2632:                                        + "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D"
2633:                                        + "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84"
2634:                                        + "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6"
2635:                                        + "\\u1025\\u102E\\u1037\\u1039\\u10FC\\u1100-\\u1112\\u1161-"
2636:                                        + "\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2\\u17DD\\u18A9"
2637:                                        + "\\u1939-\\u193B\\u1A17\\u1A18\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D"
2638:                                        + "\\u1B11\\u1B34\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44"
2639:                                        + "\\u1B6B-\\u1B73\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D"
2640:                                        + "\\u1D4F-\\u1D6A\\u1D78\\u1D9B-\\u1DCA\\u1DFE-\\u1E03\\u1E0A-"
2641:                                        + "\\u1E0F\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-"
2642:                                        + "\\u1E53\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-"
2643:                                        + "\\u1E9B\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18"
2644:                                        + "\\u1F19\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49"
2645:                                        + "\\u1F50\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77"
2646:                                        + "\\u1F79\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91"
2647:                                        + "\\u1F98\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-"
2648:                                        + "\\u1FC1\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD"
2649:                                        + "-\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9"
2650:                                        + "\\u1FFB-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026"
2651:                                        + "\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049"
2652:                                        + "\\u2057\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094"
2653:                                        + "\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20EF\\u2100-\\u2103"
2654:                                        + "\\u2105-\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D"
2655:                                        + "\\u2120-\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-"
2656:                                        + "\\u2131\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2153-"
2657:                                        + "\\u217F\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208"
2658:                                        + "\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243"
2659:                                        + "\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276"
2660:                                        + "\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292"
2661:                                        + "\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460"
2662:                                        + "-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2D6F\\u2E9F\\u2EF3"
2663:                                        + "\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A"
2664:                                        + "\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059"
2665:                                        + "\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072"
2666:                                        + "\\u3075\\u3078\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD"
2667:                                        + "\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF"
2668:                                        + "\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB"
2669:                                        + "\\u30EF-\\u30F2\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F"
2670:                                        + "\\u3200-\\u321E\\u3220-\\u3243\\u3250-\\u327E\\u3280-\\u32FE"
2671:                                        + "\\u3300-\\u33FF\\uA806\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C"
2672:                                        + "\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88"
2673:                                        + "\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84"
2674:                                        + "\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80"
2675:                                        + "\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C"
2676:                                        + "\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178"
2677:                                        + "\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274"
2678:                                        + "\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370"
2679:                                        + "\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C"
2680:                                        + "\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568"
2681:                                        + "\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664"
2682:                                        + "\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760"
2683:                                        + "\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C"
2684:                                        + "\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958"
2685:                                        + "\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54"
2686:                                        + "\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50"
2687:                                        + "\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C"
2688:                                        + "\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48"
2689:                                        + "\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44"
2690:                                        + "\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40"
2691:                                        + "\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C"
2692:                                        + "\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138"
2693:                                        + "\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234"
2694:                                        + "\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330"
2695:                                        + "\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C"
2696:                                        + "\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528"
2697:                                        + "\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624"
2698:                                        + "\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720"
2699:                                        + "\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C"
2700:                                        + "\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918"
2701:                                        + "\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14"
2702:                                        + "\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10"
2703:                                        + "\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C"
2704:                                        + "\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08"
2705:                                        + "\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04"
2706:                                        + "\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00"
2707:                                        + "\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC"
2708:                                        + "\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8"
2709:                                        + "\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4"
2710:                                        + "\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0"
2711:                                        + "\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC"
2712:                                        + "\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8"
2713:                                        + "\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4"
2714:                                        + "\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0"
2715:                                        + "\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10"
2716:                                        + "\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-"
2717:                                        + "\\uFA2D\\uFA30-\\uFA6A\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-"
2718:                                        + "\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41"
2719:                                        + "\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F"
2720:                                        + "\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE23"
2721:                                        + "\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B"
2722:                                        + "\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-"
2723:                                        + "\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-"
2724:                                        + "\\uFFE6\\uFFE8-\\uFFEE\\U00010A0D\\U00010A0F\\U00010A38-\\U00010"
2725:                                        + "A3A\\U00010A3F\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172"
2726:                                        + "\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
2727:                                        + "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001"
2728:                                        + "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F"
2729:                                        + "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D"
2730:                                        + "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-"
2731:                                        + "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001"
2732:                                        + "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E"
2733:                                        + "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
2734:                                        + "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF"
2735:                                        + "\\U0002F800-\\U0002FA1D]", false);
2736:
2737:                return skipSets;
2738:            }
2739:
2740:            public void TestSkippable() {
2741:                UnicodeSet starts;
2742:                UnicodeSet[] skipSets = new UnicodeSet[] { new UnicodeSet(), //NFD
2743:                        new UnicodeSet(), //NFC
2744:                        new UnicodeSet(), //NFKC
2745:                        new UnicodeSet(), //NFKD
2746:                        new UnicodeSet(), //FCD
2747:                        new UnicodeSet(), //NONE
2748:                };
2749:                UnicodeSet[] expectSets = new UnicodeSet[] { new UnicodeSet(),
2750:                        new UnicodeSet(), new UnicodeSet(), new UnicodeSet(),
2751:                        new UnicodeSet(), new UnicodeSet(), };
2752:                StringBuffer s, pattern;
2753:                int start, limit, rangeEnd;
2754:                int i, range, count;
2755:                starts = new UnicodeSet();
2756:                /*
2757:                //[\u0350-\u0357\u035D-\u035F\u0610-\u0615\u0656-\u0658\u0CBC\u17DD\u1939-\u193B]
2758:                for(int ch=0;ch<=0x10FFFF;ch++){
2759:                        if(Normalizer.isNFSkippable(ch, Normalizer.NFD)) {
2760:                            skipSets[D].add(ch);
2761:                        }
2762:                        if(Normalizer.isNFSkippable(ch, Normalizer.NFKD)) {
2763:                            skipSets[KD].add(ch);
2764:                        }
2765:                        if(Normalizer.isNFSkippable(ch, Normalizer.NFC)) {
2766:                            skipSets[C].add(ch);
2767:                        }
2768:                        if(Normalizer.isNFSkippable(ch, Normalizer.NFKC)) {
2769:                            skipSets[KC].add(ch);
2770:                        }
2771:                        if(Normalizer.isNFSkippable(ch, Normalizer.FCD)) {
2772:                            skipSets[FCD].add(ch);
2773:                        }
2774:                        if(Normalizer.isNFSkippable(ch, Normalizer.NONE)) {
2775:                            skipSets[NONE].add(ch);
2776:                        }
2777:                }
2778:                 */
2779:                // build NF*Skippable sets from runtime data 
2780:                NormalizerImpl.addPropertyStarts(starts);
2781:                count = starts.getRangeCount();
2782:
2783:                start = limit = 0;
2784:                rangeEnd = 0;
2785:                range = 0;
2786:                for (;;) {
2787:                    if (start < limit) {
2788:                        // get properties for start and apply them to [start..limit[ 
2789:                        if (Normalizer.isNFSkippable(start, Normalizer.NFD)) {
2790:                            skipSets[D].add(start, limit - 1);
2791:                        }
2792:                        if (Normalizer.isNFSkippable(start, Normalizer.NFKD)) {
2793:                            skipSets[KD].add(start, limit - 1);
2794:                        }
2795:                        if (Normalizer.isNFSkippable(start, Normalizer.NFC)) {
2796:                            skipSets[C].add(start, limit - 1);
2797:                        }
2798:                        if (Normalizer.isNFSkippable(start, Normalizer.NFKC)) {
2799:                            skipSets[KC].add(start, limit - 1);
2800:                        }
2801:                        if (Normalizer.isNFSkippable(start, Normalizer.FCD)) {
2802:                            skipSets[FCD].add(start, limit - 1);
2803:                        }
2804:                        if (Normalizer.isNFSkippable(start, Normalizer.NONE)) {
2805:                            skipSets[NONE].add(start, limit - 1);
2806:                        }
2807:
2808:                    }
2809:
2810:                    // go to next range of same properties 
2811:                    start = limit;
2812:                    if (++limit > rangeEnd) {
2813:                        if (range < count) {
2814:                            limit = starts.getRangeStart(range);
2815:                            rangeEnd = starts.getRangeEnd(range);
2816:                            ++range;
2817:                        } else if (range == count) {
2818:                            // additional range to complete the Unicode code space 
2819:                            limit = rangeEnd = 0x110000;
2820:                            ++range;
2821:                        } else {
2822:                            break;
2823:                        }
2824:                    }
2825:                }
2826:
2827:                expectSets = initSkippables(expectSets);
2828:                if (expectSets[D].contains(0x0350)) {
2829:                    errln("expectSets[D] contains 0x0350");
2830:                }
2831:                //expectSets.length for now do not test FCD and NONE since there is no data
2832:                for (i = 0; i < 4; ++i) {
2833:
2834:                    if (!skipSets[i].equals(expectSets[i])) {
2835:                        errln("error: TestSkippable skipSets["
2836:                                + i
2837:                                + "]!=expectedSets["
2838:                                + i
2839:                                + "]\n"
2840:                                + "May need to update hardcoded UnicodeSet patterns in com.ibm.icu.dev.test.normalizer.BasicTest.java\n"
2841:                                + "See ICU4J - unicodetools.com.ibm.text.UCD.NFSkippable\n"
2842:                                + "Run com.ibm.text.UCD.Main with the option NFSkippable.");
2843:
2844:                        s = new StringBuffer();
2845:
2846:                        s.append("\n\nskip=       ");
2847:                        s.append(skipSets[i].toPattern(true));
2848:                        s.append("\n\n");
2849:
2850:                        s.append("skip-expect=");
2851:                        pattern = new StringBuffer(((UnicodeSet) skipSets[i]
2852:                                .clone()).removeAll(expectSets[i]).toPattern(
2853:                                true));
2854:                        s.append(pattern);
2855:
2856:                        pattern.delete(0, pattern.length());
2857:                        s.append("\n\nexpect-skip=");
2858:                        pattern = new StringBuffer(((UnicodeSet) expectSets[i]
2859:                                .clone()).removeAll(skipSets[i])
2860:                                .toPattern(true));
2861:                        s.append(pattern);
2862:                        s.append("\n\n");
2863:
2864:                        pattern.delete(0, pattern.length());
2865:                        s.append("\n\nintersection(expect,skip)=");
2866:                        UnicodeSet intersection = ((UnicodeSet) expectSets[i]
2867:                                .clone()).retainAll(skipSets[i]);
2868:                        pattern = new StringBuffer(intersection.toPattern(true));
2869:                        s.append(pattern);
2870:                        s.append("\n\n");
2871:
2872:                        errln(s.toString());
2873:                    }
2874:                }
2875:            }
2876:
2877:            public void TestBugJ2068() {
2878:                String sample = "The quick brown fox jumped over the lazy dog";
2879:                UCharacterIterator text = UCharacterIterator
2880:                        .getInstance(sample);
2881:                Normalizer norm = new Normalizer(text, Normalizer.NFC, 0);
2882:                text.setIndex(4);
2883:                if (text.current() == norm.current()) {
2884:                    errln("Normalizer is not cloning the UCharacterIterator");
2885:                }
2886:            }
2887:
2888:            public void TestGetCombiningClass() {
2889:                for (int i = 0; i < 0x10FFFF; i++) {
2890:                    int cc = UCharacter.getCombiningClass(i);
2891:                    if (0xD800 <= i && i <= 0xDFFF && cc > 0) {
2892:                        cc = UCharacter.getCombiningClass(i);
2893:                        errln("CC: " + cc + " for codepoint: "
2894:                                + Utility.hex(i, 8));
2895:                    }
2896:                }
2897:            }
2898:
2899:            public void TestGetNX() {
2900:                UnicodeSet set = NormalizerImpl
2901:                        .getNX(1 /*NormalizerImpl.NX_HANGUL*/);
2902:                if (!set.contains(0xac01)) {
2903:                    errln("getNX did not return correct set for NX_HANGUL");
2904:                }
2905:
2906:                set = NormalizerImpl.getNX(2/*NormalizerImpl.NX_CJK_COMPAT*/);
2907:                if (!set.contains('\uFA20')) {
2908:                    errln("getNX did not return correct set for NX_CJK_COMPAT");
2909:                }
2910:            }
2911:
2912:            public void TestSerializedSet() {
2913:                USerializedSet sset = new USerializedSet();
2914:                UnicodeSet set = new UnicodeSet();
2915:                int start, end;
2916:
2917:                // collect all sets into one for contiguous output
2918:                int[] startEnd = new int[2];
2919:
2920:                if (NormalizerImpl.getCanonStartSet(0x0130, sset)) {
2921:                    int count = sset.countRanges();
2922:                    for (int j = 0; j < count; ++j) {
2923:                        sset.getRange(j, startEnd);
2924:                        set.add(startEnd[0], startEnd[1]);
2925:                    }
2926:                }
2927:
2928:                // test all of these precomposed characters
2929:                UnicodeSetIterator it = new UnicodeSetIterator(set);
2930:                while (it.nextRange()
2931:                        && it.codepoint != UnicodeSetIterator.IS_STRING) {
2932:                    start = it.codepoint;
2933:                    end = it.codepointEnd;
2934:                    while (start <= end) {
2935:                        if (!sset.contains(start)) {
2936:                            errln("USerializedSet.contains failed for "
2937:                                    + Utility.hex(start, 8));
2938:                        }
2939:                    }
2940:                }
2941:            }
2942:
2943:            public void TestReturnFailure() {
2944:                char[] term = { 'r', '\u00e9', 's', 'u', 'm', '\u00e9' };
2945:                char[] decomposed_term = new char[10 + term.length + 2];
2946:                int rc = Normalizer.decompose(term, 0, term.length,
2947:                        decomposed_term, 0, decomposed_term.length, true, 0);
2948:                int rc1 = Normalizer.decompose(term, 0, term.length,
2949:                        decomposed_term, 10, decomposed_term.length, true, 0);
2950:                if (rc != rc1) {
2951:                    errln("Normalizer decompose did not return correct length");
2952:                }
2953:            }
2954:
2955:            private final static class TestCompositionCase {
2956:                public Normalizer.Mode mode;
2957:                public int options;
2958:                public String input, expect;
2959:
2960:                TestCompositionCase(Normalizer.Mode mode, int options,
2961:                        String input, String expect) {
2962:                    this .mode = mode;
2963:                    this .options = options;
2964:                    this .input = input;
2965:                    this .expect = expect;
2966:                }
2967:            }
2968:
2969:            public void TestComposition() {
2970:                final TestCompositionCase cases[] = new TestCompositionCase[] {
2971:                        /*
2972:                         * special cases for UAX #15 bug
2973:                         * see Unicode Public Review Issue #29
2974:                         * at http://www.unicode.org/review/resolved-pri.html#pri29
2975:                         */
2976:                        new TestCompositionCase(Normalizer.NFC, 0,
2977:                                "\u1100\u0300\u1161\u0327",
2978:                                "\u1100\u0300\u1161\u0327"),
2979:                        new TestCompositionCase(Normalizer.NFC, 0,
2980:                                "\u1100\u0300\u1161\u0327\u11a8",
2981:                                "\u1100\u0300\u1161\u0327\u11a8"),
2982:                        new TestCompositionCase(Normalizer.NFC, 0,
2983:                                "\uac00\u0300\u0327\u11a8",
2984:                                "\uac00\u0327\u0300\u11a8"),
2985:                        new TestCompositionCase(Normalizer.NFC, 0,
2986:                                "\u0b47\u0300\u0b3e", "\u0b47\u0300\u0b3e"),
2987:
2988:                        new TestCompositionCase(Normalizer.NFC,
2989:                                NormalizerImpl.BEFORE_PRI_29,
2990:                                "\u1100\u0300\u1161\u0327",
2991:                                "\uac00\u0300\u0327"),
2992:                        new TestCompositionCase(Normalizer.NFC,
2993:                                NormalizerImpl.BEFORE_PRI_29,
2994:                                "\u1100\u0300\u1161\u0327\u11a8",
2995:                                "\uac01\u0300\u0327"),
2996:                        new TestCompositionCase(Normalizer.NFC,
2997:                                NormalizerImpl.BEFORE_PRI_29,
2998:                                "\uac00\u0300\u0327\u11a8",
2999:                                "\uac01\u0327\u0300"),
3000:                        new TestCompositionCase(Normalizer.NFC,
3001:                                NormalizerImpl.BEFORE_PRI_29,
3002:                                "\u0b47\u0300\u0b3e", "\u0b4b\u0300")
3003:
3004:                /* TODO: add test cases for UNORM_FCC here (j2151) */
3005:                };
3006:
3007:                String output;
3008:                int i, length;
3009:
3010:                for (i = 0; i < cases.length; ++i) {
3011:                    output = Normalizer.normalize(cases[i].input,
3012:                            cases[i].mode, cases[i].options);
3013:                    if (!output.equals(cases[i].expect)) {
3014:                        errln("unexpected result for case " + i);
3015:                    }
3016:                }
3017:            }
3018:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.