001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.tool.translit;
008:
009: import com.ibm.icu.lang.*;
010: import com.ibm.icu.text.*; //import java.text.*;
011: import java.io.*;
012: import java.util.Locale;
013:
014: // com.ibm.icu.dev.tool.translit.UnicodeSetClosure
015: // com.ibm.icu.dev.test.translit.TransliteratorTest
016:
017: public class UnicodeSetClosure {
018: public static void main(String[] args) throws Exception {
019:
020: UnicodeSet foo = new UnicodeSet(
021: "[\u1FF6-\u1FFD\u2000-\u2001\u2126]");
022:
023: test();
024: if (foo == null) {
025: }
026:
027: /* The following is superceded by Alan's tool
028:
029: File f = new File("UnicodeSetClosure.txt");
030: String filename = f.getCanonicalFile().toString();
031: out = new PrintWriter(
032: new OutputStreamWriter(
033: new FileOutputStream(filename), "UTF-8"));
034: System.out.println("Writing " + filename);
035: out.print('\uFEFF'); // BOM
036:
037: generateSets("Latin-Katakana", true, Normalizer.DECOMP_COMPAT, true,
038: "[',.a-z~\u00DF\u00E6\u00F0\u00F8\u00FE\u02BE\u0300-\u034E\u0360-\u0362\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0655\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u0901-\u0902\u093C\u0941-\u0948\u094D\u0951-\u0954\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0B01\u0B3C\u0B3F\u0B41-\u0B43\u0B4D\u0B56\u0B82\u0BC0\u0BCD\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0CBF\u0CC6\u0CCC-\u0CCD\u0D41-\u0D43\u0D4D\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F90-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032\u1036-\u1037\u1039\u1058-\u1059\u17B7-\u17BD\u17C6\u17C9-\u17D3\u18A9\u20D0-\u20DC\u20E1\u302A-\u302F\uFB1E\uFE20-\uFE23\\U0001D167-\\U0001D169\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD]"
039: );
040: generateSets("Latin-Katakana", false, Normalizer.DECOMP_COMPAT, false,
041: "[~\u3001-\u3002\u30A1-\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB\u30BD\u30BF\u30C1\u30C3-\u30C4\u30C6\u30C8\u30CA-\u30CF\u30D2\u30D5\u30D8\u30DB\u30DE-\u30F3\u30F5-\u30F6\u30FC-\u30FD]"
042: );
043:
044: out.close();
045:
046: /////////////////////////////////////////////////
047: if (true) return; // skip the stuff we've done already
048:
049: generateSets("Cyrillic-Latin", true, Normalizer.DECOMP, false,
050: "[\u0402\u0404-\u0406\u0408-\u040B\u040F-\u0418\u041A-\u0438\u043A-\u044F\u0452\u0454-\u0456\u0458-\u045B\u045F\u0490-\u0495\u0498-\u0499\u04D4-\u04D5\u04D8-\u04D9]"
051: );
052: generateSets("Latin-Cyrillic", false, Normalizer.DECOMP, false,
053: "[A-Za-z\u00C6\u00E6\u0110-\u0111\u018F\u0259\u02B9-\u02BA]"
054: );
055: */
056: }
057:
058: public static void generateSets(String label, boolean forward,
059: Normalizer.Mode m, boolean lowerFirst, String rules) {
060: UnicodeSet s = new UnicodeSet(rules);
061: System.out.println("Generating " + label
062: + (forward ? "" : " BACKWARD"));
063: close(s, m, lowerFirst);
064: out.println("# MINIMAL FILTER GENERATED FOR: " + label
065: + (forward ? "" : " BACKWARD"));
066: out.println(":: " + (forward ? "" : "( ") + s.toPattern(true)
067: + (forward ? "" : " )") + " ;");
068: out.println();
069: out.println("Unicode: " + s.toPattern(false));
070: }
071:
/**
 * Generation switch. While true, close(UnicodeSet, Char32ToString) starts a
 * fresh generatedSet and records in it every code point whose mapping is
 * non-null, and test(String) appends a "static UnicodeSet[][] UNCHANGED"
 * source snippet to the report. Set to true by test().
 */
072: static boolean GENERATE = false;
073:
074: public static void test() throws Exception {
075: File f = new File("TestUnicodeSetClosure.txt");
076: String filename = f.getCanonicalFile().toString();
077: out = new PrintWriter(new OutputStreamWriter(
078: new FileOutputStream(filename), "UTF-8"));
079: System.out.println("Writing " + filename);
080: out.print('\uFEFF'); // BOM
081:
082: GENERATE = true;
083: test("[:Devanagari:]");
084: /*
085: test("[\u00E0Bc]");
086: test("[m]");
087: */
088:
089: out.close();
090: }
091:
/**
 * Normalization modes exercised by test(String). The three-argument close()
 * also searches this array to turn a mode into the index it passes to close2().
 */
092: static final Normalizer.Mode[] testModes = { Normalizer.NONE,
093: Normalizer.NFD, Normalizer.NFC, Normalizer.NFKD,
094: Normalizer.NFKC };
/** Report labels for testModes; test(String) pairs them up by index. */
095: static final String[] modeNames = { "NoNF", "NFD", "NFC", "NFKD",
096: "NFKC" };
097:
/** Values of the lowerFirst flag exercised by test(String). */
098: static final boolean[] testCases = { false, true };
/** Report labels for testCases; test(String) pairs them up by index. */
099: static final String[] caseNames = { "noLower", "lower" };
100:
101: public static void test(String testStr) throws Exception {
102: UnicodeSet original = new UnicodeSet(testStr);
103:
104: System.out.println("Testing Closure of: "
105: + original.toPattern(true));
106: out.println("Testing Closure of: " + original.toPattern(false));
107: UnicodeSet raw = new UnicodeSet(original);
108: original.complement();
109: original.complement();
110: if (!raw.equals(original)) {
111: out.println("Equals:" + original.toPattern(false));
112: }
113: StringWriter swLog = null;
114: PrintWriter log = null;
115:
116: if (GENERATE) {
117: swLog = new StringWriter();
118: log = new PrintWriter(swLog);
119: log.println("static UnicodeSet[][] UNCHANGED = {");
120: }
121: for (int i = 0; i < testCases.length; ++i) {
122: if (GENERATE)
123: log.println(" {");
124: for (int j = 0; j < testModes.length; ++j) {
125: UnicodeSet test = new UnicodeSet(original);
126: close(test, testModes[j], testCases[i]);
127: if (GENERATE) {
128: log.println("\tnew UnicodeSet(\""
129: + generatedSet.toPattern(true) + "\"),");
130: }
131: String label = caseNames[i] + ", " + modeNames[j];
132: System.out.println(label);
133: out.println(label + ": " + test.toPattern(false));
134: test.removeAll(original);
135: if (test.isEmpty()) {
136: out.println("\tNo Difference from original");
137: } else {
138: out.println("\tDifference = "
139: + test.toPattern(false));
140: }
141: out.flush();
142: }
143: if (GENERATE)
144: log.println(" },");
145: out.println();
146: }
147:
148: if (GENERATE) {
149: log.println("};");
150: out.print(swLog.getBuffer().toString());
151: }
152:
153: /*
154: close(test, Normalizer.DECOMP, false);
155: print("NFD", test);
156:
157: test = new UnicodeSet(testStr);
158: close(test, Normalizer.NO_OP, true);
159: print("Lower", test);
160:
161: test = new UnicodeSet(testStr);
162: close(test, Normalizer.COMPOSE, false);
163: print("NFC", test);
164:
165: test = new UnicodeSet(testStr);
166: close(test, Normalizer.DECOMP_COMPAT, false);
167: print("NFKD", test);
168:
169: test = new UnicodeSet(testStr);
170: close(test, Normalizer.COMPOSE_COMPAT, false);
171: print("NFKC", test);
172: */
173: }
174:
/**
 * Shared report writer. Assigned and closed by test(); written to by
 * generateSets() and test(String), which expect it to be open already.
 */
175: static PrintWriter out;
176:
177: /*
178: public static void print(String label, UnicodeSet test) {
179: System.out.println(label);
180: out.println(label + ": " + test.toPattern(false));
181: out.println();
182: }
183: */
184:
185: // dumb, slow implementations
186: public static class NFToString implements Char32ToString {
187: Normalizer.Mode mode;
188: boolean lowerFirst;
189:
190: NFToString(Normalizer.Mode m, boolean lowerFirst) {
191: mode = m;
192: this .lowerFirst = lowerFirst;
193: }
194:
195: public String get(int cp) {
196: String source = UTF16.valueOf(cp);
197: String result = source;
198: if (lowerFirst)
199: result = UCharacter.toLowerCase(Locale.US, result);
200: result = Normalizer.normalize(result, mode);
201: if (lowerFirst)
202: result = UCharacter.toLowerCase(Locale.US, result);
203: if (result.equals(source))
204: return null;
205: return result;
206: }
207: }
208:
/**
 * A mapping from a single code point (char32) to a string.
 */
interface Char32ToString {
    /**
     * @param cp code point to map
     * @return the mapped string, or null if there is no change
     */
    String get(int cp);
}
216:
/**
 * Enables the close2() shortcut in close(UnicodeSet, Normalizer.Mode, boolean).
 * When false, the full code-point scan in close(UnicodeSet, Char32ToString)
 * is always used.
 */
217: static boolean FAST = true;
218:
219: public static void close(UnicodeSet s, Normalizer.Mode m,
220: boolean lowerFirst) {
221: Char32ToString f = new NFToString(m, lowerFirst);
222: if (FAST) {
223: int mm;
224: for (mm = 0;; ++mm)
225: if (m == testModes[mm])
226: break; // find mode
227: close2(s, f, lowerFirst ? 1 : 0, mm);
228: return;
229: }
230: close(s, f);
231: }
232:
233: public static void close(UnicodeSet s, Char32ToString f) {
234: if (GENERATE)
235: generatedSet = new UnicodeSet();
236:
237: for (int cp = 0; cp <= 0x10FFFF; ++cp) {
238: int type = UCharacter.getType(cp);
239: if (type == Character.UNASSIGNED)
240: continue;
241:
242: //if (cp == '\u00e7') {
243: // System.out.println("debug");
244: //}
245: String result = f.get(cp);
246: if (result == null)
247: continue;
248: if (GENERATE) {
249: generatedSet.add(cp);
250: }
251: if (!containsSome(s, result))
252: continue;
253: s.add(cp);
254: }
255: }
256:
257: public static void close2(UnicodeSet s, Char32ToString f, int lc,
258: int mode) {
259: UnicodeSet unchanged = new UnicodeSet(); // UNCHANGED[lc][mode];
260: int count = unchanged.getRangeCount();
261: for (int i = 0; i < count; ++i) {
262: int start = unchanged.getRangeStart(i);
263: int end = unchanged.getRangeEnd(i);
264: for (int cp = start; cp <= end; ++cp) {
265: String result = f.get(cp);
266: if (result == null)
267: throw new IllegalArgumentException(
268: "Something wrong -- should never happen");
269: if (!containsSome(s, result))
270: continue;
271: s.add(cp);
272: }
273: }
274: }
275:
276: /*
277: static final UnicodeSet[][] UNCHANGED = {
278: { // \u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4
279: new UnicodeSet("[\u1FF6-\u1FFD\u2000-\u2001\u2126]"),
280: // \u212A-\u212B\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278
281: new UnicodeSet("[\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0340-\u0341\u0343-\u0344\u0374\u037E\u0385-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0622-\u0626\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFD\u2000-\u2001\u2126\u212A-\u212B\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40
-\uFB41\uFB43-\uFB44\uFB46-\uFB4E]"),
282: new UnicodeSet("[\u0340-\u0341\u0343-\u0344\u0374\u037E\u0387\u0958-\u095F\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB\u1FE3\u1FEB\u1FEE-\u1FEF\u1FF9\u1FFB\u1FFD\u2000-\u2001\u2126\u212A-\u212B\u2329-\u232A\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E]"),
283: new UnicodeSet("[\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0132-\u0137\u0139-\u0140\u0143-\u0149\u014C-\u0151\u0154-\u0165\u0168-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01C4-\u01DC\u01DE-\u01E3\u01E6-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0344\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D0-\u03D6\u03F0-\u03F2\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0587\u0622-\u0626\u0675-\u0678\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u30
56\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309B-\u309C\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE]"),
284: new UnicodeSet("[\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u0132-\u0133\u013F-\u0140\u0149\u017F\u01C4-\u01CC\u01F1-\u01F3\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0344\u0374\u037A\u037E\u0384-\u0385\u0387\u03D0-\u03D6\u03F0-\u03F2\u0587\u0675-\u0678\u0958-\u095F\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1E9A-\u1E9B\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1FBB\u1FBD-\u1FC1\u1FC9\u1FCB\u1FCD-\u1FCF\u1FD3\u1FDB\u1FDD-\u1FDF\u1FE3\u1FEB\u1FED-\u1FEF\u1FF9\u1FFB\u1FFD-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u222C-\u222D\u222F-\u2230\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u309B-\u309C\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE]"),
285: },
286: {
287: new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u0345\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D1\u03D5-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u09F8\u0F2A-\u0F33\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7
C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FBC\u1FBE\u1FC8-\u1FCC\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFC\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),
288: new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0110\u0112-\u0126\u0128-\u0132\u0134-\u0137\u0139-\u013F\u0141\u0143-\u0148\u014A\u014C-\u0152\u0154-\u0166\u0168-\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01DC\u01DE-\u01E4\u01E6-\u021C\u021E-\u021F\u0222\u0224\u0226-\u0233\u0340-\u0341\u0343-\u0345\u0374\u037E\u0385-\u038A\u038C\u038E-\u03A1\u03A3-\u03B0\u03C2\u03CA-\u03CE\u03D0-\u03D1\u03D3-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476-\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1-\u04C3\u04C7\u04CB\u04D0-\u04D4\u04D6-\u04D8\u04DA-\u04E0\u04E2-\u04E8\u04EA-\u04F5\u04F8-\u04F9\u0531-\u0556\u0622-\u0626\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u137C\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFD\u2000-\u2001\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241\u2244\u2247\u2249\u2260\u2262\u2
26D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),
289: new UnicodeSet("[A-Z\u00B2-\u00B3\u00B5\u00B9\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u0340-\u0341\u0343-\u0345\u0374\u037E\u0386-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D1\u03D5-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u0958-\u095F\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E
38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FBC\u1FBE\u1FC8-\u1FCC\u1FD3\u1FD8-\u1FDB\u1FE3\u1FE8-\u1FEC\u1FEE-\u1FEF\u1FF8-\u1FFD\u2000-\u2001\u2070\u2074-\u2079\u2080-\u2089\u2126\u212A-\u212B\u2153-\u215E\u2160-\u217F\u2181-\u2183\u2329-\u232A\u2460-\u2468\u2474-\u247C\u2488-\u2490\u24B6-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFF21-\uFF3A\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),
290: new UnicodeSet("[A-Z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0110\u0112-\u0126\u0128-\u0137\u0139-\u0141\u0143-\u014A\u014C-\u0152\u0154-\u0166\u0168-\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01DC\u01DE-\u01E4\u01E6-\u021C\u021E-\u021F\u0222\u0224\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0345\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u03A1\u03A3-\u03B0\u03C2\u03CA-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476-\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1-\u04C3\u04C7\u04CB\u04D0-\u04D4\u04D6-\u04D8\u04DA-\u04E0\u04E2-\u04E8\u04EA-\u04F5\u04F8-\u04F9\u0531-\u0556\u0587\u0622-\u0626\u0675-\u0678\u06C0\u06C2\u06D3\u0929\u0931\u0934\u0958-\u095F\u09CB-\u09CC\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B48\u0B4B-\u0B4C\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCC\u0C48\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0D4A-\u0D4C\u0DDA\u0DDC-\u0DDE\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u1026\u137C\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-
\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u2181-\u2183\u219A-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A\u2460-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u309B-\u309C\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U0001D172]"),
291: new UnicodeSet("[A-Z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130-\u0134\u0136\u0139\u013B\u013D\u013F-\u0141\u0143\u0145\u0147\u0149-\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u017F\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4-\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1-\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u02B0-\u02B8\u02D8-\u02DD\u02E0-\u02E4\u0340-\u0341\u0343-\u0345\u0374\u037A\u037E\u0384-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03C2\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F0-\u03F2\u03F4-\u03F5\u0400-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C1\u04C3\u04C7\u04CB\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F8\u0531-\u0556\u0587\u0675-\u0678\u0958-\u095F\u09DC-\u09DD\u09DF\u09F8\u0A33\u0A36\u0A59-\u0A5B\u0A5E\u0B5C-\u0B5D\u0E33\u0EB3\u0EDC-\u0EDD\u0F0C\u0F2A-\u0F33\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F73\u0F75-\u0F79\u0F81\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u137C\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1
E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9A-\u1E9B\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FB8-\u1FC1\u1FC8-\u1FCF\u1FD3\u1FD8-\u1FDB\u1FDD-\u1FDF\u1FE3\u1FE8-\u1FEF\u1FF8-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-\u208E\u20A8\u2100-\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u2181-\u2183\u222C-\u222D\u222F-\u2230\u2329-\u232A\u2460-\u24EA\u2776-\u277E\u2780-\u2788\u278A-\u2792\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u3036\u3038-\u303A\u309B-\u309C\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13-\uFB17\uFB1F-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001034A\\U00010400-\\U00010425\\U0001D165-\\U0001D166\\U0001D16D-\\U
0001D172]"),
292: },
293: };
294: */
295:
/**
 * Filled by close(UnicodeSet, Char32ToString) while GENERATE is true: the
 * assigned code points whose mapping was non-null during the latest such
 * call. Stays null until that method has run with GENERATE on.
 */
296: static UnicodeSet generatedSet;
297:
298: // These should both be public, and on the respective classes
299:
300: public static void addAll(UnicodeSet s, String str) {
301: int cp;
302: for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {
303: cp = UTF16.charAt(str, i);
304: s.add(cp);
305: }
306: }
307:
308: public static boolean containsSome(UnicodeSet s, String str) {
309: int cp;
310: for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {
311: cp = UTF16.charAt(str, i);
312: if (s.contains(cp))
313: return true;
314: }
315: return false;
316: }
317: }
|