0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package java.lang;
0019:
0020: import java.io.Serializable;
0021: import java.util.SortedMap;
0022: import java.util.TreeMap;
0023:
0024: import org.apache.harmony.luni.util.BinarySearch;
0025:
0026: import com.ibm.icu.lang.UCharacter;
0027:
0028: /**
0029: * <p>
0030: * Character is the wrapper for the primitive type <code>char</code>. This
0031: * class also provides a number of utility methods for working with
0032: * <code>char</code>s.
0033: * </p>
0034: *
0035: * <p>
0036: * Character data is based upon the Unicode Standard, 4.0. The Unicode
0037: * specification, character tables and other information are available at <a
0038: * href="http://www.unicode.org/">http://www.unicode.org/</a>.
0039: * </p>
0040: *
0041: * <p>
0042: * Unicode characters are referred to as <i>code points</i>. The range of valid
0043: * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i>
0044: * is the code point range U+0000 to U+FFFF. Characters above the BMP are
0045: * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16
0046: * encoding and <code>char</code> pairs are used to represent code points in
0047: * the supplementary range. A pair of <code>char</code> values that represents
0048: * a supplementary character is made up of a <i>high surrogate</i> with a
0049: * value range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value
0050: * range of 0xDC00 to 0xDFFF.
0051: * </p>
0052: *
0053: * <p>
0054: * On the Java platform a <code>char</code> value represents either a single
0055: * BMP code point or a UTF-16 unit that's part of a surrogate pair. The
0056: * <code>int</code> type is used to represent all Unicode code points.
0057: * </p>
0058: *
0059: * @since 1.0
0060: */
0061: public final class Character implements Serializable,
0062: Comparable<Character> {
0063: private static final long serialVersionUID = 3786198910865385080L; // serialization version identifier (this class implements Serializable)
0064:
0065: private final char value; // the primitive char held by this wrapper instance
0066:
0067: /**
0068: * The minimum possible Character value.
0069: */
0070: public static final char MIN_VALUE = '\u0000';
0071:
0072: /**
0073: * The maximum possible Character value.
0074: */
0075: public static final char MAX_VALUE = '\uffff';
0076:
0077: /**
0078: * The minimum possible radix used for conversions between Characters and
0079: * integers.
0080: */
0081: public static final int MIN_RADIX = 2;
0082:
0083: /**
0084: * The maximum possible radix used for conversions between Characters and
0085: * integers.
0086: */
0087: public static final int MAX_RADIX = 36;
0088:
0089: /**
0090: * The <code>char</code> {@link Class} object.
0091: */
0092: @SuppressWarnings("unchecked")
0093: public static final Class<Character> TYPE = (Class<Character>) new char[0]
0094: .getClass().getComponentType();
0095:
0096: // Note: This can't be set to "char.class", since *that* is
0097: // defined to be "java.lang.Character.TYPE";
0098:
0099: /**
0100: * Unicode category constant Cn.
0101: */
0102: public static final byte UNASSIGNED = 0;
0103:
0104: /**
0105: * Unicode category constant Lu.
0106: */
0107: public static final byte UPPERCASE_LETTER = 1;
0108:
0109: /**
0110: * Unicode category constant Ll.
0111: */
0112: public static final byte LOWERCASE_LETTER = 2;
0113:
0114: /**
0115: * Unicode category constant Lt.
0116: */
0117: public static final byte TITLECASE_LETTER = 3;
0118:
0119: /**
0120: * Unicode category constant Lm.
0121: */
0122: public static final byte MODIFIER_LETTER = 4;
0123:
0124: /**
0125: * Unicode category constant Lo.
0126: */
0127: public static final byte OTHER_LETTER = 5;
0128:
0129: /**
0130: * Unicode category constant Mn.
0131: */
0132: public static final byte NON_SPACING_MARK = 6;
0133:
0134: /**
0135: * Unicode category constant Me.
0136: */
0137: public static final byte ENCLOSING_MARK = 7;
0138:
0139: /**
0140: * Unicode category constant Mc.
0141: */
0142: public static final byte COMBINING_SPACING_MARK = 8;
0143:
0144: /**
0145: * Unicode category constant Nd.
0146: */
0147: public static final byte DECIMAL_DIGIT_NUMBER = 9;
0148:
0149: /**
0150: * Unicode category constant Nl.
0151: */
0152: public static final byte LETTER_NUMBER = 10;
0153:
0154: /**
0155: * Unicode category constant No.
0156: */
0157: public static final byte OTHER_NUMBER = 11;
0158:
0159: /**
0160: * Unicode category constant Zs.
0161: */
0162: public static final byte SPACE_SEPARATOR = 12;
0163:
0164: /**
0165: * Unicode category constant Zl.
0166: */
0167: public static final byte LINE_SEPARATOR = 13;
0168:
0169: /**
0170: * Unicode category constant Zp.
0171: */
0172: public static final byte PARAGRAPH_SEPARATOR = 14;
0173:
0174: /**
0175: * Unicode category constant Cc.
0176: */
0177: public static final byte CONTROL = 15;
0178:
0179: /**
0180: * Unicode category constant Cf.
0181: */
0182: public static final byte FORMAT = 16;
0183:
0184: /**
0185: * Unicode category constant Co.
0186: */
0187: public static final byte PRIVATE_USE = 18;
0188:
0189: /**
0190: * Unicode category constant Cs.
0191: */
0192: public static final byte SURROGATE = 19;
0193:
0194: /**
0195: * Unicode category constant Pd.
0196: */
0197: public static final byte DASH_PUNCTUATION = 20;
0198:
0199: /**
0200: * Unicode category constant Ps.
0201: */
0202: public static final byte START_PUNCTUATION = 21;
0203:
0204: /**
0205: * Unicode category constant Pe.
0206: */
0207: public static final byte END_PUNCTUATION = 22;
0208:
0209: /**
0210: * Unicode category constant Pc.
0211: */
0212: public static final byte CONNECTOR_PUNCTUATION = 23;
0213:
0214: /**
0215: * Unicode category constant Po.
0216: */
0217: public static final byte OTHER_PUNCTUATION = 24;
0218:
0219: /**
0220: * Unicode category constant Sm.
0221: */
0222: public static final byte MATH_SYMBOL = 25;
0223:
0224: /**
0225: * Unicode category constant Sc.
0226: */
0227: public static final byte CURRENCY_SYMBOL = 26;
0228:
0229: /**
0230: * Unicode category constant Sk.
0231: */
0232: public static final byte MODIFIER_SYMBOL = 27;
0233:
0234: /**
0235: * Unicode category constant So.
0236: */
0237: public static final byte OTHER_SYMBOL = 28;
0238:
0239: /**
0240: * Unicode category constant Pi.
0241: * @since 1.4
0242: */
0243: public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
0244:
0245: /**
0246: * Unicode category constant Pf.
0247: * @since 1.4
0248: */
0249: public static final byte FINAL_QUOTE_PUNCTUATION = 30;
0250:
0251: /**
0252: * Unicode bidirectional constant indicating an undefined directionality.
0253: * @since 1.4
0254: */
0255: public static final byte DIRECTIONALITY_UNDEFINED = -1;
0256:
0257: /**
0258: * Unicode bidirectional constant L.
0259: * @since 1.4
0260: */
0261: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
0262:
0263: /**
0264: * Unicode bidirectional constant R.
0265: * @since 1.4
0266: */
0267: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
0268:
0269: /**
0270: * Unicode bidirectional constant AL.
0271: * @since 1.4
0272: */
0273: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
0274:
0275: /**
0276: * Unicode bidirectional constant EN.
0277: * @since 1.4
0278: */
0279: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
0280:
0281: /**
0282: * Unicode bidirectional constant ES.
0283: * @since 1.4
0284: */
0285: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
0286:
0287: /**
0288: * Unicode bidirectional constant ET.
0289: * @since 1.4
0290: */
0291: public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
0292:
0293: /**
0294: * Unicode bidirectional constant AN.
0295: * @since 1.4
0296: */
0297: public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
0298:
0299: /**
0300: * Unicode bidirectional constant CS.
0301: * @since 1.4
0302: */
0303: public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
0304:
0305: /**
0306: * Unicode bidirectional constant NSM.
0307: * @since 1.4
0308: */
0309: public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
0310:
0311: /**
0312: * Unicode bidirectional constant BN.
0313: * @since 1.4
0314: */
0315: public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
0316:
0317: /**
0318: * Unicode bidirectional constant B.
0319: * @since 1.4
0320: */
0321: public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
0322:
0323: /**
0324: * Unicode bidirectional constant S.
0325: * @since 1.4
0326: */
0327: public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
0328:
0329: /**
0330: * Unicode bidirectional constant WS.
0331: * @since 1.4
0332: */
0333: public static final byte DIRECTIONALITY_WHITESPACE = 12;
0334:
0335: /**
0336: * Unicode bidirectional constant ON.
0337: * @since 1.4
0338: */
0339: public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
0340:
0341: /**
0342: * Unicode bidirectional constant LRE.
0343: * @since 1.4
0344: */
0345: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
0346:
0347: /**
0348: * Unicode bidirectional constant LRO.
0349: * @since 1.4
0350: */
0351: public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
0352:
0353: /**
0354: * Unicode bidirectional constant RLE.
0355: * @since 1.4
0356: */
0357: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
0358:
0359: /**
0360: * Unicode bidirectional constant RLO.
0361: * @since 1.4
0362: */
0363: public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
0364:
0365: /**
0366: * Unicode bidirectional constant PDF.
0367: * @since 1.4
0368: */
0369: public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
0370:
0371: /**
0372: * <p>
0373: * Minimum value of a high surrogate or leading surrogate unit in UTF-16
0374: * encoding - <code>'\uD800'</code>.
0375: * </p>
0376: *
0377: * @since 1.5
0378: */
0379: public static final char MIN_HIGH_SURROGATE = '\uD800';
0380:
0381: /**
0382: * <p>
0383: * Maximum value of a high surrogate or leading surrogate unit in UTF-16
0384: * encoding - <code>'\uDBFF'</code>.
0385: * </p>
0386: *
0387: * @since 1.5
0388: */
0389: public static final char MAX_HIGH_SURROGATE = '\uDBFF';
0390:
0391: /**
0392: * <p>
0393: * Minimum value of a low surrogate or trailing surrogate unit in UTF-16
0394: * encoding - <code>'\uDC00'</code>.
0395: * </p>
0396: *
0397: * @since 1.5
0398: */
0399: public static final char MIN_LOW_SURROGATE = '\uDC00';
0400:
0401: /**
0402: * <p>Maximum value of a low surrogate or trailing surrogate unit in UTF-16
0403: * encoding - <code>'\uDFFF'</code>.
0404: * </p>
0405: *
0406: * @since 1.5
0407: */
0408: public static final char MAX_LOW_SURROGATE = '\uDFFF';
0409:
0410: /**
0411: * <p>
0412: * Minimum value of a surrogate unit in UTF-16 encoding - <code>'\uD800'</code>.
0413: * </p>
0414: *
0415: * @since 1.5
0416: */
0417: public static final char MIN_SURROGATE = '\uD800';
0418:
0419: /**
0420: * <p>
0421: * Maximum value of a surrogate unit in UTF-16 encoding - <code>'\uDFFF'</code>.
0422: * </p>
0423: *
0424: * @since 1.5
0425: */
0426: public static final char MAX_SURROGATE = '\uDFFF';
0427:
0428: /**
0429: * <p>
0430: * Minimum value of a supplementary code point - <code>U+010000</code>.
0431: * </p>
0432: *
0433: * @since 1.5
0434: */
0435: public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
0436:
0437: /**
0438: * <p>
0439: * Minimum code point value - <code>U+0000</code>.
0440: * </p>
0441: *
0442: * @since 1.5
0443: */
0444: public static final int MIN_CODE_POINT = 0x000000;
0445:
0446: /**
0447: * <p>
0448: * Maximum code point value - <code>U+10FFFF</code>.
0449: * </p>
0450: *
0451: * @since 1.5
0452: */
0453: public static final int MAX_CODE_POINT = 0x10FFFF;
0454:
0455: /**
0456: * <p>
0457: * Constant for the number of bits to represent a <code>char</code> in
0458: * two's complement form.
0459: * </p>
0460: *
0461: * @since 1.5
0462: */
0463: public static final int SIZE = 16;
0464:
0465: // Unicode 3.0.1 (same as Unicode 3.0.0)
0466: private static final String bidiKeys = "\u0000\t\f\u000e\u001c\u001f!#&+/1:<A[a{\u007f\u0085\u0087\u00a0\u00a2\u00a6\u00aa\u00ac\u00b0\u00b2\u00b4\u00b7\u00b9\u00bb\u00c0\u00d7\u00d9\u00f7\u00f9\u0222\u0250\u02b0\u02b9\u02bb\u02c2\u02d0\u02d2\u02e0\u02e5\u02ee\u0300\u0360\u0374\u037a\u037e\u0384\u0386\u0389\u038c\u038e\u03a3\u03d0\u03da\u0400\u0483\u0488\u048c\u04c7\u04cb\u04d0\u04f8\u0531\u0559\u0561\u0589\u0591\u05a3\u05bb\u05be\u05c2\u05d0\u05f0\u060c\u061b\u061f\u0621\u0640\u064b\u0660\u066a\u066c\u0670\u0672\u06d6\u06e5\u06e7\u06e9\u06eb\u06f0\u06fa\u0700\u070f\u0711\u0713\u0730\u0780\u07a6\u0901\u0903\u0905\u093c\u093e\u0941\u0949\u094d\u0950\u0952\u0958\u0962\u0964\u0981\u0983\u0985\u098f\u0993\u09aa\u09b2\u09b6\u09bc\u09be\u09c1\u09c7\u09cb\u09cd\u09d7\u09dc\u09df\u09e2\u09e6\u09f2\u09f4\u0a02\u0a05\u0a0f\u0a13\u0a2a\u0a32\u0a35\u0a38\u0a3c\u0a3e\u0a41\u0a47\u0a4b\u0a59\u0a5e\u0a66\u0a70\u0a72\u0a81\u0a83\u0a85\u0a8d\u0a8f\u0a93\u0aaa\u0ab2\u0ab5\u0abc\u0abe\u0ac1\u0ac7\u0ac9\u0acb\u0acd\u0ad0\u0ae0\u0ae6\u0b01\u0b03\u0b05\u0b0f\u0b13\u0b2a\u0b32\u0b36\u0b3c\u0b3e\u0b42\u0b47\u0b4b\u0b4d\u0b56\u0b5c\u0b5f\u0b66\u0b82\u0b85\u0b8e\u0b92\u0b99\u0b9c\u0b9e\u0ba3\u0ba8\u0bae\u0bb7\u0bbe\u0bc0\u0bc2\u0bc6\u0bca\u0bcd\u0bd7\u0be7\u0c01\u0c05\u0c0e\u0c12\u0c2a\u0c35\u0c3e\u0c41\u0c46\u0c4a\u0c55\u0c60\u0c66\u0c82\u0c85\u0c8e\u0c92\u0caa\u0cb5\u0cbe\u0cc1\u0cc6\u0cc8\u0cca\u0ccc\u0cd5\u0cde\u0ce0\u0ce6\u0d02\u0d05\u0d0e\u0d12\u0d2a\u0d3e\u0d41\u0d46\u0d4a\u0d4d\u0d57\u0d60\u0d66\u0d82\u0d85\u0d9a\u0db3\u0dbd\u0dc0\u0dca\u0dcf\u0dd2\u0dd6\u0dd8\u0df2\u0e01\u0e31\u0e33\u0e35\u0e3f\u0e41\u0e47\u0e4f\u0e81\u0e84\u0e87\u0e8a\u0e8d\u0e94\u0e99\u0ea1\u0ea5\u0ea7\u0eaa\u0ead\u0eb1\u0eb3\u0eb5\u0ebb\u0ebd\u0ec0\u0ec6\u0ec8\u0ed0\u0edc\u0f00\u0f18\u0f1a\u0f35\u0f3a\u0f3e\u0f49\u0f71\u0f7f\u0f81\u0f85\u0f87\u0f89\u0f90\u0f99\u0fbe\u0fc6\u0fc8\u0fcf\u1000\u1023\u1029\u102c\u102e\u1031\u1036\u1038\u1040\u1058\u10a0\u10d0\u10fb\u1100\u115f\u11a8\u1200\u1208\u1248\u124a\u1250
\u1258\u125a\u1260\u1288\u128a\u1290\u12b0\u12b2\u12b8\u12c0\u12c2\u12c8\u12d0\u12d8\u12f0\u1310\u1312\u1318\u1320\u1348\u1361\u13a0\u1401\u1680\u1682\u169b\u16a0\u1780\u17b7\u17be\u17c6\u17c8\u17ca\u17d4\u17db\u17e0\u1800\u180b\u1810\u1820\u1880\u18a9\u1e00\u1ea0\u1f00\u1f18\u1f20\u1f48\u1f50\u1f59\u1f5b\u1f5d\u1f5f\u1f80\u1fb6\u1fbd\u1fc0\u1fc2\u1fc6\u1fcd\u1fd0\u1fd6\u1fdd\u1fe0\u1fed\u1ff2\u1ff6\u1ffd\u2000\u200b\u200e\u2010\u2028\u202a\u202c\u202e\u2030\u2035\u2048\u206a\u2070\u2074\u207a\u207c\u207f\u2081\u208a\u208c\u20a0\u20d0\u2100\u2102\u2104\u2107\u2109\u210b\u2114\u2117\u2119\u211e\u2124\u212b\u212e\u2130\u2132\u2134\u213a\u2153\u2160\u2190\u2200\u2212\u2214\u2300\u2336\u237b\u237d\u2395\u2397\u2400\u2440\u2460\u249c\u24ea\u2500\u25a0\u2600\u2619\u2701\u2706\u270c\u2729\u274d\u274f\u2756\u2758\u2761\u2776\u2798\u27b1\u2800\u2e80\u2e9b\u2f00\u2ff0\u3000\u3002\u3005\u3008\u3021\u302a\u3030\u3032\u3036\u3038\u303e\u3041\u3099\u309b\u309d\u30a1\u30fb\u30fd\u3105\u3131\u3190\u3200\u3220\u3260\u327f\u32c0\u32d0\u3300\u337b\u33e0\u3400\u4e00\ua000\ua490\ua4a4\ua4b5\ua4c2\ua4c6\uac00\ud800\ufb00\ufb13\ufb1d\ufb20\ufb29\ufb2b\ufb38\ufb3e\ufb40\ufb43\ufb46\ufb50\ufbd3\ufd3e\ufd50\ufd92\ufdf0\ufe20\ufe30\ufe49\ufe50\ufe54\ufe57\ufe5f\ufe61\ufe63\ufe65\ufe68\ufe6a\ufe70\ufe74\ufe76\ufeff\uff01\uff03\uff06\uff0b\uff0f\uff11\uff1a\uff1c\uff21\uff3b\uff41\uff5b\uff61\uff66\uffc2\uffca\uffd2\uffda\uffe0\uffe2\uffe5\uffe8\ufff9\ufffc";
0467:
0468: private static final char[] bidiValues = "\b\n\u000b\u0c0b\r\u0b0d\u001b\n\u001e\u000b \u0c0d\"\u000e%\u0006*\u000e.\u06080\u05049\u0004;\u0e08@\u000eZ\u0001`\u000ez\u0001~\u000e\u0084\n\u0086\u0b0a\u009f\n\u00a1\u0e08\u00a5\u0006\u00a9\u000e\u00ab\u0e01\u00af\u000e\u00b1\u0006\u00b3\u0004\u00b6\u010e\u00b8\u000e\u00ba\u0401\u00bf\u000e\u00d6\u0001\u00d8\u0e01\u00f6\u0001\u00f8\u0e01\u021f\u0001\u0233\u0001\u02ad\u0001\u02b8\u0001\u02ba\u000e\u02c1\u0001\u02cf\u000e\u02d1\u0001\u02df\u000e\u02e4\u0001\u02ed\u000e\u02ee\u0001\u034e\t\u0362\t\u0375\u000e\u037a\u0001\u037e\u000e\u0385\u000e\u0388\u0e01\u038a\u0001\u038c\u0001\u03a1\u0001\u03ce\u0001\u03d7\u0001\u03f3\u0001\u0482\u0001\u0486\t\u0489\t\u04c4\u0001\u04c8\u0001\u04cc\u0001\u04f5\u0001\u04f9\u0001\u0556\u0001\u055f\u0001\u0587\u0001\u058a\u010e\u05a1\t\u05b9\t\u05bd\t\u05c1\u0902\u05c4\u0209\u05ea\u0002\u05f4\u0002\u060c\b\u061b\u0300\u061f\u0300\u063a\u0003\u064a\u0003\u0655\t\u0669\u0007\u066b\u0706\u066d\u0307\u0671\u0309\u06d5\u0003\u06e4\t\u06e6\u0003\u06e8\t\u06ea\u0e09\u06ed\t\u06f9\u0004\u06fe\u0003\u070d\u0003\u0710\u0a03\u0712\u0903\u072c\u0003\u074a\t\u07a5\u0003\u07b0\t\u0902\t\u0903\u0100\u0939\u0001\u093d\u0109\u0940\u0001\u0948\t\u094c\u0001\u094d\u0900\u0951\u0901\u0954\t\u0961\u0001\u0963\t\u0970\u0001\u0982\u0901\u0983\u0100\u098c\u0001\u0990\u0001\u09a8\u0001\u09b0\u0001\u09b2\u0001\u09b9\u0001\u09bc\t\u09c0\u0001\u09c4\t\u09c8\u0001\u09cc\u0001\u09cd\u0900\u09d7\u0100\u09dd\u0001\u09e1\u0001\u09e3\t\u09f1\u0001\u09f3\u0006\u09fa\u0001\u0a02\t\u0a0a\u0001\u0a10\u0001\u0a28\u0001\u0a30\u0001\u0a33\u0001\u0a36\u0001\u0a39\u0001\u0a3c\t\u0a40\u0001\u0a42\t\u0a48\t\u0a4d\t\u0a5c\u0001\u0a5e\u0001\u0a6f\u0001\u0a71\t\u0a74\u0001\u0a82\t\u0a83\u0100\u0a8b\u0001\u0a8d\u0100\u0a91\u0001\u0aa8\u0001\u0ab0\u0001\u0ab3\u0001\u0ab9\u0001\u0abd\u0109\u0ac0\u0001\u0ac5\t\u0ac8\t\u0ac9\u0100\u0acc\u0001\u0acd\u0900\u0ad0\u0001\u0ae0\u0001\u0aef\u0001\u0b02\u0901\u0b03\u0100\u0b0c\u0001\u0b10\u0001
\u0b28\u0001\u0b30\u0001\u0b33\u0001\u0b39\u0001\u0b3d\u0109\u0b41\u0901\u0b43\t\u0b48\u0001\u0b4c\u0001\u0b4d\u0900\u0b57\u0109\u0b5d\u0001\u0b61\u0001\u0b70\u0001\u0b83\u0109\u0b8a\u0001\u0b90\u0001\u0b95\u0001\u0b9a\u0001\u0b9c\u0001\u0b9f\u0001\u0ba4\u0001\u0baa\u0001\u0bb5\u0001\u0bb9\u0001\u0bbf\u0001\u0bc1\u0109\u0bc2\u0001\u0bc8\u0001\u0bcc\u0001\u0bcd\u0900\u0bd7\u0100\u0bf2\u0001\u0c03\u0001\u0c0c\u0001\u0c10\u0001\u0c28\u0001\u0c33\u0001\u0c39\u0001\u0c40\t\u0c44\u0001\u0c48\t\u0c4d\t\u0c56\t\u0c61\u0001\u0c6f\u0001\u0c83\u0001\u0c8c\u0001\u0c90\u0001\u0ca8\u0001\u0cb3\u0001\u0cb9\u0001\u0cc0\u0901\u0cc4\u0001\u0cc7\u0109\u0cc8\u0001\u0ccb\u0001\u0ccd\t\u0cd6\u0001\u0cde\u0001\u0ce1\u0001\u0cef\u0001\u0d03\u0001\u0d0c\u0001\u0d10\u0001\u0d28\u0001\u0d39\u0001\u0d40\u0001\u0d43\t\u0d48\u0001\u0d4c\u0001\u0d4d\u0900\u0d57\u0100\u0d61\u0001\u0d6f\u0001\u0d83\u0001\u0d96\u0001\u0db1\u0001\u0dbb\u0001\u0dbd\u0100\u0dc6\u0001\u0dca\t\u0dd1\u0001\u0dd4\t\u0dd6\t\u0ddf\u0001\u0df4\u0001\u0e30\u0001\u0e32\u0901\u0e34\u0109\u0e3a\t\u0e40\u0601\u0e46\u0001\u0e4e\t\u0e5b\u0001\u0e82\u0001\u0e84\u0001\u0e88\u0001\u0e8a\u0001\u0e8d\u0100\u0e97\u0001\u0e9f\u0001\u0ea3\u0001\u0ea5\u0100\u0ea7\u0100\u0eab\u0001\u0eb0\u0001\u0eb2\u0901\u0eb4\u0109\u0eb9\t\u0ebc\t\u0ebd\u0100\u0ec4\u0001\u0ec6\u0001\u0ecd\t\u0ed9\u0001\u0edd\u0001\u0f17\u0001\u0f19\t\u0f34\u0001\u0f39\u0901\u0f3d\u000e\u0f47\u0001\u0f6a\u0001\u0f7e\t\u0f80\u0109\u0f84\t\u0f86\u0109\u0f88\u0901\u0f8b\u0001\u0f97\t\u0fbc\t\u0fc5\u0001\u0fc7\u0109\u0fcc\u0001\u0fcf\u0100\u1021\u0001\u1027\u0001\u102a\u0001\u102d\u0901\u1030\t\u1032\u0109\u1037\t\u1039\u0901\u1057\u0001\u1059\t\u10c5\u0001\u10f6\u0001\u10fb\u0100\u1159\u0001\u11a2\u0001\u11f9\u0001\u1206\u0001\u1246\u0001\u1248\u0001\u124d\u0001\u1256\u0001\u1258\u0001\u125d\u0001\u1286\u0001\u1288\u0001\u128d\u0001\u12ae\u0001\u12b0\u0001\u12b5\u0001\u12be\u0001\u12c0\u0001\u12c5\u0001\u12ce\u0001\u12d6\u0001\u12ee\u0001\u130e\u0001\u1310\u0001\u1315\u0001\u13
1e\u0001\u1346\u0001\u135a\u0001\u137c\u0001\u13f4\u0001\u1676\u0001\u1681\u010d\u169a\u0001\u169c\u000e\u16f0\u0001\u17b6\u0001\u17bd\t\u17c5\u0001\u17c7\u0109\u17c9\u0901\u17d3\t\u17da\u0001\u17dc\u0601\u17e9\u0001\u180a\u000e\u180e\n\u1819\u0001\u1877\u0001\u18a8\u0001\u18a9\u0900\u1e9b\u0001\u1ef9\u0001\u1f15\u0001\u1f1d\u0001\u1f45\u0001\u1f4d\u0001\u1f57\u0001\u1f59\u0100\u1f5b\u0100\u1f5d\u0100\u1f7d\u0001\u1fb4\u0001\u1fbc\u0001\u1fbf\u0e01\u1fc1\u000e\u1fc4\u0001\u1fcc\u0001\u1fcf\u000e\u1fd3\u0001\u1fdb\u0001\u1fdf\u000e\u1fec\u0001\u1fef\u000e\u1ff4\u0001\u1ffc\u0001\u1ffe\u000e\u200a\r\u200d\n\u200f\u0201\u2027\u000e\u2029\u0b0d\u202b\u110f\u202d\u1013\u202f\u0d12\u2034\u0006\u2046\u000e\u204d\u000e\u206f\n\u2070\u0004\u2079\u0004\u207b\u0006\u207e\u000e\u2080\u0104\u2089\u0004\u208b\u0006\u208e\u000e\u20af\u0006\u20e3\t\u2101\u000e\u2103\u0e01\u2106\u000e\u2108\u010e\u210a\u0e01\u2113\u0001\u2116\u010e\u2118\u000e\u211d\u0001\u2123\u000e\u212a\u0e01\u212d\u0001\u212f\u0106\u2131\u0001\u2133\u010e\u2139\u0001\u213a\u000e\u215f\u000e\u2183\u0001\u21f3\u000e\u2211\u000e\u2213\u0006\u22f1\u000e\u2335\u000e\u237a\u0001\u237b\u0e00\u2394\u000e\u2396\u010e\u239a\u000e\u2426\u000e\u244a\u000e\u249b\u0004\u24e9\u0001\u24ea\u0004\u2595\u000e\u25f7\u000e\u2613\u000e\u2671\u000e\u2704\u000e\u2709\u000e\u2727\u000e\u274b\u000e\u274d\u0e00\u2752\u000e\u2756\u000e\u275e\u000e\u2767\u000e\u2794\u000e\u27af\u000e\u27be\u000e\u28ff\u000e\u2e99\u000e\u2ef3\u000e\u2fd5\u000e\u2ffb\u000e\u3001\u0e0d\u3004\u000e\u3007\u0001\u3020\u000e\u3029\u0001\u302f\t\u3031\u010e\u3035\u0001\u3037\u000e\u303a\u0001\u303f\u000e\u3094\u0001\u309a\t\u309c\u000e\u309e\u0001\u30fa\u0001\u30fc\u0e01\u30fe\u0001\u312c\u0001\u318e\u0001\u31b7\u0001\u321c\u0001\u3243\u0001\u327b\u0001\u32b0\u0001\u32cb\u0001\u32fe\u0001\u3376\u0001\u33dd\u0001\u33fe\u0001\u4db5\u0001\u9fa5\u0001\ua48c\u0001\ua4a1\u000e\ua4b3\u000e\ua4c0\u000e\ua4c4\u000e\ua4c6\u000e\ud7a3\u0001\ufa2d\u0001\ufb06\u0001\ufb17\u0001
\ufb1f\u0209\ufb28\u0002\ufb2a\u0602\ufb36\u0002\ufb3c\u0002\ufb3e\u0002\ufb41\u0002\ufb44\u0002\ufb4f\u0002\ufbb1\u0003\ufd3d\u0003\ufd3f\u000e\ufd8f\u0003\ufdc7\u0003\ufdfb\u0003\ufe23\t\ufe44\u000e\ufe4f\u000e\ufe52\u0e08\ufe56\u080e\ufe5e\u000e\ufe60\u060e\ufe62\u0e06\ufe64\u060e\ufe66\u000e\ufe69\u060e\ufe6b\u0e06\ufe72\u0003\ufe74\u0003\ufefc\u0003\ufeff\u0a00\uff02\u000e\uff05\u0006\uff0a\u000e\uff0e\u0608\uff10\u0504\uff19\u0004\uff1b\u0e08\uff20\u000e\uff3a\u0001\uff40\u000e\uff5a\u0001\uff5e\u000e\uff65\u000e\uffbe\u0001\uffc7\u0001\uffcf\u0001\uffd7\u0001\uffdc\u0001\uffe1\u0006\uffe4\u000e\uffe6\u0006\uffee\u000e\ufffb\n\ufffd\u000e"
0469: .getValue();
0470:
0471: private static final char[] mirrored = "\u0000\u0000\u0300\u5000\u0000\u2800\u0000\u2800\u0000\u0000\u0800\u0800\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00
00\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0600`\u0000\u0000\u6000\u6000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u3f1e\ubc62\uf857\ufa0f\u1fff\u803c\ucff5\uffff\u9fff\u0107\uffcc\uc1ff\u3e00\uffc3\u3fff\u0003\u0f00\u0000\u0603\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000
0\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\uff00\u0ff3"
0472: .getValue();
0473:
0474: // Unicode 3.0.1 (same as Unicode 3.0.0)
0475: private static final String typeKeys = "\u0000 \"$&(*-/1:<?A[]_a{}\u007f\u00a0\u00a2\u00a6\u00a8\u00aa\u00ac\u00ae\u00b1\u00b3\u00b5\u00b7\u00b9\u00bb\u00bd\u00bf\u00c1\u00d7\u00d9\u00df\u00f7\u00f9\u0100\u0138\u0149\u0179\u017f\u0181\u0183\u0187\u018a\u018c\u018e\u0192\u0194\u0197\u0199\u019c\u019e\u01a0\u01a7\u01ab\u01af\u01b2\u01b4\u01b8\u01ba\u01bc\u01be\u01c0\u01c4\u01c6\u01c8\u01ca\u01cc\u01dd\u01f0\u01f2\u01f4\u01f7\u01f9\u0222\u0250\u02b0\u02b9\u02bb\u02c2\u02d0\u02d2\u02e0\u02e5\u02ee\u0300\u0360\u0374\u037a\u037e\u0384\u0386\u0389\u038c\u038e\u0390\u0392\u03a3\u03ac\u03d0\u03d2\u03d5\u03da\u03f0\u0400\u0430\u0460\u0482\u0484\u0488\u048c\u04c1\u04c7\u04cb\u04d0\u04f8\u0531\u0559\u055b\u0561\u0589\u0591\u05a3\u05bb\u05be\u05c2\u05d0\u05f0\u05f3\u060c\u061b\u061f\u0621\u0640\u0642\u064b\u0660\u066a\u0670\u0672\u06d4\u06d6\u06dd\u06df\u06e5\u06e7\u06e9\u06eb\u06f0\u06fa\u06fd\u0700\u070f\u0711\u0713\u0730\u0780\u07a6\u0901\u0903\u0905\u093c\u093e\u0941\u0949\u094d\u0950\u0952\u0958\u0962\u0964\u0966\u0970\u0981\u0983\u0985\u098f\u0993\u09aa\u09b2\u09b6\u09bc\u09be\u09c1\u09c7\u09cb\u09cd\u09d7\u09dc\u09df\u09e2\u09e6\u09f0\u09f2\u09f4\u09fa\u0a02\u0a05\u0a0f\u0a13\u0a2a\u0a32\u0a35\u0a38\u0a3c\u0a3e\u0a41\u0a47\u0a4b\u0a59\u0a5e\u0a66\u0a70\u0a72\u0a81\u0a83\u0a85\u0a8d\u0a8f\u0a93\u0aaa\u0ab2\u0ab5\u0abc\u0abe\u0ac1\u0ac7\u0ac9\u0acb\u0acd\u0ad0\u0ae0\u0ae6\u0b01\u0b03\u0b05\u0b0f\u0b13\u0b2a\u0b32\u0b36\u0b3c\u0b3e\u0b42\u0b47\u0b4b\u0b4d\u0b56\u0b5c\u0b5f\u0b66\u0b70\u0b82\u0b85\u0b8e\u0b92\u0b99\u0b9c\u0b9e\u0ba3\u0ba8\u0bae\u0bb7\u0bbe\u0bc0\u0bc2\u0bc6\u0bca\u0bcd\u0bd7\u0be7\u0bf0\u0c01\u0c05\u0c0e\u0c12\u0c2a\u0c35\u0c3e\u0c41\u0c46\u0c4a\u0c55\u0c60\u0c66\u0c82\u0c85\u0c8e\u0c92\u0caa\u0cb5\u0cbe\u0cc1\u0cc6\u0cc8\u0cca\u0ccc\u0cd5\u0cde\u0ce0\u0ce6\u0d02\u0d05\u0d0e\u0d12\u0d2a\u0d3e\u0d41\u0d46\u0d4a\u0d4d\u0d57\u0d60\u0d66\u0d82\u0d85\u0d9a\u0db3\u0dbd\u0dc0\u0dca\u0dcf\u0dd2\u0dd6\u0dd8\u0df2\u0df4\u0e01\u0e31\u0e33\u0e35\u0e3f\u0e41\u0e46\u
0e48\u0e4f\u0e51\u0e5a\u0e81\u0e84\u0e87\u0e8a\u0e8d\u0e94\u0e99\u0ea1\u0ea5\u0ea7\u0eaa\u0ead\u0eb1\u0eb3\u0eb5\u0ebb\u0ebd\u0ec0\u0ec6\u0ec8\u0ed0\u0edc\u0f00\u0f02\u0f04\u0f13\u0f18\u0f1a\u0f20\u0f2a\u0f34\u0f3a\u0f3e\u0f40\u0f49\u0f71\u0f7f\u0f81\u0f85\u0f87\u0f89\u0f90\u0f99\u0fbe\u0fc6\u0fc8\u0fcf\u1000\u1023\u1029\u102c\u102e\u1031\u1036\u1038\u1040\u104a\u1050\u1056\u1058\u10a0\u10d0\u10fb\u1100\u115f\u11a8\u1200\u1208\u1248\u124a\u1250\u1258\u125a\u1260\u1288\u128a\u1290\u12b0\u12b2\u12b8\u12c0\u12c2\u12c8\u12d0\u12d8\u12f0\u1310\u1312\u1318\u1320\u1348\u1361\u1369\u1372\u13a0\u1401\u166d\u166f\u1680\u1682\u169b\u16a0\u16eb\u16ee\u1780\u17b4\u17b7\u17be\u17c6\u17c8\u17ca\u17d4\u17db\u17e0\u1800\u1806\u1808\u180b\u1810\u1820\u1843\u1845\u1880\u18a9\u1e00\u1e96\u1ea0\u1f00\u1f08\u1f10\u1f18\u1f20\u1f28\u1f30\u1f38\u1f40\u1f48\u1f50\u1f59\u1f5b\u1f5d\u1f5f\u1f61\u1f68\u1f70\u1f80\u1f88\u1f90\u1f98\u1fa0\u1fa8\u1fb0\u1fb6\u1fb8\u1fbc\u1fbe\u1fc0\u1fc2\u1fc6\u1fc8\u1fcc\u1fce\u1fd0\u1fd6\u1fd8\u1fdd\u1fe0\u1fe8\u1fed\u1ff2\u1ff6\u1ff8\u1ffc\u1ffe\u2000\u200c\u2010\u2016\u2018\u201a\u201c\u201e\u2020\u2028\u202a\u202f\u2031\u2039\u203b\u203f\u2041\u2044\u2046\u2048\u206a\u2070\u2074\u207a\u207d\u207f\u2081\u208a\u208d\u20a0\u20d0\u20dd\u20e1\u20e3\u2100\u2102\u2104\u2107\u2109\u210b\u210e\u2110\u2113\u2115\u2117\u2119\u211e\u2124\u212b\u212e\u2130\u2132\u2134\u2136\u2139\u2153\u2160\u2190\u2195\u219a\u219c\u21a0\u21a2\u21a5\u21a8\u21ae\u21b0\u21ce\u21d0\u21d2\u21d6\u2200\u2300\u2308\u230c\u2320\u2322\u2329\u232b\u237d\u2400\u2440\u2460\u249c\u24ea\u2500\u25a0\u25b7\u25b9\u25c1\u25c3\u2600\u2619\u266f\u2671\u2701\u2706\u270c\u2729\u274d\u274f\u2756\u2758\u2761\u2776\u2794\u2798\u27b1\u2800\u2e80\u2e9b\u2f00\u2ff0\u3000\u3002\u3004\u3006\u3008\u3012\u3014\u301c\u301e\u3020\u3022\u302a\u3030\u3032\u3036\u3038\u303e\u3041\u3099\u309b\u309d\u30a1\u30fb\u30fd\u3105\u3131\u3190\u3192\u3196\u31a0\u3200\u3220\u322a\u3260\u327f\u3281\u328a\u32c0\u32d0\u3300\u337b\u33e0\u34
00\u4e00\ua000\ua490\ua4a4\ua4b5\ua4c2\ua4c6\uac00\ud800\ue000\uf900\ufb00\ufb13\ufb1d\ufb20\ufb29\ufb2b\ufb38\ufb3e\ufb40\ufb43\ufb46\ufbd3\ufd3e\ufd50\ufd92\ufdf0\ufe20\ufe30\ufe32\ufe34\ufe36\ufe49\ufe4d\ufe50\ufe54\ufe58\ufe5a\ufe5f\ufe62\ufe65\ufe68\ufe6b\ufe70\ufe74\ufe76\ufeff\uff01\uff04\uff06\uff08\uff0a\uff0d\uff0f\uff11\uff1a\uff1c\uff1f\uff21\uff3b\uff3d\uff3f\uff41\uff5b\uff5d\uff61\uff63\uff65\uff67\uff70\uff72\uff9e\uffa0\uffc2\uffca\uffd2\uffda\uffe0\uffe2\uffe4\uffe6\uffe8\uffea\uffed\ufff9\ufffc";
0476:
0477: private static final char[] typeValues = "\u001f\u000f!\u180c#\u0018%\u181a'\u0018)\u1615,\u1918.\u14180\u18099\t;\u0018>\u0019@\u0018Z\u0001\\\u1518^\u161b`\u171bz\u0002|\u1519~\u1619\u009f\u000f\u00a1\u180c\u00a5\u001a\u00a7\u001c\u00a9\u1c1b\u00ab\u1d02\u00ad\u1419\u00b0\u1b1c\u00b2\u190b\u00b4\u0b1b\u00b6\u021c\u00b8\u181b\u00ba\u0b02\u00bc\u1e0b\u00be\u000b\u00c0\u1801\u00d6\u0001\u00d8\u1901\u00de\u0001\u00f6\u0002\u00f8\u1902\u00ff\u0002\u0137\u0201\u0148\u0102\u0178\u0201\u017e\u0102\u0180\u0002\u0182\u0001\u0186\u0201\u0189\u0102\u018b\u0001\u018d\u0002\u0191\u0001\u0193\u0102\u0196\u0201\u0198\u0001\u019b\u0002\u019d\u0001\u019f\u0102\u01a6\u0201\u01aa\u0102\u01ae\u0201\u01b1\u0102\u01b3\u0001\u01b7\u0102\u01b9\u0201\u01bb\u0502\u01bd\u0201\u01bf\u0002\u01c3\u0005\u01c5\u0301\u01c7\u0102\u01c9\u0203\u01cb\u0301\u01dc\u0102\u01ef\u0201\u01f1\u0102\u01f3\u0203\u01f6\u0201\u01f8\u0001\u021f\u0201\u0233\u0201\u02ad\u0002\u02b8\u0004\u02ba\u001b\u02c1\u0004\u02cf\u001b\u02d1\u0004\u02df\u001b\u02e4\u0004\u02ed\u001b\u02ee\u0004\u034e\u0006\u0362\u0006\u0375\u001b\u037a\u0004\u037e\u0018\u0385\u001b\u0388\u1801\u038a\u0001\u038c\u0001\u038f\u0001\u0391\u0102\u03a1\u0001\u03ab\u0001\u03ce\u0002\u03d1\u0002\u03d4\u0001\u03d7\u0002\u03ef\u0201\u03f3\u0002\u042f\u0001\u045f\u0002\u0481\u0201\u0483\u061c\u0486\u0006\u0489\u0007\u04c0\u0201\u04c4\u0102\u04c8\u0102\u04cc\u0102\u04f5\u0201\u04f9\u0201\u0556\u0001\u055a\u0418\u055f\u0018\u0587\u0002\u058a\u1814\u05a1\u0006\u05b9\u0006\u05bd\u0006\u05c1\u0618\u05c4\u1806\u05ea\u0005\u05f2\u0005\u05f4\u0018\u060c\u0018\u061b\u1800\u061f\u1800\u063a\u0005\u0641\u0504\u064a\u0005\u0655\u0006\u0669\t\u066d\u0018\u0671\u0506\u06d3\u0005\u06d5\u0518\u06dc\u0006\u06de\u0007\u06e4\u0006\u06e6\u0004\u06e8\u0006\u06ea\u1c06\u06ed\u0006\u06f9\t\u06fc\u0005\u06fe\u001c\u070d\u0018\u0710\u1005\u0712\u0605\u072c\u0005\u074a\u0006\u07a5\u0005\u07b0\u0006\u0902\u0006\u0903\u0800\u0939\u0005\u093d\u0506\u0940\b\u0948\u0006\u094c\b\u
094d\u0600\u0951\u0605\u0954\u0006\u0961\u0005\u0963\u0006\u0965\u0018\u096f\t\u0970\u0018\u0982\u0608\u0983\u0800\u098c\u0005\u0990\u0005\u09a8\u0005\u09b0\u0005\u09b2\u0005\u09b9\u0005\u09bc\u0006\u09c0\b\u09c4\u0006\u09c8\b\u09cc\b\u09cd\u0600\u09d7\u0800\u09dd\u0005\u09e1\u0005\u09e3\u0006\u09ef\t\u09f1\u0005\u09f3\u001a\u09f9\u000b\u09fa\u001c\u0a02\u0006\u0a0a\u0005\u0a10\u0005\u0a28\u0005\u0a30\u0005\u0a33\u0005\u0a36\u0005\u0a39\u0005\u0a3c\u0006\u0a40\b\u0a42\u0006\u0a48\u0006\u0a4d\u0006\u0a5c\u0005\u0a5e\u0005\u0a6f\t\u0a71\u0006\u0a74\u0005\u0a82\u0006\u0a83\u0800\u0a8b\u0005\u0a8d\u0500\u0a91\u0005\u0aa8\u0005\u0ab0\u0005\u0ab3\u0005\u0ab9\u0005\u0abd\u0506\u0ac0\b\u0ac5\u0006\u0ac8\u0006\u0ac9\u0800\u0acc\b\u0acd\u0600\u0ad0\u0005\u0ae0\u0005\u0aef\t\u0b02\u0608\u0b03\u0800\u0b0c\u0005\u0b10\u0005\u0b28\u0005\u0b30\u0005\u0b33\u0005\u0b39\u0005\u0b3d\u0506\u0b41\u0608\u0b43\u0006\u0b48\b\u0b4c\b\u0b4d\u0600\u0b57\u0806\u0b5d\u0005\u0b61\u0005\u0b6f\t\u0b70\u001c\u0b83\u0806\u0b8a\u0005\u0b90\u0005\u0b95\u0005\u0b9a\u0005\u0b9c\u0005\u0b9f\u0005\u0ba4\u0005\u0baa\u0005\u0bb5\u0005\u0bb9\u0005\u0bbf\b\u0bc1\u0806\u0bc2\b\u0bc8\b\u0bcc\b\u0bcd\u0600\u0bd7\u0800\u0bef\t\u0bf2\u000b\u0c03\b\u0c0c\u0005\u0c10\u0005\u0c28\u0005\u0c33\u0005\u0c39\u0005\u0c40\u0006\u0c44\b\u0c48\u0006\u0c4d\u0006\u0c56\u0006\u0c61\u0005\u0c6f\t\u0c83\b\u0c8c\u0005\u0c90\u0005\u0ca8\u0005\u0cb3\u0005\u0cb9\u0005\u0cc0\u0608\u0cc4\b\u0cc7\u0806\u0cc8\b\u0ccb\b\u0ccd\u0006\u0cd6\b\u0cde\u0005\u0ce1\u0005\u0cef\t\u0d03\b\u0d0c\u0005\u0d10\u0005\u0d28\u0005\u0d39\u0005\u0d40\b\u0d43\u0006\u0d48\b\u0d4c\b\u0d4d\u0600\u0d57\u0800\u0d61\u0005\u0d6f\t\u0d83\b\u0d96\u0005\u0db1\u0005\u0dbb\u0005\u0dbd\u0500\u0dc6\u0005\u0dca\u0006\u0dd1\b\u0dd4\u0006\u0dd6\u0006\u0ddf\b\u0df3\b\u0df4\u0018\u0e30\u0005\u0e32\u0605\u0e34\u0506\u0e3a\u0006\u0e40\u1a05\u0e45\u0005\u0e47\u0604\u0e4e\u0006\u0e50\u1809\u0e59\t\u0e5b\u0018\u0e82\u0005\u0e84\u0005\u0e88\u0005\u0e8a\u0005\u0e8d\u0500\u0e97\u0005\u
0e9f\u0005\u0ea3\u0005\u0ea5\u0500\u0ea7\u0500\u0eab\u0005\u0eb0\u0005\u0eb2\u0605\u0eb4\u0506\u0eb9\u0006\u0ebc\u0006\u0ebd\u0500\u0ec4\u0005\u0ec6\u0004\u0ecd\u0006\u0ed9\t\u0edd\u0005\u0f01\u1c05\u0f03\u001c\u0f12\u0018\u0f17\u001c\u0f19\u0006\u0f1f\u001c\u0f29\t\u0f33\u000b\u0f39\u061c\u0f3d\u1615\u0f3f\b\u0f47\u0005\u0f6a\u0005\u0f7e\u0006\u0f80\u0806\u0f84\u0006\u0f86\u1806\u0f88\u0605\u0f8b\u0005\u0f97\u0006\u0fbc\u0006\u0fc5\u001c\u0fc7\u1c06\u0fcc\u001c\u0fcf\u1c00\u1021\u0005\u1027\u0005\u102a\u0005\u102d\u0608\u1030\u0006\u1032\u0806\u1037\u0006\u1039\u0608\u1049\t\u104f\u0018\u1055\u0005\u1057\b\u1059\u0006\u10c5\u0001\u10f6\u0005\u10fb\u1800\u1159\u0005\u11a2\u0005\u11f9\u0005\u1206\u0005\u1246\u0005\u1248\u0005\u124d\u0005\u1256\u0005\u1258\u0005\u125d\u0005\u1286\u0005\u1288\u0005\u128d\u0005\u12ae\u0005\u12b0\u0005\u12b5\u0005\u12be\u0005\u12c0\u0005\u12c5\u0005\u12ce\u0005\u12d6\u0005\u12ee\u0005\u130e\u0005\u1310\u0005\u1315\u0005\u131e\u0005\u1346\u0005\u135a\u0005\u1368\u0018\u1371\t\u137c\u000b\u13f4\u0005\u166c\u0005\u166e\u0018\u1676\u0005\u1681\u050c\u169a\u0005\u169c\u1516\u16ea\u0005\u16ed\u0018\u16f0\u000b\u17b3\u0005\u17b6\b\u17bd\u0006\u17c5\b\u17c7\u0806\u17c9\u0608\u17d3\u0006\u17da\u0018\u17dc\u1a18\u17e9\t\u1805\u0018\u1807\u1814\u180a\u0018\u180e\u0010\u1819\t\u1842\u0005\u1844\u0405\u1877\u0005\u18a8\u0005\u18a9\u0600\u1e95\u0201\u1e9b\u0002\u1ef9\u0201\u1f07\u0002\u1f0f\u0001\u1f15\u0002\u1f1d\u0001\u1f27\u0002\u1f2f\u0001\u1f37\u0002\u1f3f\u0001\u1f45\u0002\u1f4d\u0001\u1f57\u0002\u1f59\u0100\u1f5b\u0100\u1f5d\u0100\u1f60\u0102\u1f67\u0002\u1f6f\u0001\u1f7d\u0002\u1f87\u0002\u1f8f\u0003\u1f97\u0002\u1f9f\u0003\u1fa7\u0002\u1faf\u0003\u1fb4\u0002\u1fb7\u0002\u1fbb\u0001\u1fbd\u1b03\u1fbf\u1b02\u1fc1\u001b\u1fc4\u0002\u1fc7\u0002\u1fcb\u0001\u1fcd\u1b03\u1fcf\u001b\u1fd3\u0002\u1fd7\u0002\u1fdb\u0001\u1fdf\u001b\u1fe7\u0002\u1fec\u0001\u1fef\u001b\u1ff4\u0002\u1ff7\u0002\u1ffb\u0001\u1ffd\u1b03\u1ffe\u001b\u200b\f\u200f\u0010\u2015
\u0014\u2017\u0018\u2019\u1e1d\u201b\u1d15\u201d\u1e1d\u201f\u1d15\u2027\u0018\u2029\u0e0d\u202e\u0010\u2030\u0c18\u2038\u0018\u203a\u1d1e\u203e\u0018\u2040\u0017\u2043\u0018\u2045\u1519\u2046\u0016\u204d\u0018\u206f\u0010\u2070\u000b\u2079\u000b\u207c\u0019\u207e\u1516\u2080\u020b\u2089\u000b\u208c\u0019\u208e\u1516\u20af\u001a\u20dc\u0006\u20e0\u0007\u20e2\u0607\u20e3\u0700\u2101\u001c\u2103\u1c01\u2106\u001c\u2108\u011c\u210a\u1c02\u210d\u0001\u210f\u0002\u2112\u0001\u2114\u021c\u2116\u011c\u2118\u001c\u211d\u0001\u2123\u001c\u212a\u1c01\u212d\u0001\u212f\u021c\u2131\u0001\u2133\u011c\u2135\u0502\u2138\u0005\u213a\u021c\u215f\u000b\u2183\n\u2194\u0019\u2199\u001c\u219b\u0019\u219f\u001c\u21a1\u1c19\u21a4\u191c\u21a7\u1c19\u21ad\u001c\u21af\u1c19\u21cd\u001c\u21cf\u0019\u21d1\u001c\u21d5\u1c19\u21f3\u001c\u22f1\u0019\u2307\u001c\u230b\u0019\u231f\u001c\u2321\u0019\u2328\u001c\u232a\u1516\u237b\u001c\u239a\u001c\u2426\u001c\u244a\u001c\u249b\u000b\u24e9\u001c\u24ea\u000b\u2595\u001c\u25b6\u001c\u25b8\u191c\u25c0\u001c\u25c2\u191c\u25f7\u001c\u2613\u001c\u266e\u001c\u2670\u191c\u2671\u1c00\u2704\u001c\u2709\u001c\u2727\u001c\u274b\u001c\u274d\u1c00\u2752\u001c\u2756\u001c\u275e\u001c\u2767\u001c\u2793\u000b\u2794\u001c\u27af\u001c\u27be\u001c\u28ff\u001c\u2e99\u001c\u2ef3\u001c\u2fd5\u001c\u2ffb\u001c\u3001\u180c\u3003\u0018\u3005\u041c\u3007\u0a05\u3011\u1615\u3013\u001c\u301b\u1615\u301d\u1514\u301f\u0016\u3021\u0a1c\u3029\n\u302f\u0006\u3031\u0414\u3035\u0004\u3037\u001c\u303a\n\u303f\u001c\u3094\u0005\u309a\u0006\u309c\u001b\u309e\u0004\u30fa\u0005\u30fc\u1704\u30fe\u0004\u312c\u0005\u318e\u0005\u3191\u001c\u3195\u000b\u319f\u001c\u31b7\u0005\u321c\u001c\u3229\u000b\u3243\u001c\u327b\u001c\u3280\u1c0b\u3289\u000b\u32b0\u001c\u32cb\u001c\u32fe\u001c\u3376\u001c\u33dd\u001c\u33fe\u001c\u4db5\u0005\u9fa5\u0005\ua48c\u0005\ua4a1\u001c\ua4b3\u001c\ua4c0\u001c\ua4c4\u001c\ua4c6\u001c\ud7a3\u0005\udfff\u0013\uf8ff\u0012\ufa2d\u0005\ufb06\u0002\ufb17\u0002\ufb1f\u0506\u
fb28\u0005\ufb2a\u1905\ufb36\u0005\ufb3c\u0005\ufb3e\u0005\ufb41\u0005\ufb44\u0005\ufbb1\u0005\ufd3d\u0005\ufd3f\u1615\ufd8f\u0005\ufdc7\u0005\ufdfb\u0005\ufe23\u0006\ufe31\u1418\ufe33\u1714\ufe35\u1517\ufe44\u1516\ufe4c\u0018\ufe4f\u0017\ufe52\u0018\ufe57\u0018\ufe59\u1514\ufe5e\u1516\ufe61\u0018\ufe64\u1419\ufe66\u0019\ufe6a\u1a18\ufe6b\u1800\ufe72\u0005\ufe74\u0005\ufefc\u0005\ufeff\u1000\uff03\u0018\uff05\u181a\uff07\u0018\uff09\u1615\uff0c\u1918\uff0e\u1418\uff10\u1809\uff19\t\uff1b\u0018\uff1e\u0019\uff20\u0018\uff3a\u0001\uff3c\u1518\uff3e\u161b\uff40\u171b\uff5a\u0002\uff5c\u1519\uff5e\u1619\uff62\u1815\uff64\u1618\uff66\u1705\uff6f\u0005\uff71\u0504\uff9d\u0005\uff9f\u0004\uffbe\u0005\uffc7\u0005\uffcf\u0005\uffd7\u0005\uffdc\u0005\uffe1\u001a\uffe3\u1b19\uffe5\u1a1c\uffe6\u001a\uffe9\u191c\uffec\u0019\uffee\u001c\ufffb\u0010\ufffd\u001c"
0478: .getValue();
0479:
0480: private static final int[] typeValuesCache = { 15, 15, 15, 15, 15,
0481: 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
0482: 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 12, 24, 24,
0483: 24, 26, 24, 24, 24, 21, 22, 24, 25, 24, 20, 24, 24, 9, 9,
0484: 9, 9, 9, 9, 9, 9, 9, 9, 24, 24, 25, 25, 25, 24, 24, 1, 1,
0485: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0486: 1, 1, 1, 1, 21, 24, 22, 27, 23, 27, 2, 2, 2, 2, 2, 2, 2, 2,
0487: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21,
0488: 25, 22, 25, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
0489: 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
0490: 15, 15, 15, 15, 15, 15, 12, 24, 26, 26, 26, 26, 28, 28, 27,
0491: 28, 2, 29, 25, 16, 28, 27, 28, 25, 11, 11, 27, 2, 28, 24,
0492: 27, 11, 2, 30, 11, 11, 11, 24, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0493: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 25, 1, 1, 1, 1,
0494: 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0495: 2, 2, 2, 2, 2, 2, 2, 25, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1,
0496: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0497: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0498: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2,
0499: 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0500: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0501: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2,
0502: 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1,
0503: 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1,
0504: 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2,
0505: 1, 1, 2, 2, 5, 1, 2, 2, 2, 5, 5, 5, 5, 1, 3, 2, 1, 3, 2, 1,
0506: 3, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1,
0507: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 3,
0508: 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0509: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0510: 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0511: 2, 1, 2, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0512: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2,
0513: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0514: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0515: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0516: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0517: 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0518: 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 4, 4, 4, 4, 4, 4, 4,
0519: 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
0520: 27, 27, 27, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, 27, 27, 27,
0521: 27, 4, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
0522: 27, 27, 27, 27, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
0523: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
0524: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
0525: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
0526: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 6,
0527: 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0,
0528: 0, 0, 27, 27, 0, 0, 0, 0, 4, 0, 0, 0, 24, 0, 0, 0, 0, 0,
0529: 27, 27, 1, 24, 1, 1, 1, 0, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1,
0530: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
0531: 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0532: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 1,
0533: 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
0534: 2 };
0535:
0536: // Unicode 3.0.1 (same as Unicode 3.0.0)
0537: private static final String uppercaseKeys = "a\u00b5\u00e0\u00f8\u00ff\u0101\u0131\u0133\u013a\u014b\u017a\u017f\u0183\u0188\u018c\u0192\u0195\u0199\u01a1\u01a8\u01ad\u01b0\u01b4\u01b9\u01bd\u01bf\u01c5\u01c6\u01c8\u01c9\u01cb\u01cc\u01ce\u01dd\u01df\u01f2\u01f3\u01f5\u01f9\u0223\u0253\u0254\u0256\u0259\u025b\u0260\u0263\u0268\u0269\u026f\u0272\u0275\u0280\u0283\u0288\u028a\u0292\u0345\u03ac\u03ad\u03b1\u03c2\u03c3\u03cc\u03cd\u03d0\u03d1\u03d5\u03d6\u03db\u03f0\u03f1\u03f2\u0430\u0450\u0461\u048d\u04c2\u04c8\u04cc\u04d1\u04f9\u0561\u1e01\u1e9b\u1ea1\u1f00\u1f10\u1f20\u1f30\u1f40\u1f51\u1f60\u1f70\u1f72\u1f76\u1f78\u1f7a\u1f7c\u1f80\u1f90\u1fa0\u1fb0\u1fb3\u1fbe\u1fc3\u1fd0\u1fe0\u1fe5\u1ff3\u2170\u24d0\uff41";
0538:
0539: private static final char[] uppercaseValues = "z\uffe0\u00b5\u02e7\u00f6\uffe0\u00fe\uffe0\u00ffy\u812f\uffff\u0131\uff18\u8137\uffff\u8148\uffff\u8177\uffff\u817e\uffff\u017f\ufed4\u8185\uffff\u0188\uffff\u018c\uffff\u0192\uffff\u0195a\u0199\uffff\u81a5\uffff\u01a8\uffff\u01ad\uffff\u01b0\uffff\u81b6\uffff\u01b9\uffff\u01bd\uffff\u01bf8\u01c5\uffff\u01c6\ufffe\u01c8\uffff\u01c9\ufffe\u01cb\uffff\u01cc\ufffe\u81dc\uffff\u01dd\uffb1\u81ef\uffff\u01f2\uffff\u01f3\ufffe\u01f5\uffff\u821f\uffff\u8233\uffff\u0253\uff2e\u0254\uff32\u0257\uff33\u0259\uff36\u025b\uff35\u0260\uff33\u0263\uff31\u0268\uff2f\u0269\uff2d\u026f\uff2d\u0272\uff2b\u0275\uff2a\u0280\uff26\u0283\uff26\u0288\uff26\u028b\uff27\u0292\uff25\u0345T\u03ac\uffda\u03af\uffdb\u03c1\uffe0\u03c2\uffe1\u03cb\uffe0\u03cc\uffc0\u03ce\uffc1\u03d0\uffc2\u03d1\uffc7\u03d5\uffd1\u03d6\uffca\u83ef\uffff\u03f0\uffaa\u03f1\uffb0\u03f2\uffb1\u044f\uffe0\u045f\uffb0\u8481\uffff\u84bf\uffff\u84c4\uffff\u04c8\uffff\u04cc\uffff\u84f5\uffff\u04f9\uffff\u0586\uffd0\u9e95\uffff\u1e9b\uffc5\u9ef9\uffff\u1f07\b\u1f15\b\u1f27\b\u1f37\b\u1f45\b\u9f57\b\u1f67\b\u1f71J\u1f75V\u1f77d\u1f79\u0080\u1f7bp\u1f7d~\u1f87\b\u1f97\b\u1fa7\b\u1fb1\b\u1fb3\t\u1fbe\ue3db\u1fc3\t\u1fd1\b\u1fe1\b\u1fe5\u0007\u1ff3\t\u217f\ufff0\u24e9\uffe6\uff5a\uffe0"
0540: .getValue();
0541:
0542: private static final int[] uppercaseValuesCache = { 924, 182, 183,
0543: 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,
0544: 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
0545: 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
0546: 220, 221, 222, 223, 192, 193, 194, 195, 196, 197, 198, 199,
0547: 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211,
0548: 212, 213, 214, 247, 216, 217, 218, 219, 220, 221, 222, 376,
0549: 256, 256, 258, 258, 260, 260, 262, 262, 264, 264, 266, 266,
0550: 268, 268, 270, 270, 272, 272, 274, 274, 276, 276, 278, 278,
0551: 280, 280, 282, 282, 284, 284, 286, 286, 288, 288, 290, 290,
0552: 292, 292, 294, 294, 296, 296, 298, 298, 300, 300, 302, 302,
0553: 304, 73, 306, 306, 308, 308, 310, 310, 312, 313, 313, 315,
0554: 315, 317, 317, 319, 319, 321, 321, 323, 323, 325, 325, 327,
0555: 327, 329, 330, 330, 332, 332, 334, 334, 336, 336, 338, 338,
0556: 340, 340, 342, 342, 344, 344, 346, 346, 348, 348, 350, 350,
0557: 352, 352, 354, 354, 356, 356, 358, 358, 360, 360, 362, 362,
0558: 364, 364, 366, 366, 368, 368, 370, 370, 372, 372, 374, 374,
0559: 376, 377, 377, 379, 379, 381, 381, 83, 384, 385, 386, 386,
0560: 388, 388, 390, 391, 391, 393, 394, 395, 395, 397, 398, 399,
0561: 400, 401, 401, 403, 404, 502, 406, 407, 408, 408, 410, 411,
0562: 412, 413, 544, 415, 416, 416, 418, 418, 420, 420, 422, 423,
0563: 423, 425, 426, 427, 428, 428, 430, 431, 431, 433, 434, 435,
0564: 435, 437, 437, 439, 440, 440, 442, 443, 444, 444, 446, 503,
0565: 448, 449, 450, 451, 452, 452, 452, 455, 455, 455, 458, 458,
0566: 458, 461, 461, 463, 463, 465, 465, 467, 467, 469, 469, 471,
0567: 471, 473, 473, 475, 475, 398, 478, 478, 480, 480, 482, 482,
0568: 484, 484, 486, 486, 488, 488, 490, 490, 492, 492, 494, 494,
0569: 496, 497, 497, 497, 500, 500, 502, 503, 504, 504, 506, 506,
0570: 508, 508, 510, 510, 512, 512, 514, 514, 516, 516, 518, 518,
0571: 520, 520, 522, 522, 524, 524, 526, 526, 528, 528, 530, 530,
0572: 532, 532, 534, 534, 536, 536, 538, 538, 540, 540, 542, 542,
0573: 544, 545, 546, 546, 548, 548, 550, 550, 552, 552, 554, 554,
0574: 556, 556, 558, 558, 560, 560, 562, 562, 564, 565, 566, 567,
0575: 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579,
0576: 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591,
0577: 592, 593, 594, 385, 390, 597, 393, 394, 600, 399, 602, 400,
0578: 604, 605, 606, 607, 403, 609, 610, 404, 612, 613, 614, 615,
0579: 407, 406, 618, 619, 620, 621, 622, 412, 624, 625, 413, 627,
0580: 628, 415, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639,
0581: 422, 641, 642, 425, 644, 645, 646, 647, 430, 649, 433, 434,
0582: 652, 653, 654, 655, 656, 657, 439, 659, 660, 661, 662, 663,
0583: 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675,
0584: 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687,
0585: 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699,
0586: 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711,
0587: 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723,
0588: 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735,
0589: 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747,
0590: 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759,
0591: 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771,
0592: 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783,
0593: 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795,
0594: 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807,
0595: 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819,
0596: 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
0597: 832, 833, 834, 835, 836, 921, 838, 839, 840, 841, 842, 843,
0598: 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855,
0599: 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867,
0600: 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
0601: 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891,
0602: 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903,
0603: 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915,
0604: 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
0605: 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939,
0606: 902, 904, 905, 906, 944, 913, 914, 915, 916, 917, 918, 919,
0607: 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 931, 931,
0608: 932, 933, 934, 935, 936, 937, 938, 939, 908, 910, 911, 975,
0609: 914, 920, 978, 979, 980, 934, 928, 983, 984, 984, 986, 986,
0610: 988, 988, 990, 990, 992, 992, 994, 994, 996, 996, 998, 998 };
0611:
0612: private static final String lowercaseKeys = "A\u00c0\u00d8\u0100\u0130\u0132\u0139\u014a\u0178\u0179\u0181\u0182\u0186\u0187\u0189\u018b\u018e\u018f\u0190\u0191\u0193\u0194\u0196\u0197\u0198\u019c\u019d\u019f\u01a0\u01a6\u01a7\u01a9\u01ac\u01ae\u01af\u01b1\u01b3\u01b7\u01b8\u01bc\u01c4\u01c5\u01c7\u01c8\u01ca\u01cb\u01de\u01f1\u01f2\u01f6\u01f7\u01f8\u0222\u0386\u0388\u038c\u038e\u0391\u03a3\u03da\u0400\u0410\u0460\u048c\u04c1\u04c7\u04cb\u04d0\u04f8\u0531\u1e00\u1ea0\u1f08\u1f18\u1f28\u1f38\u1f48\u1f59\u1f68\u1f88\u1f98\u1fa8\u1fb8\u1fba\u1fbc\u1fc8\u1fcc\u1fd8\u1fda\u1fe8\u1fea\u1fec\u1ff8\u1ffa\u1ffc\u2126\u212a\u212b\u2160\u24b6\uff21";
0613:
0614: private static final char[] lowercaseValues = "Z \u00d6 \u00de \u812e\u0001\u0130\uff39\u8136\u0001\u8147\u0001\u8176\u0001\u0178\uff87\u817d\u0001\u0181\u00d2\u8184\u0001\u0186\u00ce\u0187\u0001\u018a\u00cd\u018b\u0001\u018eO\u018f\u00ca\u0190\u00cb\u0191\u0001\u0193\u00cd\u0194\u00cf\u0196\u00d3\u0197\u00d1\u0198\u0001\u019c\u00d3\u019d\u00d5\u019f\u00d6\u81a4\u0001\u01a6\u00da\u01a7\u0001\u01a9\u00da\u01ac\u0001\u01ae\u00da\u01af\u0001\u01b2\u00d9\u81b5\u0001\u01b7\u00db\u01b8\u0001\u01bc\u0001\u01c4\u0002\u01c5\u0001\u01c7\u0002\u01c8\u0001\u01ca\u0002\u81db\u0001\u81ee\u0001\u01f1\u0002\u81f4\u0001\u01f6\uff9f\u01f7\uffc8\u821e\u0001\u8232\u0001\u0386&\u038a%\u038c@\u038f?\u03a1 \u03ab \u83ee\u0001\u040fP\u042f \u8480\u0001\u84be\u0001\u84c3\u0001\u04c7\u0001\u04cb\u0001\u84f4\u0001\u04f8\u0001\u05560\u9e94\u0001\u9ef8\u0001\u1f0f\ufff8\u1f1d\ufff8\u1f2f\ufff8\u1f3f\ufff8\u1f4d\ufff8\u9f5f\ufff8\u1f6f\ufff8\u1f8f\ufff8\u1f9f\ufff8\u1faf\ufff8\u1fb9\ufff8\u1fbb\uffb6\u1fbc\ufff7\u1fcb\uffaa\u1fcc\ufff7\u1fd9\ufff8\u1fdb\uff9c\u1fe9\ufff8\u1feb\uff90\u1fec\ufff9\u1ff9\uff80\u1ffb\uff82\u1ffc\ufff7\u2126\ue2a3\u212a\udf41\u212b\udfba\u216f\u0010\u24cf\u001a\uff3a "
0615: .getValue();
0616:
0617: private static final int[] lowercaseValuesCache = { 224, 225, 226,
0618: 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
0619: 239, 240, 241, 242, 243, 244, 245, 246, 215, 248, 249, 250,
0620: 251, 252, 253, 254, 223, 224, 225, 226, 227, 228, 229, 230,
0621: 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242,
0622: 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
0623: 255, 257, 257, 259, 259, 261, 261, 263, 263, 265, 265, 267,
0624: 267, 269, 269, 271, 271, 273, 273, 275, 275, 277, 277, 279,
0625: 279, 281, 281, 283, 283, 285, 285, 287, 287, 289, 289, 291,
0626: 291, 293, 293, 295, 295, 297, 297, 299, 299, 301, 301, 303,
0627: 303, 105, 305, 307, 307, 309, 309, 311, 311, 312, 314, 314,
0628: 316, 316, 318, 318, 320, 320, 322, 322, 324, 324, 326, 326,
0629: 328, 328, 329, 331, 331, 333, 333, 335, 335, 337, 337, 339,
0630: 339, 341, 341, 343, 343, 345, 345, 347, 347, 349, 349, 351,
0631: 351, 353, 353, 355, 355, 357, 357, 359, 359, 361, 361, 363,
0632: 363, 365, 365, 367, 367, 369, 369, 371, 371, 373, 373, 375,
0633: 375, 255, 378, 378, 380, 380, 382, 382, 383, 384, 595, 387,
0634: 387, 389, 389, 596, 392, 392, 598, 599, 396, 396, 397, 477,
0635: 601, 603, 402, 402, 608, 611, 405, 617, 616, 409, 409, 410,
0636: 411, 623, 626, 414, 629, 417, 417, 419, 419, 421, 421, 640,
0637: 424, 424, 643, 426, 427, 429, 429, 648, 432, 432, 650, 651,
0638: 436, 436, 438, 438, 658, 441, 441, 442, 443, 445, 445, 446,
0639: 447, 448, 449, 450, 451, 454, 454, 454, 457, 457, 457, 460,
0640: 460, 460, 462, 462, 464, 464, 466, 466, 468, 468, 470, 470,
0641: 472, 472, 474, 474, 476, 476, 477, 479, 479, 481, 481, 483,
0642: 483, 485, 485, 487, 487, 489, 489, 491, 491, 493, 493, 495,
0643: 495, 496, 499, 499, 499, 501, 501, 405, 447, 505, 505, 507,
0644: 507, 509, 509, 511, 511, 513, 513, 515, 515, 517, 517, 519,
0645: 519, 521, 521, 523, 523, 525, 525, 527, 527, 529, 529, 531,
0646: 531, 533, 533, 535, 535, 537, 537, 539, 539, 541, 541, 543,
0647: 543, 414, 545, 547, 547, 549, 549, 551, 551, 553, 553, 555,
0648: 555, 557, 557, 559, 559, 561, 561, 563, 563, 564, 565, 566,
0649: 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578,
0650: 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590,
0651: 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602,
0652: 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614,
0653: 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626,
0654: 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638,
0655: 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650,
0656: 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662,
0657: 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674,
0658: 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686,
0659: 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698,
0660: 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710,
0661: 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722,
0662: 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734,
0663: 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746,
0664: 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758,
0665: 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770,
0666: 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782,
0667: 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794,
0668: 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806,
0669: 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818,
0670: 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830,
0671: 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842,
0672: 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854,
0673: 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866,
0674: 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878,
0675: 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890,
0676: 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 940,
0677: 903, 941, 942, 943, 907, 972, 909, 973, 974, 912, 945, 946,
0678: 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958,
0679: 959, 960, 961, 930, 963, 964, 965, 966, 967, 968, 969, 970,
0680: 971, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950,
0681: 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962,
0682: 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974,
0683: 975, 976, 977, 978, 979, 980, 981, 982, 983, 985, 985, 987,
0684: 987, 989, 989, 991, 991, 993, 993, 995, 995, 997, 997, 999,
0685: 999 };
0686:
// Digit lookup table, keys: one char per entry, in ascending order ('0', 'A',
// 'a', then range starts from U+0660 up to U+FF41).
// NOTE(review): each key appears to pair with two chars of digitValues
// (24 keys here, 48 chars there); the lookup code is outside this chunk —
// confirm how the two tables are indexed.
0687: private static final String digitKeys = "0Aa\u0660\u06f0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be7\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1369\u17e0\u1810\uff10\uff21\uff41";
0688:
// Digit lookup table, values: two chars per digitKeys entry, e.g. ('9', '0')
// for key '0' and ('Z', '7') for key 'A' (note 'A' - '7' == 10).
// NOTE(review): reads as (last char of the range, amount to subtract from the
// char to obtain its digit value) — confirm against the consuming method.
// NOTE(review): String.getValue() is not a public JDK method; presumably a
// class-library-internal accessor for the string's char[] — confirm.
0689: private static final char[] digitValues = "90Z7zW\u0669\u0660\u06f9\u06f0\u096f\u0966\u09ef\u09e6\u0a6f\u0a66\u0aef\u0ae6\u0b6f\u0b66\u0bef\u0be6\u0c6f\u0c66\u0cef\u0ce6\u0d6f\u0d66\u0e59\u0e50\u0ed9\u0ed0\u0f29\u0f20\u1049\u1040\u1371\u1368\u17e9\u17e0\u1819\u1810\uff19\uff10\uff3a\uff17\uff5a\uff37"
0690: .getValue();
0691:
// Per-character tag table for the low (ASCII-range) chars; every entry is
// 0, 2 or 3, i.e. a combination of the ISJAVASTART (1) and ISJAVAPART (2)
// bits declared in this class. For example the entries at '0'..'9' are 2 and
// the entries at 'A'..'Z', 'a'..'z', '$' and '_' are 3.
// NOTE(review): presumably indexed directly by char value by the Java
// identifier checks; the consuming code is outside this chunk — confirm.
0692: private static final char[] typeTags = "\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0003\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0002"
0693: .getValue();
0694:
// Translation table from a small integer direction code (the array index,
// 0..18) to the corresponding DIRECTIONALITY_* constant of this class.
// NOTE(review): the element order (L, R, EN, ES, ET, AN, CS, B, S, WS, ON,
// LRE, LRO, AL, RLE, RLO, PDF, NSM, BN) looks like ICU's bidi direction
// numbering, and this file imports com.ibm.icu.lang.UCharacter — presumably
// the index is the direction value ICU reports for a code point; confirm at
// the use site, which is outside this chunk.
0695: private static final byte[] DIRECTIONALITY = new byte[] {
0696: DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
0697: DIRECTIONALITY_EUROPEAN_NUMBER,
0698: DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
0699: DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
0700: DIRECTIONALITY_ARABIC_NUMBER,
0701: DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
0702: DIRECTIONALITY_PARAGRAPH_SEPARATOR,
0703: DIRECTIONALITY_SEGMENT_SEPARATOR,
0704: DIRECTIONALITY_WHITESPACE, DIRECTIONALITY_OTHER_NEUTRALS,
0705: DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
0706: DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
0707: DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
0708: DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
0709: DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
0710: DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
0711: DIRECTIONALITY_NONSPACING_MARK,
0712: DIRECTIONALITY_BOUNDARY_NEUTRAL };
0713:
// Flag bit 0x1. The typeTags table in this class holds only the values 0, 2
// and 3, which are combinations of this bit and ISJAVAPART.
// NOTE(review): name suggests "char may start a Java identifier"; the code
// that tests the bit is outside this chunk — confirm.
0714: private static final int ISJAVASTART = 1;
0715:
// Flag bit 0x2, the companion of ISJAVASTART.
// NOTE(review): name suggests "char may appear inside a Java identifier";
// confirm against the code that tests it.
0716: private static final int ISJAVAPART = 2;
0717:
0718: // Unicode 3.0.1 (same as Unicode 3.0.0)
// Titlecase table, keys: eight chars in ascending order — the upper- and
// lowercase forms of the Latin digraphs at U+01C4..U+01CC and U+01F1..U+01F3.
0719: private static final String titlecaseKeys = "\u01c4\u01c6\u01c7\u01c9\u01ca\u01cc\u01f1\u01f3";
0720:
// Titlecase table, values: eight chars, positionally aligned with
// titlecaseKeys (e.g. keys U+01C4 and U+01C6 both line up with U+01C5).
// NOTE(review): presumably titlecaseValues[i] is the titlecase form of
// titlecaseKeys.charAt(i); the lookup code is outside this chunk — confirm.
// NOTE(review): String.getValue() is not a public JDK method; presumably a
// class-library-internal accessor for the string's char[] — confirm.
0721: private static final char[] titlecaseValues = "\u01c5\u01c5\u01c8\u01c8\u01cb\u01cb\u01f2\u01f2"
0722: .getValue();
0723:
0724: // Unicode 3.0.0 (NOT the same as Unicode 3.0.1)
// Numeric-value lookup table, keys: one char per entry, in ascending order,
// starting with '0', 'A', 'a' and continuing up to U+FF41.
// NOTE(review): each key appears to pair with two chars of numericValues;
// the lookup code is outside this chunk — confirm how the tables are indexed.
0725: private static final String numericKeys = "0Aa\u00b2\u00b9\u00bc\u0660\u06f0\u0966\u09e6\u09f4\u09f9\u0a66\u0ae6\u0b66\u0be7\u0bf1\u0bf2\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1369\u1373\u1374\u1375\u1376\u1377\u1378\u1379\u137a\u137b\u137c\u16ee\u17e0\u1810\u2070\u2074\u2080\u2153\u215f\u2160\u216c\u216d\u216e\u216f\u2170\u217c\u217d\u217e\u217f\u2180\u2181\u2182\u2460\u2474\u2488\u24ea\u2776\u2780\u278a\u3007\u3021\u3038\u3039\u303a\u3280\uff10\uff21\uff41";
0726:
// Numeric-value lookup table, values: two chars per numericKeys entry, e.g.
// ('9', '0') for key '0', (U+0BF1, U+0B8D) for key U+0BF1 (difference 100)
// and (U+216C, U+213A) for key U+216C (difference 50).
// NOTE(review): reads as (last char of the range, amount to subtract from
// the char to obtain its numeric value). Some second chars are U+0000 or are
// larger than the key (e.g. U+EC6C for U+137C), so the consumer presumably
// special-cases those — confirm against the consuming method.
// NOTE(review): String.getValue() is not a public JDK method; presumably a
// class-library-internal accessor for the string's char[] — confirm.
0727: private static final char[] numericValues = "90Z7zW\u00b3\u00b0\u00b9\u00b8\u00be\u0000\u0669\u0660\u06f9\u06f0\u096f\u0966\u09ef\u09e6\u09f7\u09f3\u09f9\u09e9\u0a6f\u0a66\u0aef\u0ae6\u0b6f\u0b66\u0bf0\u0be6\u0bf1\u0b8d\u0bf2\u080a\u0c6f\u0c66\u0cef\u0ce6\u0d6f\u0d66\u0e59\u0e50\u0ed9\u0ed0\u0f29\u0f20\u1049\u1040\u1372\u1368\u1373\u135f\u1374\u1356\u1375\u134d\u1376\u1344\u1377\u133b\u1378\u1332\u1379\u1329\u137a\u1320\u137b\u1317\u137c\uec6c\u16f0\u16dd\u17e9\u17e0\u1819\u1810\u2070\u2070\u2079\u2070\u2089\u2080\u215e\u0000\u215f\u215e\u216b\u215f\u216c\u213a\u216d\u2109\u216e\u1f7a\u216f\u1d87\u217b\u216f\u217c\u214a\u217d\u2119\u217e\u1f8a\u217f\u1d97\u2180\u1d98\u2181\u0df9\u2182\ufa72\u2473\u245f\u2487\u2473\u249b\u2487\u24ea\u24ea\u277f\u2775\u2789\u277f\u2793\u2789\u3007\u3007\u3029\u3020\u3038\u302e\u3039\u3025\u303a\u301c\u3289\u327f\uff19\uff10\uff3a\uff17\uff5a\uff37"
0728: .getValue();
0729:
0730: /*
0731: * Subset represents a subset of characters.
0732: */
0733: public static class Subset {
0734: String name;
0735:
0736: protected Subset(String string) {
0737: if (string == null) {
0738: throw new NullPointerException();
0739: }
0740: name = string;
0741: }
0742:
0743: /**
0744: * Compares the specified object to this Subset and answers true if they
0745: * are equal. The object must be the same instance of Subset.
0746: *
0747: * @param object
0748: * the object to compare
0749: * @return true if the specified object is equal to this Subset, false
0750: * otherwise
0751: *
0752: * @see #hashCode
0753: */
0754: @Override
0755: public final boolean equals(Object object) {
0756: return super .equals(object);
0757: }
0758:
0759: /**
0760: * Answers an integer hash code for the receiver. Objects which are
0761: * equal answer the same value for this method.
0762: *
0763: * @return the receiver's hash
0764: *
0765: * @see #equals
0766: */
0767: @Override
0768: public final int hashCode() {
0769: return super .hashCode();
0770: }
0771:
0772: /**
0773: * Answers the string representation of this Subset.
0774: *
0775: * @return the string representation of this Subset
0776: */
0777: @Override
0778: public final String toString() {
0779: return name;
0780: }
0781: }
0782:
0783: /**
0784: * Blocks of characters, as defined by the Unicode 4.0.1 specification.
0785: * @since 1.2
0786: */
0787: public static final class UnicodeBlock extends Subset {
0788: /**
0789: * The "Surrogates Area" Unicode Block.
0790: * @deprecated As of Java 5, this block has been replaced by {@link #HIGH_SURROGATES}, {@link #HIGH_PRIVATE_USE_SURROGATES} and {@link #LOW_SURROGATES}.
0791: */
0792: @Deprecated
0793: public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock(
0794: "SURROGATES_AREA", 0x0, 0x0);
0795: /**
0796: * The "Basic Latin" Unicode Block.
0797: * @since 1.2
0798: */
0799: public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock(
0800: "BASIC_LATIN", 0x0, 0x7f);
0801: /**
0802: * The "Latin-1 Supplement" Unicode Block.
0803: * @since 1.2
0804: */
0805: public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock(
0806: "LATIN_1_SUPPLEMENT", 0x80, 0xff);
0807: /**
0808: * The "Latin Extended-A" Unicode Block.
0809: * @since 1.2
0810: */
0811: public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock(
0812: "LATIN_EXTENDED_A", 0x100, 0x17f);
0813: /**
0814: * The "Latin Extended-B" Unicode Block.
0815: * @since 1.2
0816: */
0817: public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock(
0818: "LATIN_EXTENDED_B", 0x180, 0x24f);
0819: /**
0820: * The "IPA Extensions" Unicode Block.
0821: * @since 1.2
0822: */
0823: public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock(
0824: "IPA_EXTENSIONS", 0x250, 0x2af);
0825: /**
0826: * The "Spacing Modifier Letters" Unicode Block.
0827: * @since 1.2
0828: */
0829: public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock(
0830: "SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff);
0831: /**
0832: * The "Combining Diacritical Marks" Unicode Block.
0833: * @since 1.2
0834: */
0835: public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock(
0836: "COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f);
0837: /**
0838: * The "Greek and Coptic" Unicode Block. Previously referred to as "Greek".
0839: * @since 1.2
0840: */
0841: public static final UnicodeBlock GREEK = new UnicodeBlock(
0842: "GREEK", 0x370, 0x3ff);
0843: /**
0844: * The "Cyrillic" Unicode Block.
0845: * @since 1.2
0846: */
0847: public static final UnicodeBlock CYRILLIC = new UnicodeBlock(
0848: "CYRILLIC", 0x400, 0x4ff);
0849: /**
0850: * The "Cyrillic Supplement" Unicode Block. Previously referred to as "Cyrillic Supplementary".
0851: * @since 1.5
0852: */
0853: public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock(
0854: "CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f);
0855: /**
0856: * The "Armenian" Unicode Block.
0857: * @since 1.2
0858: */
0859: public static final UnicodeBlock ARMENIAN = new UnicodeBlock(
0860: "ARMENIAN", 0x530, 0x58f);
0861: /**
0862: * The "Hebrew" Unicode Block.
0863: * @since 1.2
0864: */
0865: public static final UnicodeBlock HEBREW = new UnicodeBlock(
0866: "HEBREW", 0x590, 0x5ff);
0867: /**
0868: * The "Arabic" Unicode Block.
0869: * @since 1.2
0870: */
0871: public static final UnicodeBlock ARABIC = new UnicodeBlock(
0872: "ARABIC", 0x600, 0x6ff);
0873: /**
0874: * The "Syriac" Unicode Block.
0875: * @since 1.4
0876: */
0877: public static final UnicodeBlock SYRIAC = new UnicodeBlock(
0878: "SYRIAC", 0x700, 0x74f);
0879: /**
0880: * The "Thaana" Unicode Block.
0881: * @since 1.4
0882: */
0883: public static final UnicodeBlock THAANA = new UnicodeBlock(
0884: "THAANA", 0x780, 0x7bf);
0885: /**
0886: * The "Devanagari" Unicode Block.
0887: * @since 1.2
0888: */
0889: public static final UnicodeBlock DEVANAGARI = new UnicodeBlock(
0890: "DEVANAGARI", 0x900, 0x97f);
0891: /**
0892: * The "Bengali" Unicode Block.
0893: * @since 1.2
0894: */
0895: public static final UnicodeBlock BENGALI = new UnicodeBlock(
0896: "BENGALI", 0x980, 0x9ff);
0897: /**
0898: * The "Gurmukhi" Unicode Block.
0899: * @since 1.2
0900: */
0901: public static final UnicodeBlock GURMUKHI = new UnicodeBlock(
0902: "GURMUKHI", 0xa00, 0xa7f);
0903: /**
0904: * The "Gujarati" Unicode Block.
0905: * @since 1.2
0906: */
0907: public static final UnicodeBlock GUJARATI = new UnicodeBlock(
0908: "GUJARATI", 0xa80, 0xaff);
0909: /**
0910: * The "Oriya" Unicode Block.
0911: * @since 1.2
0912: */
0913: public static final UnicodeBlock ORIYA = new UnicodeBlock(
0914: "ORIYA", 0xb00, 0xb7f);
0915: /**
0916: * The "Tamil" Unicode Block.
0917: * @since 1.2
0918: */
0919: public static final UnicodeBlock TAMIL = new UnicodeBlock(
0920: "TAMIL", 0xb80, 0xbff);
0921: /**
0922: * The "Telugu" Unicode Block.
0923: * @since 1.2
0924: */
0925: public static final UnicodeBlock TELUGU = new UnicodeBlock(
0926: "TELUGU", 0xc00, 0xc7f);
0927: /**
0928: * The "Kannada" Unicode Block.
0929: * @since 1.2
0930: */
0931: public static final UnicodeBlock KANNADA = new UnicodeBlock(
0932: "KANNADA", 0xc80, 0xcff);
0933: /**
0934: * The "Malayalam" Unicode Block.
0935: * @since 1.2
0936: */
0937: public static final UnicodeBlock MALAYALAM = new UnicodeBlock(
0938: "MALAYALAM", 0xd00, 0xd7f);
0939: /**
0940: * The "Sinhala" Unicode Block.
0941: * @since 1.4
0942: */
0943: public static final UnicodeBlock SINHALA = new UnicodeBlock(
0944: "SINHALA", 0xd80, 0xdff);
0945: /**
0946: * The "Thai" Unicode Block.
0947: * @since 1.2
0948: */
0949: public static final UnicodeBlock THAI = new UnicodeBlock(
0950: "THAI", 0xe00, 0xe7f);
0951: /**
0952: * The "Lao" Unicode Block.
0953: * @since 1.2
0954: */
0955: public static final UnicodeBlock LAO = new UnicodeBlock("LAO",
0956: 0xe80, 0xeff);
0957: /**
0958: * The "Tibetan" Unicode Block.
0959: * @since 1.2
0960: */
0961: public static final UnicodeBlock TIBETAN = new UnicodeBlock(
0962: "TIBETAN", 0xf00, 0xfff);
0963: /**
0964: * The "Myanmar" Unicode Block.
0965: * @since 1.4
0966: */
0967: public static final UnicodeBlock MYANMAR = new UnicodeBlock(
0968: "MYANMAR", 0x1000, 0x109f);
0969: /**
0970: * The "Georgian" Unicode Block.
0971: * @since 1.2
0972: */
0973: public static final UnicodeBlock GEORGIAN = new UnicodeBlock(
0974: "GEORGIAN", 0x10a0, 0x10ff);
0975: /**
0976: * The "Hangul Jamo" Unicode Block.
0977: * @since 1.2
0978: */
0979: public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock(
0980: "HANGUL_JAMO", 0x1100, 0x11ff);
0981: /**
0982: * The "Ethiopic" Unicode Block.
0983: * @since 1.4
0984: */
0985: public static final UnicodeBlock ETHIOPIC = new UnicodeBlock(
0986: "ETHIOPIC", 0x1200, 0x137f);
0987: /**
0988: * The "Cherokee" Unicode Block.
0989: * @since 1.4
0990: */
0991: public static final UnicodeBlock CHEROKEE = new UnicodeBlock(
0992: "CHEROKEE", 0x13a0, 0x13ff);
0993: /**
0994: * The "Unified Canadian Aboriginal Syllabics" Unicode Block.
0995: * @since 1.4
0996: */
0997: public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock(
0998: "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f);
0999: /**
1000: * The "Ogham" Unicode Block.
1001: * @since 1.4
1002: */
1003: public static final UnicodeBlock OGHAM = new UnicodeBlock(
1004: "OGHAM", 0x1680, 0x169f);
1005: /**
1006: * The "Runic" Unicode Block.
1007: * @since 1.4
1008: */
1009: public static final UnicodeBlock RUNIC = new UnicodeBlock(
1010: "RUNIC", 0x16a0, 0x16ff);
1011: /**
1012: * The "Tagalog" Unicode Block.
1013: * @since 1.5
1014: */
1015: public static final UnicodeBlock TAGALOG = new UnicodeBlock(
1016: "TAGALOG", 0x1700, 0x171f);
1017: /**
1018: * The "Hanunoo" Unicode Block.
1019: * @since 1.5
1020: */
1021: public static final UnicodeBlock HANUNOO = new UnicodeBlock(
1022: "HANUNOO", 0x1720, 0x173f);
1023: /**
1024: * The "Buhid" Unicode Block.
1025: * @since 1.5
1026: */
1027: public static final UnicodeBlock BUHID = new UnicodeBlock(
1028: "BUHID", 0x1740, 0x175f);
1029: /**
1030: * The "Tagbanwa" Unicode Block.
1031: * @since 1.5
1032: */
1033: public static final UnicodeBlock TAGBANWA = new UnicodeBlock(
1034: "TAGBANWA", 0x1760, 0x177f);
1035: /**
1036: * The "Khmer" Unicode Block.
1037: * @since 1.4
1038: */
1039: public static final UnicodeBlock KHMER = new UnicodeBlock(
1040: "KHMER", 0x1780, 0x17ff);
1041: /**
1042: * The "Mongolian" Unicode Block.
1043: * @since 1.4
1044: */
1045: public static final UnicodeBlock MONGOLIAN = new UnicodeBlock(
1046: "MONGOLIAN", 0x1800, 0x18af);
1047: /**
1048: * The "Limbu" Unicode Block.
1049: * @since 1.5
1050: */
1051: public static final UnicodeBlock LIMBU = new UnicodeBlock(
1052: "LIMBU", 0x1900, 0x194f);
1053: /**
1054: * The "Tai Le" Unicode Block.
1055: * @since 1.5
1056: */
1057: public static final UnicodeBlock TAI_LE = new UnicodeBlock(
1058: "TAI_LE", 0x1950, 0x197f);
1059: /**
1060: * The "Khmer Symbols" Unicode Block.
1061: * @since 1.5
1062: */
1063: public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock(
1064: "KHMER_SYMBOLS", 0x19e0, 0x19ff);
1065: /**
1066: * The "Phonetic Extensions" Unicode Block.
1067: * @since 1.5
1068: */
1069: public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock(
1070: "PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f);
1071: /**
1072: * The "Latin Extended Additional" Unicode Block.
1073: * @since 1.2
1074: */
1075: public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock(
1076: "LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff);
1077: /**
1078: * The "Greek Extended" Unicode Block.
1079: * @since 1.2
1080: */
1081: public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock(
1082: "GREEK_EXTENDED", 0x1f00, 0x1fff);
1083: /**
1084: * The "General Punctuation" Unicode Block.
1085: * @since 1.2
1086: */
1087: public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock(
1088: "GENERAL_PUNCTUATION", 0x2000, 0x206f);
1089: /**
1090: * The "Superscripts and Subscripts" Unicode Block.
1091: * @since 1.2
1092: */
1093: public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock(
1094: "SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f);
1095: /**
1096: * The "Currency Symbols" Unicode Block.
1097: * @since 1.2
1098: */
1099: public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock(
1100: "CURRENCY_SYMBOLS", 0x20a0, 0x20cf);
1101: /**
1102: * The "Combining Diacritical Marks for Symbols" Unicode Block. Previously referred to as "Combining Marks for Symbols".
1103: * @since 1.2
1104: */
1105: public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock(
1106: "COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff);
1107: /**
1108: * The "Letterlike Symbols" Unicode Block.
1109: * @since 1.2
1110: */
1111: public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock(
1112: "LETTERLIKE_SYMBOLS", 0x2100, 0x214f);
1113: /**
1114: * The "Number Forms" Unicode Block.
1115: * @since 1.2
1116: */
1117: public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock(
1118: "NUMBER_FORMS", 0x2150, 0x218f);
1119: /**
1120: * The "Arrows" Unicode Block.
1121: * @since 1.2
1122: */
1123: public static final UnicodeBlock ARROWS = new UnicodeBlock(
1124: "ARROWS", 0x2190, 0x21ff);
1125: /**
1126: * The "Mathematical Operators" Unicode Block.
1127: * @since 1.2
1128: */
1129: public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock(
1130: "MATHEMATICAL_OPERATORS", 0x2200, 0x22ff);
1131: /**
1132: * The "Miscellaneous Technical" Unicode Block.
1133: * @since 1.2
1134: */
1135: public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock(
1136: "MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff);
1137: /**
1138: * The "Control Pictures" Unicode Block.
1139: * @since 1.2
1140: */
1141: public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock(
1142: "CONTROL_PICTURES", 0x2400, 0x243f);
1143: /**
1144: * The "Optical Character Recognition" Unicode Block.
1145: * @since 1.2
1146: */
1147: public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock(
1148: "OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f);
1149: /**
1150: * The "Enclosed Alphanumerics" Unicode Block.
1151: * @since 1.2
1152: */
1153: public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock(
1154: "ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff);
1155: /**
1156: * The "Box Drawing" Unicode Block.
1157: * @since 1.2
1158: */
1159: public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock(
1160: "BOX_DRAWING", 0x2500, 0x257f);
1161: /**
1162: * The "Block Elements" Unicode Block.
1163: * @since 1.2
1164: */
1165: public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock(
1166: "BLOCK_ELEMENTS", 0x2580, 0x259f);
1167: /**
1168: * The "Geometric Shapes" Unicode Block.
1169: * @since 1.2
1170: */
1171: public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock(
1172: "GEOMETRIC_SHAPES", 0x25a0, 0x25ff);
1173: /**
1174: * The "Miscellaneous Symbols" Unicode Block.
1175: * @since 1.2
1176: */
1177: public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock(
1178: "MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff);
1179: /**
1180: * The "Dingbats" Unicode Block.
1181: * @since 1.2
1182: */
1183: public static final UnicodeBlock DINGBATS = new UnicodeBlock(
1184: "DINGBATS", 0x2700, 0x27bf);
1185: /**
1186: * The "Miscellaneous Mathematical Symbols-A" Unicode Block.
1187: * @since 1.5
1188: */
1189: public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock(
1190: "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
1191: /**
1192: * The "Supplemental Arrows-A" Unicode Block.
1193: * @since 1.5
1194: */
1195: public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock(
1196: "SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
1197: /**
1198: * The "Braille Patterns" Unicode Block.
1199: * @since 1.4
1200: */
1201: public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock(
1202: "BRAILLE_PATTERNS", 0x2800, 0x28ff);
1203: /**
1204: * The "Supplemental Arrows-B" Unicode Block.
1205: * @since 1.5
1206: */
1207: public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock(
1208: "SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
1209: /**
1210: * The "Miscellaneous Mathematical Symbols-B" Unicode Block.
1211: * @since 1.5
1212: */
1213: public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock(
1214: "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
1215: /**
1216: * The "Supplemental Mathematical Operators" Unicode Block.
1217: * @since 1.5
1218: */
1219: public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock(
1220: "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
1221: /**
1222: * The "Miscellaneous Symbols and Arrows" Unicode Block.
1223: * @since 1.5
1224: */
1225: public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock(
1226: "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
1227: /**
1228: * The "CJK Radicals Supplement" Unicode Block.
1229: * @since 1.4
1230: */
1231: public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock(
1232: "CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
1233: /**
1234: * The "Kangxi Radicals" Unicode Block.
1235: * @since 1.4
1236: */
1237: public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock(
1238: "KANGXI_RADICALS", 0x2f00, 0x2fdf);
1239: /**
1240: * The "Ideographic Description Characters" Unicode Block.
1241: * @since 1.4
1242: */
1243: public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock(
1244: "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
1245: /**
1246: * The "CJK Symbols and Punctuation" Unicode Block.
1247: * @since 1.2
1248: */
1249: public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock(
1250: "CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
1251: /**
1252: * The "Hiragana" Unicode Block.
1253: * @since 1.2
1254: */
1255: public static final UnicodeBlock HIRAGANA = new UnicodeBlock(
1256: "HIRAGANA", 0x3040, 0x309f);
1257: /**
1258: * The "Katakana" Unicode Block.
1259: * @since 1.2
1260: */
1261: public static final UnicodeBlock KATAKANA = new UnicodeBlock(
1262: "KATAKANA", 0x30a0, 0x30ff);
1263: /**
1264: * The "Bopomofo" Unicode Block.
1265: * @since 1.2
1266: */
1267: public static final UnicodeBlock BOPOMOFO = new UnicodeBlock(
1268: "BOPOMOFO", 0x3100, 0x312f);
1269: /**
1270: * The "Hangul Compatibility Jamo" Unicode Block.
1271: * @since 1.2
1272: */
1273: public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock(
1274: "HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
1275: /**
1276: * The "Kanbun" Unicode Block.
1277: * @since 1.2
1278: */
1279: public static final UnicodeBlock KANBUN = new UnicodeBlock(
1280: "KANBUN", 0x3190, 0x319f);
1281: /**
1282: * The "Bopomofo Extended" Unicode Block.
1283: * @since 1.4
1284: */
1285: public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock(
1286: "BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
1287: /**
1288: * The "Katakana Phonetic Extensions" Unicode Block.
1289: * @since 1.5
1290: */
1291: public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock(
1292: "KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
1293: /**
1294: * The "Enclosed CJK Letters and Months" Unicode Block.
1295: * @since 1.2
1296: */
1297: public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock(
1298: "ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
1299: /**
1300: * The "CJK Compatibility" Unicode Block.
1301: * @since 1.2
1302: */
1303: public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock(
1304: "CJK_COMPATIBILITY", 0x3300, 0x33ff);
1305: /**
1306: * The "CJK Unified Ideographs Extension A" Unicode Block.
1307: * @since 1.4
1308: */
1309: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock(
1310: "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
1311: /**
1312: * The "Yijing Hexagram Symbols" Unicode Block.
1313: * @since 1.5
1314: */
1315: public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock(
1316: "YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
1317: /**
1318: * The "CJK Unified Ideographs" Unicode Block.
1319: * @since 1.2
1320: */
1321: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock(
1322: "CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
1323: /**
1324: * The "Yi Syllables" Unicode Block.
1325: * @since 1.4
1326: */
1327: public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock(
1328: "YI_SYLLABLES", 0xa000, 0xa48f);
1329: /**
1330: * The "Yi Radicals" Unicode Block.
1331: * @since 1.4
1332: */
1333: public static final UnicodeBlock YI_RADICALS = new UnicodeBlock(
1334: "YI_RADICALS", 0xa490, 0xa4cf);
1335: /**
1336: * The "Hangul Syllables" Unicode Block.
1337: * @since 1.2
1338: */
1339: public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock(
1340: "HANGUL_SYLLABLES", 0xac00, 0xd7af);
1341: /**
1342: * The "High Surrogates" Unicode Block. This block represents code point values in the high surrogate range 0xD800 to 0xDB7F.
1343: * @since 1.5
1344: */
1345: public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock(
1346: "HIGH_SURROGATES", 0xd800, 0xdb7f);
1347: /**
1348: * The "High Private Use Surrogates" Unicode Block. This block represents code point values in the high surrogate range 0xDB80 to 0xDBFF.
1349: * @since 1.5
1350: */
1351: public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock(
1352: "HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
1353: /**
1354: * The "Low Surrogates" Unicode Block. This block represents code point values in the low surrogate range 0xDC00 to 0xDFFF.
1355: * @since 1.5
1356: */
1357: public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock(
1358: "LOW_SURROGATES", 0xdc00, 0xdfff);
1359: /**
1360: * The "Private Use Area" Unicode Block.
1361: * @since 1.2
1362: */
1363: public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock(
1364: "PRIVATE_USE_AREA", 0xe000, 0xf8ff);
1365: /**
1366: * The "CJK Compatibility Ideographs" Unicode Block.
1367: * @since 1.2
1368: */
1369: public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock(
1370: "CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
1371: /**
1372: * The "Alphabetic Presentation Forms" Unicode Block.
1373: * @since 1.2
1374: */
1375: public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock(
1376: "ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
1377: /**
1378: * The "Arabic Presentation Forms-A" Unicode Block.
1379: * @since 1.2
1380: */
1381: public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock(
1382: "ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
1383: /**
1384: * The "Variation Selectors" Unicode Block.
1385: * @since 1.5
1386: */
1387: public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock(
1388: "VARIATION_SELECTORS", 0xfe00, 0xfe0f);
1389: /**
1390: * The "Combining Half Marks" Unicode Block.
1391: * @since 1.2
1392: */
1393: public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock(
1394: "COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
1395: /**
1396: * The "CJK Compatibility Forms" Unicode Block.
1397: * @since 1.2
1398: */
1399: public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock(
1400: "CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
1401: /**
1402: * The "Small Form Variants" Unicode Block.
1403: * @since 1.2
1404: */
1405: public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock(
1406: "SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
1407: /**
1408: * The "Arabic Presentation Forms-B" Unicode Block.
1409: * @since 1.2
1410: */
1411: public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock(
1412: "ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
1413: /**
1414: * The "Halfwidth and Fullwidth Forms" Unicode Block.
1415: * @since 1.2
1416: */
1417: public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock(
1418: "HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
1419: /**
1420: * The "Specials" Unicode Block.
1421: * @since 1.2
1422: */
1423: public static final UnicodeBlock SPECIALS = new UnicodeBlock(
1424: "SPECIALS", 0xfff0, 0xffff);
1425: /**
1426: * The "Linear B Syllabary" Unicode Block.
1427: * @since 1.5
1428: */
1429: public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock(
1430: "LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
1431: /**
1432: * The "Linear B Ideograms" Unicode Block.
1433: * @since 1.5
1434: */
1435: public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock(
1436: "LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
1437: /**
1438: * The "Aegean Numbers" Unicode Block.
1439: * @since 1.5
1440: */
1441: public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock(
1442: "AEGEAN_NUMBERS", 0x10100, 0x1013f);
1443: /**
1444: * The "Old Italic" Unicode Block.
1445: * @since 1.5
1446: */
1447: public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock(
1448: "OLD_ITALIC", 0x10300, 0x1032f);
1449: /**
1450: * The "Gothic" Unicode Block.
1451: * @since 1.5
1452: */
1453: public static final UnicodeBlock GOTHIC = new UnicodeBlock(
1454: "GOTHIC", 0x10330, 0x1034f);
1455: /**
1456: * The "Ugaritic" Unicode Block.
1457: * @since 1.5
1458: */
1459: public static final UnicodeBlock UGARITIC = new UnicodeBlock(
1460: "UGARITIC", 0x10380, 0x1039f);
1461: /**
1462: * The "Deseret" Unicode Block.
1463: * @since 1.5
1464: */
1465: public static final UnicodeBlock DESERET = new UnicodeBlock(
1466: "DESERET", 0x10400, 0x1044f);
1467: /**
1468: * The "Shavian" Unicode Block.
1469: * @since 1.5
1470: */
1471: public static final UnicodeBlock SHAVIAN = new UnicodeBlock(
1472: "SHAVIAN", 0x10450, 0x1047f);
1473: /**
1474: * The "Osmanya" Unicode Block.
1475: * @since 1.5
1476: */
1477: public static final UnicodeBlock OSMANYA = new UnicodeBlock(
1478: "OSMANYA", 0x10480, 0x104af);
1479: /**
1480: * The "Cypriot Syllabary" Unicode Block.
1481: * @since 1.5
1482: */
1483: public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock(
1484: "CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
1485: /**
1486: * The "Byzantine Musical Symbols" Unicode Block.
1487: * @since 1.5
1488: */
1489: public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock(
1490: "BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
1491: /**
1492: * The "Musical Symbols" Unicode Block.
1493: * @since 1.5
1494: */
1495: public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock(
1496: "MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
1497: /**
1498: * The "Tai Xuan Jing Symbols" Unicode Block.
1499: * @since 1.5
1500: */
1501: public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock(
1502: "TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
1503: /**
1504: * The "Mathematical Alphanumeric Symbols" Unicode Block.
1505: * @since 1.5
1506: */
1507: public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock(
1508: "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
1509: /**
1510: * The "CJK Unified Ideographs Extension B" Unicode Block.
1511: * @since 1.5
1512: */
1513: public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock(
1514: "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
1515: /**
1516: * The "CJK Compatibility Ideographs Supplement" Unicode Block.
1517: * @since 1.5
1518: */
1519: public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock(
1520: "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800,
1521: 0x2fa1f);
1522: /**
1523: * The "Tags" Unicode Block.
1524: * @since 1.5
1525: */
1526: public static final UnicodeBlock TAGS = new UnicodeBlock(
1527: "TAGS", 0xe0000, 0xe007f);
1528: /**
1529: * The "Variation Selectors Supplement" Unicode Block.
1530: * @since 1.5
1531: */
1532: public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock(
1533: "VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
1534: /**
1535: * The "Supplementary Private Use Area-A" Unicode Block.
1536: * @since 1.5
1537: */
1538: public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock(
1539: "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
1540: /**
1541: * The "Supplementary Private Use Area-B" Unicode Block.
1542: * @since 1.5
1543: */
1544: public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock(
1545: "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);
1546:
1547: /*
1548: * All of the UnicodeBlocks with valid ranges, in ascending code point order. SURROGATES_AREA (declared above with the range 0x0-0x0) is not included.
1549: */
1550: private static final UnicodeBlock[] BLOCKS = { BASIC_LATIN,
1551: LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, LATIN_EXTENDED_B,
1552: IPA_EXTENSIONS, SPACING_MODIFIER_LETTERS,
1553: COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC,
1554: CYRILLIC_SUPPLEMENTARY, ARMENIAN, HEBREW, ARABIC,
1555: SYRIAC, THAANA, DEVANAGARI, BENGALI, GURMUKHI,
1556: GUJARATI, ORIYA, TAMIL, TELUGU, KANNADA, MALAYALAM,
1557: SINHALA, THAI, LAO, TIBETAN, MYANMAR, GEORGIAN,
1558: HANGUL_JAMO, ETHIOPIC, CHEROKEE,
1559: UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC,
1560: TAGALOG, HANUNOO, BUHID, TAGBANWA, KHMER, MONGOLIAN,
1561: LIMBU, TAI_LE, KHMER_SYMBOLS, PHONETIC_EXTENSIONS,
1562: LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED,
1563: GENERAL_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS,
1564: CURRENCY_SYMBOLS, COMBINING_MARKS_FOR_SYMBOLS,
1565: LETTERLIKE_SYMBOLS, NUMBER_FORMS, ARROWS,
1566: MATHEMATICAL_OPERATORS, MISCELLANEOUS_TECHNICAL,
1567: CONTROL_PICTURES, OPTICAL_CHARACTER_RECOGNITION,
1568: ENCLOSED_ALPHANUMERICS, BOX_DRAWING, BLOCK_ELEMENTS,
1569: GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS, DINGBATS,
1570: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1571: SUPPLEMENTAL_ARROWS_A, BRAILLE_PATTERNS,
1572: SUPPLEMENTAL_ARROWS_B,
1573: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1574: SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1575: MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1576: CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS,
1577: IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1578: CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA,
1579: BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, KANBUN,
1580: BOPOMOFO_EXTENDED, KATAKANA_PHONETIC_EXTENSIONS,
1581: ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY,
1582: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1583: YIJING_HEXAGRAM_SYMBOLS, CJK_UNIFIED_IDEOGRAPHS,
1584: YI_SYLLABLES, YI_RADICALS, HANGUL_SYLLABLES,
1585: HIGH_SURROGATES, HIGH_PRIVATE_USE_SURROGATES,
1586: LOW_SURROGATES, PRIVATE_USE_AREA,
1587: CJK_COMPATIBILITY_IDEOGRAPHS,
1588: ALPHABETIC_PRESENTATION_FORMS,
1589: ARABIC_PRESENTATION_FORMS_A, VARIATION_SELECTORS,
1590: COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS,
1591: SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B,
1592: HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS,
1593: LINEAR_B_SYLLABARY, LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS,
1594: OLD_ITALIC, GOTHIC, UGARITIC, DESERET, SHAVIAN,
1595: OSMANYA, CYPRIOT_SYLLABARY, BYZANTINE_MUSICAL_SYMBOLS,
1596: MUSICAL_SYMBOLS, TAI_XUAN_JING_SYMBOLS,
1597: MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1598: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1599: CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, TAGS,
1600: VARIATION_SELECTORS_SUPPLEMENT,
1601: SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1602: SUPPLEMENTARY_PRIVATE_USE_AREA_B };
1603:
1604: /*
1605: * A SortedMap (ordered by String.CASE_INSENSITIVE_ORDER) whose keys are
1606: * the valid block names and whose values are the UnicodeBlock constants
1607: * those names map to.
1608: */
1609: private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = new TreeMap<String, UnicodeBlock>(
1610: String.CASE_INSENSITIVE_ORDER);
1611:
1612: static {
1613: BLOCKS_BY_NAME.put("SURROGATES_AREA", SURROGATES_AREA);
1614: BLOCKS_BY_NAME.put("Basic Latin", BASIC_LATIN);
1615: BLOCKS_BY_NAME.put("BasicLatin", BASIC_LATIN);
1616: BLOCKS_BY_NAME.put("BASIC_LATIN", BASIC_LATIN);
1617: BLOCKS_BY_NAME
1618: .put("Latin-1 Supplement", LATIN_1_SUPPLEMENT);
1619: BLOCKS_BY_NAME.put("Latin-1Supplement", LATIN_1_SUPPLEMENT);
1620: BLOCKS_BY_NAME
1621: .put("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT);
1622: BLOCKS_BY_NAME.put("Latin Extended-A", LATIN_EXTENDED_A);
1623: BLOCKS_BY_NAME.put("LatinExtended-A", LATIN_EXTENDED_A);
1624: BLOCKS_BY_NAME.put("LATIN_EXTENDED_A", LATIN_EXTENDED_A);
1625: BLOCKS_BY_NAME.put("Latin Extended-B", LATIN_EXTENDED_B);
1626: BLOCKS_BY_NAME.put("LatinExtended-B", LATIN_EXTENDED_B);
1627: BLOCKS_BY_NAME.put("LATIN_EXTENDED_B", LATIN_EXTENDED_B);
1628: BLOCKS_BY_NAME.put("IPA Extensions", IPA_EXTENSIONS);
1629: BLOCKS_BY_NAME.put("IPAExtensions", IPA_EXTENSIONS);
1630: BLOCKS_BY_NAME.put("IPA_EXTENSIONS", IPA_EXTENSIONS);
1631: BLOCKS_BY_NAME.put("Spacing Modifier Letters",
1632: SPACING_MODIFIER_LETTERS);
1633: BLOCKS_BY_NAME.put("SpacingModifierLetters",
1634: SPACING_MODIFIER_LETTERS);
1635: BLOCKS_BY_NAME.put("SPACING_MODIFIER_LETTERS",
1636: SPACING_MODIFIER_LETTERS);
1637: BLOCKS_BY_NAME.put("Combining Diacritical Marks",
1638: COMBINING_DIACRITICAL_MARKS);
1639: BLOCKS_BY_NAME.put("CombiningDiacriticalMarks",
1640: COMBINING_DIACRITICAL_MARKS);
1641: BLOCKS_BY_NAME.put("COMBINING_DIACRITICAL_MARKS",
1642: COMBINING_DIACRITICAL_MARKS);
1643: BLOCKS_BY_NAME.put("Greek and Coptic", GREEK);
1644: BLOCKS_BY_NAME.put("GreekandCoptic", GREEK);
1645: BLOCKS_BY_NAME.put("GREEK", GREEK);
1646: BLOCKS_BY_NAME.put("Greek", GREEK);
1647: BLOCKS_BY_NAME.put("Greek", GREEK);
1648: BLOCKS_BY_NAME.put("Cyrillic", CYRILLIC);
1649: BLOCKS_BY_NAME.put("Cyrillic Supplement",
1650: CYRILLIC_SUPPLEMENTARY);
1651: BLOCKS_BY_NAME.put("CyrillicSupplement",
1652: CYRILLIC_SUPPLEMENTARY);
1653: BLOCKS_BY_NAME.put("CYRILLIC_SUPPLEMENTARY",
1654: CYRILLIC_SUPPLEMENTARY);
1655: BLOCKS_BY_NAME.put("Cyrillic Supplementary",
1656: CYRILLIC_SUPPLEMENTARY);
1657: BLOCKS_BY_NAME.put("CyrillicSupplementary",
1658: CYRILLIC_SUPPLEMENTARY);
1659: BLOCKS_BY_NAME.put("Armenian", ARMENIAN);
1660: BLOCKS_BY_NAME.put("Hebrew", HEBREW);
1661: BLOCKS_BY_NAME.put("Arabic", ARABIC);
1662: BLOCKS_BY_NAME.put("Syriac", SYRIAC);
1663: BLOCKS_BY_NAME.put("Thaana", THAANA);
1664: BLOCKS_BY_NAME.put("Devanagari", DEVANAGARI);
1665: BLOCKS_BY_NAME.put("Bengali", BENGALI);
1666: BLOCKS_BY_NAME.put("Gurmukhi", GURMUKHI);
1667: BLOCKS_BY_NAME.put("Gujarati", GUJARATI);
1668: BLOCKS_BY_NAME.put("Oriya", ORIYA);
1669: BLOCKS_BY_NAME.put("Tamil", TAMIL);
1670: BLOCKS_BY_NAME.put("Telugu", TELUGU);
1671: BLOCKS_BY_NAME.put("Kannada", KANNADA);
1672: BLOCKS_BY_NAME.put("Malayalam", MALAYALAM);
1673: BLOCKS_BY_NAME.put("Sinhala", SINHALA);
1674: BLOCKS_BY_NAME.put("Thai", THAI);
1675: BLOCKS_BY_NAME.put("Lao", LAO);
1676: BLOCKS_BY_NAME.put("Tibetan", TIBETAN);
1677: BLOCKS_BY_NAME.put("Myanmar", MYANMAR);
1678: BLOCKS_BY_NAME.put("Georgian", GEORGIAN);
1679: BLOCKS_BY_NAME.put("Hangul Jamo", HANGUL_JAMO);
1680: BLOCKS_BY_NAME.put("HangulJamo", HANGUL_JAMO);
1681: BLOCKS_BY_NAME.put("HANGUL_JAMO", HANGUL_JAMO);
1682: BLOCKS_BY_NAME.put("Ethiopic", ETHIOPIC);
1683: BLOCKS_BY_NAME.put("Cherokee", CHEROKEE);
1684: BLOCKS_BY_NAME.put("Unified Canadian Aboriginal Syllabics",
1685: UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS);
1686: BLOCKS_BY_NAME.put("UnifiedCanadianAboriginalSyllabics",
1687: UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS);
1688: BLOCKS_BY_NAME.put("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1689: UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS);
1690: BLOCKS_BY_NAME.put("Ogham", OGHAM);
1691: BLOCKS_BY_NAME.put("Runic", RUNIC);
1692: BLOCKS_BY_NAME.put("Tagalog", TAGALOG);
1693: BLOCKS_BY_NAME.put("Hanunoo", HANUNOO);
1694: BLOCKS_BY_NAME.put("Buhid", BUHID);
1695: BLOCKS_BY_NAME.put("Tagbanwa", TAGBANWA);
1696: BLOCKS_BY_NAME.put("Khmer", KHMER);
1697: BLOCKS_BY_NAME.put("Mongolian", MONGOLIAN);
1698: BLOCKS_BY_NAME.put("Limbu", LIMBU);
1699: BLOCKS_BY_NAME.put("Tai Le", TAI_LE);
1700: BLOCKS_BY_NAME.put("TaiLe", TAI_LE);
1701: BLOCKS_BY_NAME.put("TAI_LE", TAI_LE);
1702: BLOCKS_BY_NAME.put("Khmer Symbols", KHMER_SYMBOLS);
1703: BLOCKS_BY_NAME.put("KhmerSymbols", KHMER_SYMBOLS);
1704: BLOCKS_BY_NAME.put("KHMER_SYMBOLS", KHMER_SYMBOLS);
1705: BLOCKS_BY_NAME.put("Phonetic Extensions",
1706: PHONETIC_EXTENSIONS);
1707: BLOCKS_BY_NAME.put("PhoneticExtensions",
1708: PHONETIC_EXTENSIONS);
1709: BLOCKS_BY_NAME.put("PHONETIC_EXTENSIONS",
1710: PHONETIC_EXTENSIONS);
1711: BLOCKS_BY_NAME.put("Latin Extended Additional",
1712: LATIN_EXTENDED_ADDITIONAL);
1713: BLOCKS_BY_NAME.put("LatinExtendedAdditional",
1714: LATIN_EXTENDED_ADDITIONAL);
1715: BLOCKS_BY_NAME.put("LATIN_EXTENDED_ADDITIONAL",
1716: LATIN_EXTENDED_ADDITIONAL);
1717: BLOCKS_BY_NAME.put("Greek Extended", GREEK_EXTENDED);
1718: BLOCKS_BY_NAME.put("GreekExtended", GREEK_EXTENDED);
1719: BLOCKS_BY_NAME.put("GREEK_EXTENDED", GREEK_EXTENDED);
1720: BLOCKS_BY_NAME.put("General Punctuation",
1721: GENERAL_PUNCTUATION);
1722: BLOCKS_BY_NAME.put("GeneralPunctuation",
1723: GENERAL_PUNCTUATION);
1724: BLOCKS_BY_NAME.put("GENERAL_PUNCTUATION",
1725: GENERAL_PUNCTUATION);
1726: BLOCKS_BY_NAME.put("Superscripts and Subscripts",
1727: SUPERSCRIPTS_AND_SUBSCRIPTS);
1728: BLOCKS_BY_NAME.put("SuperscriptsandSubscripts",
1729: SUPERSCRIPTS_AND_SUBSCRIPTS);
1730: BLOCKS_BY_NAME.put("SUPERSCRIPTS_AND_SUBSCRIPTS",
1731: SUPERSCRIPTS_AND_SUBSCRIPTS);
1732: BLOCKS_BY_NAME.put("Currency Symbols", CURRENCY_SYMBOLS);
1733: BLOCKS_BY_NAME.put("CurrencySymbols", CURRENCY_SYMBOLS);
1734: BLOCKS_BY_NAME.put("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS);
1735: BLOCKS_BY_NAME.put(
1736: "Combining Diacritical Marks for Symbols",
1737: COMBINING_MARKS_FOR_SYMBOLS);
1738: BLOCKS_BY_NAME.put("CombiningDiacriticalMarksforSymbols",
1739: COMBINING_MARKS_FOR_SYMBOLS);
1740: BLOCKS_BY_NAME.put("COMBINING_MARKS_FOR_SYMBOLS",
1741: COMBINING_MARKS_FOR_SYMBOLS);
1742: BLOCKS_BY_NAME.put("Combining Marks for Symbols",
1743: COMBINING_MARKS_FOR_SYMBOLS);
1744: BLOCKS_BY_NAME.put("CombiningMarksforSymbols",
1745: COMBINING_MARKS_FOR_SYMBOLS);
1746: BLOCKS_BY_NAME
1747: .put("Letterlike Symbols", LETTERLIKE_SYMBOLS);
1748: BLOCKS_BY_NAME.put("LetterlikeSymbols", LETTERLIKE_SYMBOLS);
1749: BLOCKS_BY_NAME
1750: .put("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS);
1751: BLOCKS_BY_NAME.put("Number Forms", NUMBER_FORMS);
1752: BLOCKS_BY_NAME.put("NumberForms", NUMBER_FORMS);
1753: BLOCKS_BY_NAME.put("NUMBER_FORMS", NUMBER_FORMS);
1754: BLOCKS_BY_NAME.put("Arrows", ARROWS);
1755: BLOCKS_BY_NAME.put("Mathematical Operators",
1756: MATHEMATICAL_OPERATORS);
1757: BLOCKS_BY_NAME.put("MathematicalOperators",
1758: MATHEMATICAL_OPERATORS);
1759: BLOCKS_BY_NAME.put("MATHEMATICAL_OPERATORS",
1760: MATHEMATICAL_OPERATORS);
1761: BLOCKS_BY_NAME.put("Miscellaneous Technical",
1762: MISCELLANEOUS_TECHNICAL);
1763: BLOCKS_BY_NAME.put("MiscellaneousTechnical",
1764: MISCELLANEOUS_TECHNICAL);
1765: BLOCKS_BY_NAME.put("MISCELLANEOUS_TECHNICAL",
1766: MISCELLANEOUS_TECHNICAL);
1767: BLOCKS_BY_NAME.put("Control Pictures", CONTROL_PICTURES);
1768: BLOCKS_BY_NAME.put("ControlPictures", CONTROL_PICTURES);
1769: BLOCKS_BY_NAME.put("CONTROL_PICTURES", CONTROL_PICTURES);
1770: BLOCKS_BY_NAME.put("Optical Character Recognition",
1771: OPTICAL_CHARACTER_RECOGNITION);
1772: BLOCKS_BY_NAME.put("OpticalCharacterRecognition",
1773: OPTICAL_CHARACTER_RECOGNITION);
1774: BLOCKS_BY_NAME.put("OPTICAL_CHARACTER_RECOGNITION",
1775: OPTICAL_CHARACTER_RECOGNITION);
1776: BLOCKS_BY_NAME.put("Enclosed Alphanumerics",
1777: ENCLOSED_ALPHANUMERICS);
1778: BLOCKS_BY_NAME.put("EnclosedAlphanumerics",
1779: ENCLOSED_ALPHANUMERICS);
1780: BLOCKS_BY_NAME.put("ENCLOSED_ALPHANUMERICS",
1781: ENCLOSED_ALPHANUMERICS);
1782: BLOCKS_BY_NAME.put("Box Drawing", BOX_DRAWING);
1783: BLOCKS_BY_NAME.put("BoxDrawing", BOX_DRAWING);
1784: BLOCKS_BY_NAME.put("BOX_DRAWING", BOX_DRAWING);
1785: BLOCKS_BY_NAME.put("Block Elements", BLOCK_ELEMENTS);
1786: BLOCKS_BY_NAME.put("BlockElements", BLOCK_ELEMENTS);
1787: BLOCKS_BY_NAME.put("BLOCK_ELEMENTS", BLOCK_ELEMENTS);
1788: BLOCKS_BY_NAME.put("Geometric Shapes", GEOMETRIC_SHAPES);
1789: BLOCKS_BY_NAME.put("GeometricShapes", GEOMETRIC_SHAPES);
1790: BLOCKS_BY_NAME.put("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES);
1791: BLOCKS_BY_NAME.put("Miscellaneous Symbols",
1792: MISCELLANEOUS_SYMBOLS);
1793: BLOCKS_BY_NAME.put("MiscellaneousSymbols",
1794: MISCELLANEOUS_SYMBOLS);
1795: BLOCKS_BY_NAME.put("MISCELLANEOUS_SYMBOLS",
1796: MISCELLANEOUS_SYMBOLS);
1797: BLOCKS_BY_NAME.put("Dingbats", DINGBATS);
1798: BLOCKS_BY_NAME.put("Miscellaneous Mathematical Symbols-A",
1799: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A);
1800: BLOCKS_BY_NAME.put("MiscellaneousMathematicalSymbols-A",
1801: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A);
1802: BLOCKS_BY_NAME.put("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1803: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A);
1804: BLOCKS_BY_NAME.put("Supplemental Arrows-A",
1805: SUPPLEMENTAL_ARROWS_A);
1806: BLOCKS_BY_NAME.put("SupplementalArrows-A",
1807: SUPPLEMENTAL_ARROWS_A);
1808: BLOCKS_BY_NAME.put("SUPPLEMENTAL_ARROWS_A",
1809: SUPPLEMENTAL_ARROWS_A);
1810: BLOCKS_BY_NAME.put("Braille Patterns", BRAILLE_PATTERNS);
1811: BLOCKS_BY_NAME.put("BraillePatterns", BRAILLE_PATTERNS);
1812: BLOCKS_BY_NAME.put("BRAILLE_PATTERNS", BRAILLE_PATTERNS);
1813: BLOCKS_BY_NAME.put("Supplemental Arrows-B",
1814: SUPPLEMENTAL_ARROWS_B);
1815: BLOCKS_BY_NAME.put("SupplementalArrows-B",
1816: SUPPLEMENTAL_ARROWS_B);
1817: BLOCKS_BY_NAME.put("SUPPLEMENTAL_ARROWS_B",
1818: SUPPLEMENTAL_ARROWS_B);
1819: BLOCKS_BY_NAME.put("Miscellaneous Mathematical Symbols-B",
1820: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B);
1821: BLOCKS_BY_NAME.put("MiscellaneousMathematicalSymbols-B",
1822: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B);
1823: BLOCKS_BY_NAME.put("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1824: MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B);
1825: BLOCKS_BY_NAME.put("Supplemental Mathematical Operators",
1826: SUPPLEMENTAL_MATHEMATICAL_OPERATORS);
1827: BLOCKS_BY_NAME.put("SupplementalMathematicalOperators",
1828: SUPPLEMENTAL_MATHEMATICAL_OPERATORS);
1829: BLOCKS_BY_NAME.put("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1830: SUPPLEMENTAL_MATHEMATICAL_OPERATORS);
1831: BLOCKS_BY_NAME.put("Miscellaneous Symbols and Arrows",
1832: MISCELLANEOUS_SYMBOLS_AND_ARROWS);
1833: BLOCKS_BY_NAME.put("MiscellaneousSymbolsandArrows",
1834: MISCELLANEOUS_SYMBOLS_AND_ARROWS);
1835: BLOCKS_BY_NAME.put("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1836: MISCELLANEOUS_SYMBOLS_AND_ARROWS);
1837: BLOCKS_BY_NAME.put("CJK Radicals Supplement",
1838: CJK_RADICALS_SUPPLEMENT);
1839: BLOCKS_BY_NAME.put("CJKRadicalsSupplement",
1840: CJK_RADICALS_SUPPLEMENT);
1841: BLOCKS_BY_NAME.put("CJK_RADICALS_SUPPLEMENT",
1842: CJK_RADICALS_SUPPLEMENT);
1843: BLOCKS_BY_NAME.put("Kangxi Radicals", KANGXI_RADICALS);
1844: BLOCKS_BY_NAME.put("KangxiRadicals", KANGXI_RADICALS);
1845: BLOCKS_BY_NAME.put("KANGXI_RADICALS", KANGXI_RADICALS);
1846: BLOCKS_BY_NAME.put("Ideographic Description Characters",
1847: IDEOGRAPHIC_DESCRIPTION_CHARACTERS);
1848: BLOCKS_BY_NAME.put("IdeographicDescriptionCharacters",
1849: IDEOGRAPHIC_DESCRIPTION_CHARACTERS);
1850: BLOCKS_BY_NAME.put("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1851: IDEOGRAPHIC_DESCRIPTION_CHARACTERS);
1852: BLOCKS_BY_NAME.put("CJK Symbols and Punctuation",
1853: CJK_SYMBOLS_AND_PUNCTUATION);
1854: BLOCKS_BY_NAME.put("CJKSymbolsandPunctuation",
1855: CJK_SYMBOLS_AND_PUNCTUATION);
1856: BLOCKS_BY_NAME.put("CJK_SYMBOLS_AND_PUNCTUATION",
1857: CJK_SYMBOLS_AND_PUNCTUATION);
1858: BLOCKS_BY_NAME.put("Hiragana", HIRAGANA);
1859: BLOCKS_BY_NAME.put("Katakana", KATAKANA);
1860: BLOCKS_BY_NAME.put("Bopomofo", BOPOMOFO);
1861: BLOCKS_BY_NAME.put("Hangul Compatibility Jamo",
1862: HANGUL_COMPATIBILITY_JAMO);
1863: BLOCKS_BY_NAME.put("HangulCompatibilityJamo",
1864: HANGUL_COMPATIBILITY_JAMO);
1865: BLOCKS_BY_NAME.put("HANGUL_COMPATIBILITY_JAMO",
1866: HANGUL_COMPATIBILITY_JAMO);
1867: BLOCKS_BY_NAME.put("Kanbun", KANBUN);
1868: BLOCKS_BY_NAME.put("Bopomofo Extended", BOPOMOFO_EXTENDED);
1869: BLOCKS_BY_NAME.put("BopomofoExtended", BOPOMOFO_EXTENDED);
1870: BLOCKS_BY_NAME.put("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED);
1871: BLOCKS_BY_NAME.put("Katakana Phonetic Extensions",
1872: KATAKANA_PHONETIC_EXTENSIONS);
1873: BLOCKS_BY_NAME.put("KatakanaPhoneticExtensions",
1874: KATAKANA_PHONETIC_EXTENSIONS);
1875: BLOCKS_BY_NAME.put("KATAKANA_PHONETIC_EXTENSIONS",
1876: KATAKANA_PHONETIC_EXTENSIONS);
1877: BLOCKS_BY_NAME.put("Enclosed CJK Letters and Months",
1878: ENCLOSED_CJK_LETTERS_AND_MONTHS);
1879: BLOCKS_BY_NAME.put("EnclosedCJKLettersandMonths",
1880: ENCLOSED_CJK_LETTERS_AND_MONTHS);
1881: BLOCKS_BY_NAME.put("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1882: ENCLOSED_CJK_LETTERS_AND_MONTHS);
1883: BLOCKS_BY_NAME.put("CJK Compatibility", CJK_COMPATIBILITY);
1884: BLOCKS_BY_NAME.put("CJKCompatibility", CJK_COMPATIBILITY);
1885: BLOCKS_BY_NAME.put("CJK_COMPATIBILITY", CJK_COMPATIBILITY);
1886: BLOCKS_BY_NAME.put("CJK Unified Ideographs Extension A",
1887: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
1888: BLOCKS_BY_NAME.put("CJKUnifiedIdeographsExtensionA",
1889: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
1890: BLOCKS_BY_NAME.put("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1891: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
1892: BLOCKS_BY_NAME.put("Yijing Hexagram Symbols",
1893: YIJING_HEXAGRAM_SYMBOLS);
1894: BLOCKS_BY_NAME.put("YijingHexagramSymbols",
1895: YIJING_HEXAGRAM_SYMBOLS);
1896: BLOCKS_BY_NAME.put("YIJING_HEXAGRAM_SYMBOLS",
1897: YIJING_HEXAGRAM_SYMBOLS);
1898: BLOCKS_BY_NAME.put("CJK Unified Ideographs",
1899: CJK_UNIFIED_IDEOGRAPHS);
1900: BLOCKS_BY_NAME.put("CJKUnifiedIdeographs",
1901: CJK_UNIFIED_IDEOGRAPHS);
1902: BLOCKS_BY_NAME.put("CJK_UNIFIED_IDEOGRAPHS",
1903: CJK_UNIFIED_IDEOGRAPHS);
1904: BLOCKS_BY_NAME.put("Yi Syllables", YI_SYLLABLES);
1905: BLOCKS_BY_NAME.put("YiSyllables", YI_SYLLABLES);
1906: BLOCKS_BY_NAME.put("YI_SYLLABLES", YI_SYLLABLES);
1907: BLOCKS_BY_NAME.put("Yi Radicals", YI_RADICALS);
1908: BLOCKS_BY_NAME.put("YiRadicals", YI_RADICALS);
1909: BLOCKS_BY_NAME.put("YI_RADICALS", YI_RADICALS);
1910: BLOCKS_BY_NAME.put("Hangul Syllables", HANGUL_SYLLABLES);
1911: BLOCKS_BY_NAME.put("HangulSyllables", HANGUL_SYLLABLES);
1912: BLOCKS_BY_NAME.put("HANGUL_SYLLABLES", HANGUL_SYLLABLES);
1913: BLOCKS_BY_NAME.put("High Surrogates", HIGH_SURROGATES);
1914: BLOCKS_BY_NAME.put("HighSurrogates", HIGH_SURROGATES);
1915: BLOCKS_BY_NAME.put("HIGH_SURROGATES", HIGH_SURROGATES);
1916: BLOCKS_BY_NAME.put("High Private Use Surrogates",
1917: HIGH_PRIVATE_USE_SURROGATES);
1918: BLOCKS_BY_NAME.put("HighPrivateUseSurrogates",
1919: HIGH_PRIVATE_USE_SURROGATES);
1920: BLOCKS_BY_NAME.put("HIGH_PRIVATE_USE_SURROGATES",
1921: HIGH_PRIVATE_USE_SURROGATES);
1922: BLOCKS_BY_NAME.put("Low Surrogates", LOW_SURROGATES);
1923: BLOCKS_BY_NAME.put("LowSurrogates", LOW_SURROGATES);
1924: BLOCKS_BY_NAME.put("LOW_SURROGATES", LOW_SURROGATES);
1925: BLOCKS_BY_NAME.put("Private Use Area", PRIVATE_USE_AREA);
1926: BLOCKS_BY_NAME.put("PrivateUseArea", PRIVATE_USE_AREA);
1927: BLOCKS_BY_NAME.put("PRIVATE_USE_AREA", PRIVATE_USE_AREA);
1928: BLOCKS_BY_NAME.put("CJK Compatibility Ideographs",
1929: CJK_COMPATIBILITY_IDEOGRAPHS);
1930: BLOCKS_BY_NAME.put("CJKCompatibilityIdeographs",
1931: CJK_COMPATIBILITY_IDEOGRAPHS);
1932: BLOCKS_BY_NAME.put("CJK_COMPATIBILITY_IDEOGRAPHS",
1933: CJK_COMPATIBILITY_IDEOGRAPHS);
1934: BLOCKS_BY_NAME.put("Alphabetic Presentation Forms",
1935: ALPHABETIC_PRESENTATION_FORMS);
1936: BLOCKS_BY_NAME.put("AlphabeticPresentationForms",
1937: ALPHABETIC_PRESENTATION_FORMS);
1938: BLOCKS_BY_NAME.put("ALPHABETIC_PRESENTATION_FORMS",
1939: ALPHABETIC_PRESENTATION_FORMS);
1940: BLOCKS_BY_NAME.put("Arabic Presentation Forms-A",
1941: ARABIC_PRESENTATION_FORMS_A);
1942: BLOCKS_BY_NAME.put("ArabicPresentationForms-A",
1943: ARABIC_PRESENTATION_FORMS_A);
1944: BLOCKS_BY_NAME.put("ARABIC_PRESENTATION_FORMS_A",
1945: ARABIC_PRESENTATION_FORMS_A);
1946: BLOCKS_BY_NAME.put("Variation Selectors",
1947: VARIATION_SELECTORS);
1948: BLOCKS_BY_NAME.put("VariationSelectors",
1949: VARIATION_SELECTORS);
1950: BLOCKS_BY_NAME.put("VARIATION_SELECTORS",
1951: VARIATION_SELECTORS);
1952: BLOCKS_BY_NAME.put("Combining Half Marks",
1953: COMBINING_HALF_MARKS);
1954: BLOCKS_BY_NAME.put("CombiningHalfMarks",
1955: COMBINING_HALF_MARKS);
1956: BLOCKS_BY_NAME.put("COMBINING_HALF_MARKS",
1957: COMBINING_HALF_MARKS);
1958: BLOCKS_BY_NAME.put("CJK Compatibility Forms",
1959: CJK_COMPATIBILITY_FORMS);
1960: BLOCKS_BY_NAME.put("CJKCompatibilityForms",
1961: CJK_COMPATIBILITY_FORMS);
1962: BLOCKS_BY_NAME.put("CJK_COMPATIBILITY_FORMS",
1963: CJK_COMPATIBILITY_FORMS);
1964: BLOCKS_BY_NAME.put("Small Form Variants",
1965: SMALL_FORM_VARIANTS);
1966: BLOCKS_BY_NAME
1967: .put("SmallFormVariants", SMALL_FORM_VARIANTS);
1968: BLOCKS_BY_NAME.put("SMALL_FORM_VARIANTS",
1969: SMALL_FORM_VARIANTS);
1970: BLOCKS_BY_NAME.put("Arabic Presentation Forms-B",
1971: ARABIC_PRESENTATION_FORMS_B);
1972: BLOCKS_BY_NAME.put("ArabicPresentationForms-B",
1973: ARABIC_PRESENTATION_FORMS_B);
1974: BLOCKS_BY_NAME.put("ARABIC_PRESENTATION_FORMS_B",
1975: ARABIC_PRESENTATION_FORMS_B);
1976: BLOCKS_BY_NAME.put("Halfwidth and Fullwidth Forms",
1977: HALFWIDTH_AND_FULLWIDTH_FORMS);
1978: BLOCKS_BY_NAME.put("HalfwidthandFullwidthForms",
1979: HALFWIDTH_AND_FULLWIDTH_FORMS);
1980: BLOCKS_BY_NAME.put("HALFWIDTH_AND_FULLWIDTH_FORMS",
1981: HALFWIDTH_AND_FULLWIDTH_FORMS);
1982: BLOCKS_BY_NAME.put("Specials", SPECIALS);
1983: BLOCKS_BY_NAME
1984: .put("Linear B Syllabary", LINEAR_B_SYLLABARY);
1985: BLOCKS_BY_NAME.put("LinearBSyllabary", LINEAR_B_SYLLABARY);
1986: BLOCKS_BY_NAME
1987: .put("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY);
1988: BLOCKS_BY_NAME
1989: .put("Linear B Ideograms", LINEAR_B_IDEOGRAMS);
1990: BLOCKS_BY_NAME.put("LinearBIdeograms", LINEAR_B_IDEOGRAMS);
1991: BLOCKS_BY_NAME
1992: .put("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS);
1993: BLOCKS_BY_NAME.put("Aegean Numbers", AEGEAN_NUMBERS);
1994: BLOCKS_BY_NAME.put("AegeanNumbers", AEGEAN_NUMBERS);
1995: BLOCKS_BY_NAME.put("AEGEAN_NUMBERS", AEGEAN_NUMBERS);
1996: BLOCKS_BY_NAME.put("Old Italic", OLD_ITALIC);
1997: BLOCKS_BY_NAME.put("OldItalic", OLD_ITALIC);
1998: BLOCKS_BY_NAME.put("OLD_ITALIC", OLD_ITALIC);
1999: BLOCKS_BY_NAME.put("Gothic", GOTHIC);
2000: BLOCKS_BY_NAME.put("Ugaritic", UGARITIC);
2001: BLOCKS_BY_NAME.put("Deseret", DESERET);
2002: BLOCKS_BY_NAME.put("Shavian", SHAVIAN);
2003: BLOCKS_BY_NAME.put("Osmanya", OSMANYA);
2004: BLOCKS_BY_NAME.put("Cypriot Syllabary", CYPRIOT_SYLLABARY);
2005: BLOCKS_BY_NAME.put("CypriotSyllabary", CYPRIOT_SYLLABARY);
2006: BLOCKS_BY_NAME.put("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY);
2007: BLOCKS_BY_NAME.put("Byzantine Musical Symbols",
2008: BYZANTINE_MUSICAL_SYMBOLS);
2009: BLOCKS_BY_NAME.put("ByzantineMusicalSymbols",
2010: BYZANTINE_MUSICAL_SYMBOLS);
2011: BLOCKS_BY_NAME.put("BYZANTINE_MUSICAL_SYMBOLS",
2012: BYZANTINE_MUSICAL_SYMBOLS);
2013: BLOCKS_BY_NAME.put("Musical Symbols", MUSICAL_SYMBOLS);
2014: BLOCKS_BY_NAME.put("MusicalSymbols", MUSICAL_SYMBOLS);
2015: BLOCKS_BY_NAME.put("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS);
2016: BLOCKS_BY_NAME.put("Tai Xuan Jing Symbols",
2017: TAI_XUAN_JING_SYMBOLS);
2018: BLOCKS_BY_NAME.put("TaiXuanJingSymbols",
2019: TAI_XUAN_JING_SYMBOLS);
2020: BLOCKS_BY_NAME.put("TAI_XUAN_JING_SYMBOLS",
2021: TAI_XUAN_JING_SYMBOLS);
2022: BLOCKS_BY_NAME.put("Mathematical Alphanumeric Symbols",
2023: MATHEMATICAL_ALPHANUMERIC_SYMBOLS);
2024: BLOCKS_BY_NAME.put("MathematicalAlphanumericSymbols",
2025: MATHEMATICAL_ALPHANUMERIC_SYMBOLS);
2026: BLOCKS_BY_NAME.put("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
2027: MATHEMATICAL_ALPHANUMERIC_SYMBOLS);
2028: BLOCKS_BY_NAME.put("CJK Unified Ideographs Extension B",
2029: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
2030: BLOCKS_BY_NAME.put("CJKUnifiedIdeographsExtensionB",
2031: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
2032: BLOCKS_BY_NAME.put("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
2033: CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
2034: BLOCKS_BY_NAME.put(
2035: "CJK Compatibility Ideographs Supplement",
2036: CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
2037: BLOCKS_BY_NAME.put("CJKCompatibilityIdeographsSupplement",
2038: CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
2039: BLOCKS_BY_NAME.put(
2040: "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
2041: CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
2042: BLOCKS_BY_NAME.put("Tags", TAGS);
2043: BLOCKS_BY_NAME.put("Variation Selectors Supplement",
2044: VARIATION_SELECTORS_SUPPLEMENT);
2045: BLOCKS_BY_NAME.put("VariationSelectorsSupplement",
2046: VARIATION_SELECTORS_SUPPLEMENT);
2047: BLOCKS_BY_NAME.put("VARIATION_SELECTORS_SUPPLEMENT",
2048: VARIATION_SELECTORS_SUPPLEMENT);
2049: BLOCKS_BY_NAME.put("Supplementary Private Use Area-A",
2050: SUPPLEMENTARY_PRIVATE_USE_AREA_A);
2051: BLOCKS_BY_NAME.put("SupplementaryPrivateUseArea-A",
2052: SUPPLEMENTARY_PRIVATE_USE_AREA_A);
2053: BLOCKS_BY_NAME.put("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
2054: SUPPLEMENTARY_PRIVATE_USE_AREA_A);
2055: BLOCKS_BY_NAME.put("Supplementary Private Use Area-B",
2056: SUPPLEMENTARY_PRIVATE_USE_AREA_B);
2057: BLOCKS_BY_NAME.put("SupplementaryPrivateUseArea-B",
2058: SUPPLEMENTARY_PRIVATE_USE_AREA_B);
2059: BLOCKS_BY_NAME.put("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
2060: SUPPLEMENTARY_PRIVATE_USE_AREA_B);
2061: }
2062:
2063: /**
2064: * <p>
2065: * Retrieves the constant that corresponds to the block name given. The
2066: * block names are defined by the Unicode 4.0.1 specification in the
2067: * <code>Blocks-4.0.1.txt</code> file.
2068: * </p>
2069: * <p>
2070: * Block names may be one of the following:
2071: * </p>
2072: * <ul>
2073: * <li>Canonical block name, as defined by the Unicode specification;
2074: * case-insensitive.</li>
2075: * <li>Canonical block name without any spaces, as defined by the
2076: * Unicode specification; case-insensitive.</li>
2077: * <li><code>UnicodeBlock</code> constant identifier. This is
2078: * determined by uppercasing the canonical name and replacing all spaces
2079: * and hyphens with underscores.</li>
2080: * </ul>
2081: *
2082: * @param blockName The name of the block to retrieve.
2083: * @return A UnicodeBlock constant.
2084: * @throws NullPointerException if <code>blockName</code> is
2085: * <code>null</code>.
2086: * @throws IllegalArgumentException if <code>blockName</code> is not a
2087: * valid block name.
2088: * @since 1.5
2089: */
2090: public static UnicodeBlock forName(String blockName) {
2091: if (blockName == null) {
2092: throw new NullPointerException();
2093: }
2094: UnicodeBlock match = BLOCKS_BY_NAME.get(blockName);
2095: if (match == null) {
2096: throw new IllegalArgumentException();
2097: }
2098: return match;
2099: }
2100:
2101: /**
2102: * <p>
2103: * Retrieves the constant that contains the given <code>char</code> or
2104: * <code>null</code> if there is none.
2105: * </p>
2106: *
2107: * @param c The character to retrieve a UnicodeBlock for.
2108: * @return A UnicodeBlock constant or <code>null</code>.
2109: */
2110: public static UnicodeBlock of(char c) {
2111: return of((int) c);
2112: }
2113:
2114: /**
2115: * <p>
2116: * Retrieves the constant that contains the given Unicode code point or
2117: * <code>null</code> if there is none.
2118: * </p>
2119: *
2120: * @param codePoint The Unicode code point to retrieve a UnicodeBlock
2121: * for.
2122: * @return A UnicodeBlock constant or <code>null</code>.
2123: * @throws IllegalArgumentException if <code>codePoint</code> is not a
2124: * valid Unicode code point.
2125: * @since 1.5
2126: */
2127: public static UnicodeBlock of(int codePoint) {
2128: if (!isValidCodePoint(codePoint)) {
2129: throw new IllegalArgumentException();
2130: }
2131: int low = 0;
2132: int mid = -1;
2133: int high = BLOCKS.length - 1;
2134: while (low <= high) {
2135: mid = (low + high) >>> 1;
2136: UnicodeBlock block = BLOCKS[mid];
2137: if (codePoint > block.end) {
2138: low = mid + 1;
2139: } else if (codePoint >= block.start
2140: && codePoint <= block.end) {
2141: return block;
2142: } else {
2143: high = mid - 1;
2144: }
2145: }
2146: return null;
2147: }
2148:
2149: private int start;
2150: private int end;
2151:
2152: private UnicodeBlock(String name, int start, int end) {
2153: super (name);
2154: this .start = start;
2155: this .end = end;
2156: }
2157: }
2158:
/**
 * Creates a <code>Character</code> wrapping the given <code>char</code>.
 *
 * @param value
 *            the char to store in the new instance.
 */
public Character(char value) {
    this.value = value;
}
2169:
2170: /**
2171: * Answers the char value which the receiver represents.
2172: *
2173: * @return char the value of the receiver
2174: */
2175: public char charValue() {
2176: return value;
2177: }
2178:
2179: /**
2180: * Compares the receiver to the specified Character to determine the
2181: * relative ordering.
2182: *
2183: * @param c
2184: * the Character
2185: * @return an int < 0 if this Character is less than the specified
2186: * Character, 0 if they are equal, and > 0 if this Character is
2187: * greater
2188: * @throws NullPointerException
2189: * if <code>c</code> is <code>null</code>.
2190: * @since 1.2
2191: */
2192: public int compareTo(Character c) {
2193: return value - c.value;
2194: }
2195:
2196: /**
2197: * <p>
2198: * Returns a <code>Character</code> instance for the <code>char</code>
2199: * value passed. This method is preferred over the constructor, as this
2200: * method may maintain a cache of instances.
2201: * </p>
2202: *
2203: * @param c The char value.
2204: * @return A <code>Character</code> instance.
2205: * @since 1.5
2206: */
2207: public static Character valueOf(char c) {
2208: if (c >= CACHE_LEN) {
2209: return new Character(c);
2210: }
2211: return valueOfCache.CACHE[c];
2212: }
2213:
2214: private static final int CACHE_LEN = 512;
2215:
2216: static class valueOfCache {
2217: /*
2218: * Provides a cache for the 'valueOf' method. A size of 512 should cache the
2219: * first couple pages of Unicode, which includes the ASCII/Latin-1
2220: * characters, which other parts of this class are optimized for.
2221: */
2222: private static final Character[] CACHE = new Character[CACHE_LEN];
2223:
2224: static {
2225: for (int i = 0; i < CACHE.length; i++) {
2226: CACHE[i] = new Character((char) i);
2227: }
2228: }
2229: }
2230:
2231: /**
2232: * <p>
2233: * A test for determining if the <code>codePoint</code> is a valid Unicode
2234: * code point.
2235: * </p>
2236: *
2237: * @param codePoint The code point to test.
2238: * @return A boolean value.
2239: * @since 1.5
2240: */
2241: public static boolean isValidCodePoint(int codePoint) {
2242: return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
2243: }
2244:
2245: /**
2246: * <p>
2247: * A test for determining if the <code>codePoint</code> is within the
2248: * supplementary code point range.
2249: * </p>
2250: *
2251: * @param codePoint The code point to test.
2252: * @return A boolean value.
2253: * @since 1.5
2254: */
2255: public static boolean isSupplementaryCodePoint(int codePoint) {
2256: return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint);
2257: }
2258:
2259: /**
2260: * <p>
2261: * A test for determining if the <code>char</code> is a high
2262: * surrogate/leading surrogate unit that's used for representing
2263: * supplementary characters in UTF-16 encoding.
2264: * </p>
2265: *
2266: * @param ch The <code>char</code> unit to test.
2267: * @return A boolean value.
2268: * @since 1.5
2269: * @see #isLowSurrogate(char)
2270: */
2271: public static boolean isHighSurrogate(char ch) {
2272: return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch);
2273: }
2274:
2275: /**
2276: * <p>
2277: * A test for determining if the <code>char</code> is a high
2278: * surrogate/leading surrogate unit that's used for representing
2279: * supplementary characters in UTF-16 encoding.
2280: * </p>
2281: *
2282: * @param ch The <code>char</code> unit to test.
2283: * @return A boolean value.
2284: * @since 1.5
2285: * @see #isHighSurrogate(char)
2286: */
2287: public static boolean isLowSurrogate(char ch) {
2288: return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch);
2289: }
2290:
2291: /**
2292: * <p>
2293: * A test for determining if the <code>char</code> pair is a valid
2294: * surrogate pair.
2295: * </p>
2296: *
2297: * @param high The high surrogate unit to test.
2298: * @param low The low surrogate unit to test.
2299: * @return A boolean value.
2300: * @since 1.5
2301: * @see #isHighSurrogate(char)
2302: * @see #isLowSurrogate(char)
2303: */
2304: public static boolean isSurrogatePair(char high, char low) {
2305: return (isHighSurrogate(high) && isLowSurrogate(low));
2306: }
2307:
2308: /**
2309: * <p>
2310: * Calculates the number of <code>char</code> values required to represent
2311: * the Unicode code point. This method only tests if the
2312: * <code>codePoint</code> is greater than or equal to <code>0x10000</code>,
2313: * in which case <code>2</code> is returned, otherwise <code>1</code>.
2314: * To test if the code point is valid, use the
2315: * {@link #isValidCodePoint(int)} method.
2316: * </p>
2317: *
2318: * @param codePoint The code point to test.
2319: * @return An <code>int</code> value of 2 or 1.
2320: * @since 1.5
2321: * @see #isValidCodePoint(int)
2322: * @see #isSupplementaryCodePoint(int)
2323: */
2324: public static int charCount(int codePoint) {
2325: return (codePoint >= 0x10000 ? 2 : 1);
2326: }
2327:
2328: /**
2329: * <p>
2330: * Converts a surrogate pair into a Unicode code point. This method assume
2331: * that the pair are valid surrogates. If the pair are NOT valid surrogates,
2332: * then the result is indeterminate. The
2333: * {@link #isSurrogatePair(char, char)} method should be used prior to this
2334: * method to validate the pair.
2335: * </p>
2336: *
2337: * @param high The high surrogate unit.
2338: * @param low The low surrogate unit.
2339: * @return The decoded code point.
2340: * @since 1.5
2341: * @see #isSurrogatePair(char, char)
2342: */
2343: public static int toCodePoint(char high, char low) {
2344: // See RFC 2781, Section 2.2
2345: // http://www.faqs.org/rfcs/rfc2781.html
2346: int h = (high & 0x3FF) << 10;
2347: int l = low & 0x3FF;
2348: return (h | l) + 0x10000;
2349: }
2350:
2351: /**
2352: * <p>
2353: * Returns the code point at the index in the <code>CharSequence</code>.
2354: * If <code>char</code> unit at the index is a high-surrogate unit, the
2355: * next index is less than the length of the sequence and the
2356: * <code>char</code> unit at the next index is a low surrogate unit, then
2357: * the code point represented by the pair is returned; otherwise the
2358: * <code>char</code> unit at the index is returned.
2359: * </p>
2360: *
2361: * @param seq The sequence of <code>char</code> units.
2362: * @param index The index into the <code>seq</code> to retrieve and
2363: * convert.
2364: * @return The Unicode code point.
2365: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2366: * @throws IndexOutOfBoundsException if the <code>index</code> is negative
2367: * or greater than or equal to <code>seq.length()</code>.
2368: * @since 1.5
2369: */
2370: public static int codePointAt(CharSequence seq, int index) {
2371: if (seq == null) {
2372: throw new NullPointerException();
2373: }
2374: int len = seq.length();
2375: if (index < 0 || index >= len) {
2376: throw new IndexOutOfBoundsException();
2377: }
2378:
2379: char high = seq.charAt(index++);
2380: if (index >= len) {
2381: return high;
2382: }
2383: char low = seq.charAt(index);
2384: if (isSurrogatePair(high, low)) {
2385: return toCodePoint(high, low);
2386: }
2387: return high;
2388: }
2389:
2390: /**
2391: * <p>
2392: * Returns the code point at the index in the <code>char[]</code>. If
2393: * <code>char</code> unit at the index is a high-surrogate unit, the next
2394: * index is less than the length of the sequence and the <code>char</code>
2395: * unit at the next index is a low surrogate unit, then the code point
2396: * represented by the pair is returned; otherwise the <code>char</code>
2397: * unit at the index is returned.
2398: * </p>
2399: *
2400: * @param seq The sequence of <code>char</code> units.
2401: * @param index The index into the <code>seq</code> to retrieve and
2402: * convert.
2403: * @return The Unicode code point.
2404: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2405: * @throws IndexOutOfBoundsException if the <code>index</code> is negative
2406: * or greater than or equal to <code>seq.length()</code>.
2407: * @since 1.5
2408: */
2409: public static int codePointAt(char[] seq, int index) {
2410: if (seq == null) {
2411: throw new NullPointerException();
2412: }
2413: int len = seq.length;
2414: if (index < 0 || index >= len) {
2415: throw new IndexOutOfBoundsException();
2416: }
2417:
2418: char high = seq[index++];
2419: if (index >= len) {
2420: return high;
2421: }
2422: char low = seq[index];
2423: if (isSurrogatePair(high, low)) {
2424: return toCodePoint(high, low);
2425: }
2426: return high;
2427: }
2428:
2429: /**
2430: * <p>
2431: * Returns the code point at the index in the <code>char[]</code> that's
2432: * within the limit. If <code>char</code> unit at the index is a
2433: * high-surrogate unit, the next index is less than the <code>limit</code>
2434: * and the <code>char</code> unit at the next index is a low surrogate
2435: * unit, then the code point represented by the pair is returned; otherwise
2436: * the <code>char</code> unit at the index is returned.
2437: * </p>
2438: *
2439: * @param seq The sequence of <code>char</code> units.
2440: * @param index The index into the <code>seq</code> to retrieve and
2441: * convert.
2442: * @param limit The exclusive index into the <code>seq</code> that marks
2443: * the end of the units that can be used.
2444: * @return The Unicode code point.
2445: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2446: * @throws IndexOutOfBoundsException if the <code>index</code> is
2447: * negative, greater than or equal to <code>limit</code>,
2448: * <code>limit</code> is negative or <code>limit</code> is
2449: * greater than the length of <code>seq</code>.
2450: * @since 1.5
2451: */
2452: public static int codePointAt(char[] seq, int index, int limit) {
2453: if (index < 0 || index >= limit || limit < 0
2454: || limit > seq.length) {
2455: throw new IndexOutOfBoundsException();
2456: }
2457:
2458: char high = seq[index++];
2459: if (index >= limit) {
2460: return high;
2461: }
2462: char low = seq[index];
2463: if (isSurrogatePair(high, low)) {
2464: return toCodePoint(high, low);
2465: }
2466: return high;
2467: }
2468:
2469: /**
2470: * <p>
2471: * Returns the Unicode code point that proceeds the <code>index</code> in
2472: * the <code>CharSequence</code>. If the <code>char</code> unit at
2473: * <code>index - 1</code> is within the low surrogate range, the value
2474: * <code>index - 2</code> isn't negative and the <code>char</code> unit
2475: * at <code>index - 2</code> is within the high surrogate range, then the
2476: * supplementary code point made up of the surrogate pair is returned;
2477: * otherwise, the <code>char</code> value at <code>index - 1</code> is
2478: * returned.
2479: * </p>
2480: *
2481: * @param seq The <code>CharSequence</code> to search.
2482: * @param index The index into the <code>seq</code>.
2483: * @return A Unicode code point.
2484: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2485: * @throws IndexOutOfBoundsException if <code>index</code> is less than 1
2486: * or greater than <code>seq.length()</code>.
2487: * @since 1.5
2488: */
2489: public static int codePointBefore(CharSequence seq, int index) {
2490: if (seq == null) {
2491: throw new NullPointerException();
2492: }
2493: int len = seq.length();
2494: if (index < 1 || index > len) {
2495: throw new IndexOutOfBoundsException();
2496: }
2497:
2498: char low = seq.charAt(--index);
2499: if (--index < 0) {
2500: return low;
2501: }
2502: char high = seq.charAt(index);
2503: if (isSurrogatePair(high, low)) {
2504: return toCodePoint(high, low);
2505: }
2506: return low;
2507: }
2508:
2509: /**
2510: * <p>
2511: * Returns the Unicode code point that proceeds the <code>index</code> in
2512: * the <code>char[]</code>. If the <code>char</code> unit at
2513: * <code>index - 1</code> is within the low surrogate range, the value
2514: * <code>index - 2</code> isn't negative and the <code>char</code> unit
2515: * at <code>index - 2</code> is within the high surrogate range, then the
2516: * supplementary code point made up of the surrogate pair is returned;
2517: * otherwise, the <code>char</code> value at <code>index - 1</code> is
2518: * returned.
2519: * </p>
2520: *
2521: * @param seq The <code>char[]</code> to search.
2522: * @param index The index into the <code>seq</code>.
2523: * @return A Unicode code point.
2524: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2525: * @throws IndexOutOfBoundsException if <code>index</code> is less than 1
2526: * or greater than <code>seq.length</code>.
2527: * @since 1.5
2528: */
2529: public static int codePointBefore(char[] seq, int index) {
2530: if (seq == null) {
2531: throw new NullPointerException();
2532: }
2533: int len = seq.length;
2534: if (index < 1 || index > len) {
2535: throw new IndexOutOfBoundsException();
2536: }
2537:
2538: char low = seq[--index];
2539: if (--index < 0) {
2540: return low;
2541: }
2542: char high = seq[index];
2543: if (isSurrogatePair(high, low)) {
2544: return toCodePoint(high, low);
2545: }
2546: return low;
2547: }
2548:
2549: /**
2550: * <p>
2551: * Returns the Unicode code point that proceeds the <code>index</code> in
2552: * the <code>char[]</code> and isn't less than <code>start</code>. If
2553: * the <code>char</code> unit at <code>index - 1</code> is within the
2554: * low surrogate range, the value <code>index - 2</code> isn't less than
2555: * <code>start</code> and the <code>char</code> unit at
2556: * <code>index - 2</code> is within the high surrogate range, then the
2557: * supplementary code point made up of the surrogate pair is returned;
2558: * otherwise, the <code>char</code> value at <code>index - 1</code> is
2559: * returned.
2560: * </p>
2561: *
2562: * @param seq The <code>char[]</code> to search.
2563: * @param index The index into the <code>seq</code>.
2564: * @return A Unicode code point.
2565: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2566: * @throws IndexOutOfBoundsException if <code>index</code> is less than or
2567: * equal to <code>start</code>, <code>index</code> is greater
2568: * than <code>seq.length</code>, <code>start</code> is not
2569: * negative and <code>start</code> is greater than
2570: * <code>seq.length</code>.
2571: * @since 1.5
2572: */
2573: public static int codePointBefore(char[] seq, int index, int start) {
2574: if (seq == null) {
2575: throw new NullPointerException();
2576: }
2577: int len = seq.length;
2578: if (index <= start || index > len || start < 0 || start >= len) {
2579: throw new IndexOutOfBoundsException();
2580: }
2581:
2582: char low = seq[--index];
2583: if (--index < start) {
2584: return low;
2585: }
2586: char high = seq[index];
2587: if (isSurrogatePair(high, low)) {
2588: return toCodePoint(high, low);
2589: }
2590: return low;
2591: }
2592:
2593: /**
2594: * <p>
2595: * Converts the Unicode code point, <code>codePoint</code>, into a UTF-16
2596: * encoded sequence and copies the value(s) into the
2597: * <code>char[]</code> <code>dst</code>, starting at the index
2598: * <code>dstIndex</code>.
2599: * </p>
2600: *
2601: * @param codePoint The Unicode code point to encode.
2602: * @param dst The <code>char[]</code> to copy the encoded value into.
2603: * @param dstIndex The index to start copying into <code>dst</code>.
2604: * @return The number of <code>char</code> value units copied into
2605: * <code>dst</code>.
2606: * @throws IllegalArgumentException if <code>codePoint</code> is not a
2607: * valid Unicode code point.
2608: * @throws NullPointerException if <code>dst</code> is <code>null</code>.
2609: * @throws IndexOutOfBoundsException if <code>dstIndex</code> is negative,
2610: * greater than or equal to <code>dst.length</code> or equals
2611: * <code>dst.length - 1</code> when <code>codePoint</code> is a
2612: * {@link #isSupplementaryCodePoint(int) supplementary code point}.
2613: * @since 1.5
2614: */
2615: public static int toChars(int codePoint, char[] dst, int dstIndex) {
2616: if (!isValidCodePoint(codePoint)) {
2617: throw new IllegalArgumentException();
2618: }
2619: if (dst == null) {
2620: throw new NullPointerException();
2621: }
2622: if (dstIndex < 0 || dstIndex >= dst.length) {
2623: throw new IndexOutOfBoundsException();
2624: }
2625:
2626: if (isSupplementaryCodePoint(codePoint)) {
2627: if (dstIndex == dst.length - 1) {
2628: throw new IndexOutOfBoundsException();
2629: }
2630: // See RFC 2781, Section 2.1
2631: // http://www.faqs.org/rfcs/rfc2781.html
2632: int cpPrime = codePoint - 0x10000;
2633: int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2634: int low = 0xDC00 | (cpPrime & 0x3FF);
2635: dst[dstIndex] = (char) high;
2636: dst[dstIndex + 1] = (char) low;
2637: return 2;
2638: }
2639:
2640: dst[dstIndex] = (char) codePoint;
2641: return 1;
2642: }
2643:
2644: /**
2645: * <p>
2646: * Converts the Unicode code point, <code>codePoint</code>, into a UTF-16
2647: * encoded sequence that is returned as a <code>char[]</code>.
2648: * </p>
2649: *
2650: * @param codePoint The Unicode code point to encode.
2651: * @return The UTF-16 encoded <code>char</code> sequence; if code point is
2652: * a {@link #isSupplementaryCodePoint(int) supplementary code point},
2653: * then a 2 <code>char</code> array is returned, otherwise a 1
2654: * <code>char</code> array is returned.
2655: * @throws IllegalArgumentException if <code>codePoint</code> is not a
2656: * valid Unicode code point.
2657: * @since 1.5
2658: */
2659: public static char[] toChars(int codePoint) {
2660: if (!isValidCodePoint(codePoint)) {
2661: throw new IllegalArgumentException();
2662: }
2663:
2664: if (isSupplementaryCodePoint(codePoint)) {
2665: int cpPrime = codePoint - 0x10000;
2666: int high = 0xD800 | ((cpPrime >> 10) & 0x3FF);
2667: int low = 0xDC00 | (cpPrime & 0x3FF);
2668: return new char[] { (char) high, (char) low };
2669: }
2670: return new char[] { (char) codePoint };
2671: }
2672:
2673: /**
2674: * <p>
2675: * Counts the number of Unicode code points in the subsequence of the
2676: * <code>CharSequence</code>, as delineated by the
2677: * <code>beginIndex</code> and <code>endIndex</code>. Any surrogate
2678: * values with missing pair values will be counted as 1 code point.
2679: * </p>
2680: *
2681: * @param seq The <code>CharSequence</code> to look through.
2682: * @param beginIndex The inclusive index to begin counting at.
2683: * @param endIndex The exclusive index to stop counting at.
2684: * @return The number of Unicode code points.
2685: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2686: * @throws IndexOutOfBoundsException if <code>beginIndex</code> is
2687: * negative, greater than <code>seq.length()</code> or greater
2688: * than <code>endIndex</code>.
2689: * @since 1.5
2690: */
2691: public static int codePointCount(CharSequence seq, int beginIndex,
2692: int endIndex) {
2693: if (seq == null) {
2694: throw new NullPointerException();
2695: }
2696: int len = seq.length();
2697: if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) {
2698: throw new IndexOutOfBoundsException();
2699: }
2700:
2701: int result = 0;
2702: for (int i = beginIndex; i < endIndex; i++) {
2703: char c = seq.charAt(i);
2704: if (isHighSurrogate(c)) {
2705: if (++i < endIndex) {
2706: c = seq.charAt(i);
2707: if (!isLowSurrogate(c)) {
2708: result++;
2709: }
2710: }
2711: }
2712: result++;
2713: }
2714: return result;
2715: }
2716:
2717: /**
2718: * <p>
2719: * Counts the number of Unicode code points in the subsequence of the
2720: * <code>char[]</code>, as delineated by the <code>offset</code> and
2721: * <code>count</code>. Any surrogate values with missing pair values will
2722: * be counted as 1 code point.
2723: * </p>
2724: *
2725: * @param seq The <code>char[]</code> to look through.
2726: * @param offset The inclusive index to begin counting at.
2727: * @param count The number of <code>char</code> values to look through in
2728: * <code>seq</code>.
2729: * @return The number of Unicode code points.
2730: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2731: * @throws IndexOutOfBoundsException if <code>offset</code> or
2732: * <code>count</code> is negative or if <code>endIndex</code> is
2733: * greater than <code>seq.length</code>.
2734: * @since 1.5
2735: */
2736: public static int codePointCount(char[] seq, int offset, int count) {
2737: if (seq == null) {
2738: throw new NullPointerException();
2739: }
2740: int len = seq.length;
2741: int endIndex = offset + count;
2742: if (offset < 0 || count < 0 || endIndex > len) {
2743: throw new IndexOutOfBoundsException();
2744: }
2745:
2746: int result = 0;
2747: for (int i = offset; i < endIndex; i++) {
2748: char c = seq[i];
2749: if (isHighSurrogate(c)) {
2750: if (++i < endIndex) {
2751: c = seq[i];
2752: if (!isLowSurrogate(c)) {
2753: result++;
2754: }
2755: }
2756: }
2757: result++;
2758: }
2759: return result;
2760: }
2761:
2762: /**
2763: * <p>
2764: * Determines the index into the <code>CharSequence</code> that is offset
2765: * (measured in code points and specified by <code>codePointOffset</code>),
2766: * from the <code>index</code> argument.
2767: * </p>
2768: *
2769: * @param seq The <code>CharSequence</code> to find the index within.
2770: * @param index The index to begin from, within the
2771: * <code>CharSequence</code>.
2772: * @param codePointOffset The number of code points to look back or
2773: * forwards; may be a negative or positive value.
2774: * @return The calculated index that is <code>codePointOffset</code> code
2775: * points from <code>index</code>.
2776: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2777: * @throws IndexOutOfBoundsException if <code>index</code> is negative,
2778: * greater than <code>seq.length()</code>, there aren't enough
2779: * values in <code>seq</code> after <code>index</code> or before
2780: * <code>index</code> if <code>codePointOffset</code> is
2781: * negative.
2782: * @since 1.5
2783: */
2784: public static int offsetByCodePoints(CharSequence seq, int index,
2785: int codePointOffset) {
2786: if (seq == null) {
2787: throw new NullPointerException();
2788: }
2789: int len = seq.length();
2790: if (index < 0 || index > len) {
2791: throw new IndexOutOfBoundsException();
2792: }
2793:
2794: if (codePointOffset == 0) {
2795: return index;
2796: }
2797:
2798: if (codePointOffset > 0) {
2799: int codePoints = codePointOffset;
2800: int i = index;
2801: while (codePoints > 0) {
2802: codePoints--;
2803: if (i >= len) {
2804: throw new IndexOutOfBoundsException();
2805: }
2806: if (isHighSurrogate(seq.charAt(i))) {
2807: int next = i + 1;
2808: if (next < len && isLowSurrogate(seq.charAt(next))) {
2809: i++;
2810: }
2811: }
2812: i++;
2813: }
2814: return i;
2815: }
2816:
2817: assert codePointOffset < 0;
2818: int codePoints = -codePointOffset;
2819: int i = index;
2820: while (codePoints > 0) {
2821: codePoints--;
2822: i--;
2823: if (i < 0) {
2824: throw new IndexOutOfBoundsException();
2825: }
2826: if (isLowSurrogate(seq.charAt(i))) {
2827: int prev = i - 1;
2828: if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) {
2829: i--;
2830: }
2831: }
2832: }
2833: return i;
2834: }
2835:
2836: /**
2837: * <p>
2838: * Determines the index into the <code>char[]</code> that is offset
2839: * (measured in code points and specified by <code>codePointOffset</code>),
2840: * from the <code>index</code> argument and is within the subsequence as
2841: * delineated by <code>start</code> and <code>count</code>.
2842: * </p>
2843: *
2844: * @param seq The <code>char[]</code> to find the index within.
2845: *
2846: * @param index The index to begin from, within the <code>char[]</code>.
2847: * @param codePointOffset The number of code points to look back or
2848: * forwards; may be a negative or positive value.
2849: * @param start The inclusive index that marks the beginning of the
2850: * subsequence.
2851: * @param count The number of <code>char</code> values to include within
2852: * the subsequence.
2853: * @return The calculated index that is <code>codePointOffset</code> code
2854: * points from <code>index</code>.
2855: * @throws NullPointerException if <code>seq</code> is <code>null</code>.
2856: * @throws IndexOutOfBoundsException if <code>start</code> or
2857: * <code>count</code> is negative, <code>start + count</code>
2858: * greater than <code>seq.length</code>, <code>index</code> is
2859: * less than <code>start</code>, <code>index</code> is greater
2860: * than <code>start + count</code> or there aren't enough values
2861: * in <code>seq</code> after <code>index</code> or before
2862: * <code>index</code> if <code>codePointOffset</code> is
2863: * negative.
2864: * @since 1.5
2865: */
2866: public static int offsetByCodePoints(char[] seq, int start,
2867: int count, int index, int codePointOffset) {
2868: if (seq == null) {
2869: throw new NullPointerException();
2870: }
2871: int end = start + count;
2872: if (start < 0 || count < 0 || end > seq.length || index < start
2873: || index > end) {
2874: throw new IndexOutOfBoundsException();
2875: }
2876:
2877: if (codePointOffset == 0) {
2878: return index;
2879: }
2880:
2881: if (codePointOffset > 0) {
2882: int codePoints = codePointOffset;
2883: int i = index;
2884: while (codePoints > 0) {
2885: codePoints--;
2886: if (i >= end) {
2887: throw new IndexOutOfBoundsException();
2888: }
2889: if (isHighSurrogate(seq[i])) {
2890: int next = i + 1;
2891: if (next < end && isLowSurrogate(seq[next])) {
2892: i++;
2893: }
2894: }
2895: i++;
2896: }
2897: return i;
2898: }
2899:
2900: assert codePointOffset < 0;
2901: int codePoints = -codePointOffset;
2902: int i = index;
2903: while (codePoints > 0) {
2904: codePoints--;
2905: i--;
2906: if (i < start) {
2907: throw new IndexOutOfBoundsException();
2908: }
2909: if (isLowSurrogate(seq[i])) {
2910: int prev = i - 1;
2911: if (prev >= start && isHighSurrogate(seq[prev])) {
2912: i--;
2913: }
2914: }
2915: }
2916: return i;
2917: }
2918:
2919: /**
2920: * Convenient method to determine the value of character <code>c</code> in
2921: * the supplied radix. The value of <code>radix</code> must be between
2922: * MIN_RADIX and MAX_RADIX.
2923: *
2924: * @param c
2925: * the character
2926: * @param radix
2927: * the radix
2928: * @return if <code>radix</code> lies between {@link #MIN_RADIX} and
2929: * {@link #MAX_RADIX} then the value of the character in the radix,
2930: * otherwise -1.
2931: */
2932: public static int digit(char c, int radix) {
2933: if (radix >= MIN_RADIX && radix <= MAX_RADIX) {
2934: if (c < 128) {
2935: // Optimized for ASCII
2936: int result = -1;
2937: if ('0' <= c && c <= '9') {
2938: result = c - '0';
2939: } else if ('a' <= c && c <= 'z') {
2940: result = c - ('a' - 10);
2941: } else if ('A' <= c && c <= 'Z') {
2942: result = c - ('A' - 10);
2943: }
2944: return result < radix ? result : -1;
2945: }
2946: int result = BinarySearch.binarySearchRange(digitKeys, c);
2947: if (result >= 0 && c <= digitValues[result * 2]) {
2948: int value = (char) (c - digitValues[result * 2 + 1]);
2949: if (value >= radix) {
2950: return -1;
2951: }
2952: return value;
2953: }
2954: }
2955: return -1;
2956: }
2957:
2958: /**
2959: * Convenient method to determine the value of character
2960: * <code>codePoint</code> in the supplied radix. The value of
2961: * <code>radix</code> must be between MIN_RADIX and MAX_RADIX.
2962: *
2963: * @param codePoint
2964: * the character, including supplementary characters
2965: * @param radix
2966: * the radix
2967: * @return if <code>radix</code> lies between {@link #MIN_RADIX} and
2968: * {@link #MAX_RADIX} then the value of the character in the radix,
2969: * otherwise -1.
2970: */
2971: public static int digit(int codePoint, int radix) {
2972: return UCharacter.digit(codePoint, radix);
2973: }
2974:
2975: /**
2976: * Compares the argument to the receiver, and answers true if they represent
2977: * the <em>same</em> object using a class specific comparison.
2978: * <p>
2979: * In this case, the argument must also be a Character, and the receiver and
2980: * argument must represent the same char value.
2981: *
2982: * @param object
2983: * the object to compare with this object
2984: * @return <code>true</code> if the object is the same as this object
2985: * <code>false</code> if it is different from this object
2986: *
2987: * @see #hashCode
2988: */
2989: @Override
2990: public boolean equals(Object object) {
2991: return (object instanceof Character)
2992: && (value == ((Character) object).value);
2993: }
2994:
2995: /**
2996: * Answers the character which represents the value in the specified radix.
2997: * The radix must be between MIN_RADIX and MAX_RADIX inclusive.
2998: *
2999: * @param digit
3000: * the integer value
3001: * @param radix
3002: * the radix
3003: * @return the character which represents the value in the radix
3004: */
3005: public static char forDigit(int digit, int radix) {
3006: if (MIN_RADIX <= radix && radix <= MAX_RADIX) {
3007: if (0 <= digit && digit < radix) {
3008: return (char) (digit < 10 ? digit + '0'
3009: : digit + 'a' - 10);
3010: }
3011: }
3012: return 0;
3013: }
3014:
3015: /**
3016: * Gets the numeric value of the Unicode character.
3017: *
3018: * @param c
3019: * the character
3020: * @return a numeric int value >= 0, -1 if there is no numeric value, -2 if
3021: * the numeric value is not an int >= 0
3022: */
3023: public static int getNumericValue(char c) {
3024: if (c < 128) {
3025: // Optimized for ASCII
3026: if (c >= '0' && c <= '9') {
3027: return c - '0';
3028: }
3029: if (c >= 'a' && c <= 'z') {
3030: return c - ('a' - 10);
3031: }
3032: if (c >= 'A' && c <= 'Z') {
3033: return c - ('A' - 10);
3034: }
3035: return -1;
3036: }
3037: int result = BinarySearch.binarySearchRange(numericKeys, c);
3038: if (result >= 0 && c <= numericValues[result * 2]) {
3039: char difference = numericValues[result * 2 + 1];
3040: if (difference == 0) {
3041: return -2;
3042: }
3043: // Value is always positive, must be negative value
3044: if (difference > c) {
3045: return c - (short) difference;
3046: }
3047: return c - difference;
3048: }
3049: return -1;
3050: }
3051:
3052: /**
3053: * Gets the numeric value of the Unicode character.
3054: *
3055: * @param codePoint
3056: * the character, including supplementary characters
3057: * @return a numeric int value which is not negative, -1 if there is no numeric value, -2 if
3058: * the numeric value is negative
3059: */
3060: public static int getNumericValue(int codePoint) {
3061: return UCharacter.getNumericValue(codePoint);
3062: }
3063:
3064: /**
3065: * Gets the general Unicode category of the specified character.
3066: *
3067: * @param c
3068: * the character
3069: * @return the Unicode category
3070: */
3071: public static int getType(char c) {
3072: if (c < 1000) {
3073: return typeValuesCache[(int) c];
3074: }
3075: int result = BinarySearch.binarySearchRange(typeKeys, c);
3076: int high = typeValues[result * 2];
3077: if (c <= high) {
3078: int code = typeValues[result * 2 + 1];
3079: if (code < 0x100) {
3080: return code;
3081: }
3082: return (c & 1) == 1 ? code >> 8 : code & 0xff;
3083: }
3084: return UNASSIGNED;
3085: }
3086:
3087: /**
3088: * Gets the general Unicode category of the specified character.
3089: *
3090: * @param codePoint
3091: * the character, including supplementary characters
3092: * @return the Unicode category
3093: */
3094: public static int getType(int codePoint) {
3095: if (codePoint < 1000 && codePoint > 0) {
3096: return typeValuesCache[codePoint];
3097: }
3098: int type = UCharacter.getType(codePoint);
3099:
3100: // the type values returned by UCharacter are not compatible with what
3101: // the spec says.RI's Character type values skip the value 17.
3102: if (type <= Character.FORMAT) {
3103: return type;
3104: }
3105: return (type + 1);
3106: }
3107:
3108: /**
3109: * Gets the Unicode directionality of the specified character.
3110: *
3111: * @param c
3112: * the character
3113: * @return the Unicode directionality
3114: */
3115: public static byte getDirectionality(char c) {
3116: int result = BinarySearch.binarySearchRange(bidiKeys, c);
3117: int high = bidiValues[result * 2];
3118: if (c <= high) {
3119: int code = bidiValues[result * 2 + 1];
3120: if (code < 0x100) {
3121: return (byte) (code - 1);
3122: }
3123: return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1);
3124: }
3125: return DIRECTIONALITY_UNDEFINED;
3126: }
3127:
3128: /**
3129: * Gets the Unicode directionality of the specified character.
3130: *
3131: * @param codePoint
3132: * the character, including supplementary characters
3133: * @return the Unicode directionality
3134: */
3135: public static byte getDirectionality(int codePoint) {
3136: if (getType(codePoint) == Character.UNASSIGNED) {
3137: return Character.DIRECTIONALITY_UNDEFINED;
3138: }
3139:
3140: byte UCDirectionality = UCharacter.getDirectionality(codePoint);
3141: if (UCDirectionality == -1) {
3142: return -1;
3143: }
3144: return DIRECTIONALITY[UCDirectionality];
3145: }
3146:
3147: /**
3148: * Answers whether the specified character is mirrored
3149: *
3150: * @param c
3151: * the character
3152: * @return true if the character is mirrored, false otherwise
3153: */
3154: public static boolean isMirrored(char c) {
3155: int value = c / 16;
3156: if (value >= mirrored.length) {
3157: return false;
3158: }
3159: int bit = 1 << (c % 16);
3160: return (mirrored[value] & bit) != 0;
3161: }
3162:
3163: /**
3164: * Answers whether the specified character is mirrored
3165: *
3166: * @param codePoint
3167: * the character, including supplementary characters
3168: * @return true if the character is mirrored, false otherwise
3169: */
3170: public static boolean isMirrored(int codePoint) {
3171: return UCharacter.isMirrored(codePoint);
3172: }
3173:
3174: /**
3175: * Answers an integer hash code for the receiver. Any two objects which
3176: * answer <code>true</code> when passed to <code>equals</code> must
3177: * answer the same value for this method.
3178: *
3179: * @return the receiver's hash
3180: *
3181: * @see #equals
3182: */
3183: @Override
3184: public int hashCode() {
3185: return value;
3186: }
3187:
3188: /**
3189: * Answers whether the specified character is defined in the Unicode
3190: * specification.
3191: *
3192: * @param c
3193: * the character
3194: * @return true if the general Unicode category of the character is not
3195: * UNASSIGNED, false otherwise
3196: */
3197: public static boolean isDefined(char c) {
3198: return getType(c) != UNASSIGNED;
3199: }
3200:
3201: /**
3202: * Answers whether the specified character is defined in the Unicode
3203: * specification.
3204: *
3205: * @param codePoint
3206: * the character, including supplementary characters
3207: * @return true if the general Unicode category of the character is not
3208: * UNASSIGNED, false otherwise
3209: */
3210: public static boolean isDefined(int codePoint) {
3211: return UCharacter.isDefined(codePoint);
3212: }
3213:
3214: /**
3215: * Answers whether the character is a digit.
3216: *
3217: * @param c
3218: * the character
3219: * @return true when the character is a digit, false otherwise
3220: */
3221: public static boolean isDigit(char c) {
3222: // Optimized case for ASCII
3223: if ('0' <= c && c <= '9') {
3224: return true;
3225: }
3226: if (c < 1632) {
3227: return false;
3228: }
3229: return getType(c) == DECIMAL_DIGIT_NUMBER;
3230: }
3231:
3232: /**
3233: * Answers whether the character is a digit.
3234: *
3235: * @param codePoint
3236: * the character, including supplementary characters
3237: * @return true when the character is a digit, false otherwise
3238: */
3239: public static boolean isDigit(int codePoint) {
3240: return UCharacter.isDigit(codePoint);
3241: }
3242:
3243: /**
3244: * Answers whether the specified character is ignorable in a Java or Unicode
3245: * identifier.
3246: *
3247: * @param c
3248: * the character
3249: * @return true when the character is ignorable, false otherwise
3250: */
3251: public static boolean isIdentifierIgnorable(char c) {
3252: return (c >= 0 && c <= 8) || (c >= 0xe && c <= 0x1b)
3253: || (c >= 0x7f && c <= 0x9f) || getType(c) == FORMAT;
3254: }
3255:
3256: /**
3257: * Answers whether the specified character is ignorable in a Java or Unicode
3258: * identifier.
3259: *
3260: * @param codePoint
3261: * the character, including supplementary characters
3262: * @return true when the character is ignorable, false otherwise
3263: */
3264: public static boolean isIdentifierIgnorable(int codePoint) {
3265: return UCharacter.isIdentifierIgnorable(codePoint);
3266: }
3267:
3268: /**
3269: * Answers whether the character is an ISO control character.
3270: *
3271: * @param c
3272: * the character
3273: * @return <code>true</code> if <code>c</code> is an ISO control
3274: * character, otherwise <code>false</code>
3275: */
3276: public static boolean isISOControl(char c) {
3277: return isISOControl((int) c);
3278: }
3279:
3280: /**
3281: * Answers whether the character is an ISO control character.
3282: *
3283: * @param c
3284: * the character, including supplementary characters
3285: * @return <code>true</code> if <code>c</code> is an ISO control
3286: * character, otherwise <code>false</code>
3287: */
3288: public static boolean isISOControl(int c) {
3289: return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
3290: }
3291:
3292: /**
3293: * Answers whether the character is a valid part of a Unicode identifier as
3294: * other than the first character.
3295: *
3296: * @param c
3297: * the character
3298: * @return true when the character is valid as part of a Java identifier,
3299: * false otherwise
3300: */
3301: public static boolean isJavaIdentifierPart(char c) {
3302: // Optimized case for ASCII
3303: if (c < 128) {
3304: return (typeTags[c] & ISJAVAPART) != 0;
3305: }
3306:
3307: int type = getType(c);
3308: return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3309: || type == CURRENCY_SYMBOL
3310: || type == CONNECTOR_PUNCTUATION
3311: || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
3312: || type == NON_SPACING_MARK
3313: || type == COMBINING_SPACING_MARK
3314: || (c >= 0x80 && c <= 0x9f) || type == FORMAT;
3315: }
3316:
3317: /**
3318: * Answers whether the character is a valid part of a Unicode identifier as
3319: * other than the first character.
3320: *
3321: * @param codePoint
3322: * the character, including supplementary characters
3323: * @return true when the character is valid as part of a Java identifier,
3324: * false otherwise
3325: */
3326: public static boolean isJavaIdentifierPart(int codePoint) {
3327: int type = getType(codePoint);
3328: return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3329: || type == CURRENCY_SYMBOL
3330: || type == CONNECTOR_PUNCTUATION
3331: || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
3332: || type == COMBINING_SPACING_MARK
3333: || type == NON_SPACING_MARK
3334: || isIdentifierIgnorable(codePoint);
3335: }
3336:
3337: /**
3338: * Answers whether the character is a valid start of a Unicode identifier
3339: *
3340: * @param c
3341: * the character
3342: * @return true when the character is a valid start of a Java identifier,
3343: * false otherwise
3344: */
3345: public static boolean isJavaIdentifierStart(char c) {
3346: // Optimized case for ASCII
3347: if (c < 128) {
3348: return (typeTags[c] & ISJAVASTART) != 0;
3349: }
3350:
3351: int type = getType(c);
3352: return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3353: || type == CURRENCY_SYMBOL
3354: || type == CONNECTOR_PUNCTUATION
3355: || type == LETTER_NUMBER;
3356: }
3357:
3358: /**
3359: * Answers whether the character is a valid start of a Unicode identifier
3360: *
3361: * @param codePoint
3362: * the character, including supplementary characters
3363: * @return true when the character is a valid start of a Java identifier,
3364: * false otherwise
3365: */
3366: public static boolean isJavaIdentifierStart(int codePoint) {
3367: int type = getType(codePoint);
3368: return isLetter(codePoint) || type == CURRENCY_SYMBOL
3369: || type == CONNECTOR_PUNCTUATION
3370: || type == LETTER_NUMBER;
3371: }
3372:
3373: /**
3374: * Answers whether the character is a Java letter.
3375: *
3376: * @deprecated Use isJavaIdentifierStart
3377: */
3378: @Deprecated
3379: public static boolean isJavaLetter(char c) {
3380: return isJavaIdentifierStart(c);
3381: }
3382:
3383: /**
3384: * Answers whether the character is a Java letter or digit character.
3385: *
3386: * @deprecated Use isJavaIdentifierPart
3387: */
3388: @Deprecated
3389: public static boolean isJavaLetterOrDigit(char c) {
3390: return isJavaIdentifierPart(c);
3391: }
3392:
3393: /**
3394: * Answers whether the character is a letter.
3395: *
3396: * @param c
3397: * the character
3398: * @return true when the character is a letter, false otherwise
3399: */
3400: public static boolean isLetter(char c) {
3401: if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
3402: return true;
3403: }
3404: if (c < 128) {
3405: return false;
3406: }
3407: int type = getType(c);
3408: return type >= UPPERCASE_LETTER && type <= OTHER_LETTER;
3409: }
3410:
3411: /**
3412: * Answers whether the character is a letter.
3413: *
3414: * @param codePoint
3415: * the character, including supplementary characters
3416: * @return true when the character is a letter, false otherwise
3417: */
3418: public static boolean isLetter(int codePoint) {
3419: return UCharacter.isLetter(codePoint);
3420: }
3421:
3422: /**
3423: * Answers whether the character is a letter or a digit.
3424: *
3425: * @param c
3426: * the character
3427: * @return true when the character is a letter or a digit, false otherwise
3428: */
3429: public static boolean isLetterOrDigit(char c) {
3430: int type = getType(c);
3431: return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3432: || type == DECIMAL_DIGIT_NUMBER;
3433: }
3434:
3435: /**
3436: * Answers whether the character is a letter or a digit.
3437: *
3438: * @param codePoint
3439: * the character, including supplementary characters
3440: * @return true when the character is a letter or a digit, false otherwise
3441: */
3442: public static boolean isLetterOrDigit(int codePoint) {
3443: return UCharacter.isLetterOrDigit(codePoint);
3444: }
3445:
3446: /**
3447: * Answers whether the character is a lower case letter.
3448: *
3449: * @param c
3450: * the character
3451: * @return true when the character is a lower case letter, false otherwise
3452: */
3453: public static boolean isLowerCase(char c) {
3454: // Optimized case for ASCII
3455: if ('a' <= c && c <= 'z') {
3456: return true;
3457: }
3458: if (c < 128) {
3459: return false;
3460: }
3461:
3462: return getType(c) == LOWERCASE_LETTER;
3463: }
3464:
3465: /**
3466: * Answers whether the character is a lower case letter.
3467: *
3468: * @param codePoint
3469: * the character, including supplementary characters
3470: * @return true when the character is a lower case letter, false otherwise
3471: */
3472: public static boolean isLowerCase(int codePoint) {
3473: return UCharacter.isLowerCase(codePoint);
3474: }
3475:
3476: /**
3477: * Answers whether the character is a Java space.
3478: *
3479: * @deprecated Use isWhitespace
3480: */
3481: @Deprecated
3482: public static boolean isSpace(char c) {
3483: return c == '\n' || c == '\t' || c == '\f' || c == '\r'
3484: || c == ' ';
3485: }
3486:
3487: /**
3488: * Answers whether the character is a Unicode space character. A member of
3489: * one of the Unicode categories Space Separator, Line Separator, or
3490: * Paragraph Separator.
3491: *
3492: * @param c
3493: * the character
3494: * @return true when the character is a Unicode space character, false
3495: * otherwise
3496: */
3497: public static boolean isSpaceChar(char c) {
3498: if (c == 0x20 || c == 0xa0 || c == 0x1680) {
3499: return true;
3500: }
3501: if (c < 0x2000) {
3502: return false;
3503: }
3504: return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f
3505: || c == 0x3000;
3506: }
3507:
3508: /**
3509: * Answers whether the character is a Unicode space character. A member of
3510: * one of the Unicode categories Space Separator, Line Separator, or
3511: * Paragraph Separator.
3512: *
3513: * @param codePoint
3514: * the character, including supplementary characters
3515: * @return true when the character is a Unicode space character, false
3516: * otherwise
3517: */
3518: public static boolean isSpaceChar(int codePoint) {
3519: return UCharacter.isSpaceChar(codePoint);
3520: }
3521:
3522: /**
3523: * Answers whether the character is a titlecase character.
3524: *
3525: * @param c
3526: * the character
3527: * @return true when the character is a titlecase character, false
3528: * otherwise
3529: */
3530: public static boolean isTitleCase(char c) {
3531: if (c == '\u01c5' || c == '\u01c8' || c == '\u01cb'
3532: || c == '\u01f2') {
3533: return true;
3534: }
3535: if (c >= '\u1f88' && c <= '\u1ffc') {
3536: // 0x1f88 - 0x1f8f, 0x1f98 - 0x1f9f, 0x1fa8 - 0x1faf
3537: if (c > '\u1faf') {
3538: return c == '\u1fbc' || c == '\u1fcc' || c == '\u1ffc';
3539: }
3540: int last = c & 0xf;
3541: return last >= 8 && last <= 0xf;
3542: }
3543: return false;
3544: }
3545:
3546: /**
3547: * Answers whether the character is a titlecase character.
3548: *
3549: * @param codePoint
3550: * the character, including supplementary characters
3551: * @return true when the character is a titlecase character, false
3552: * otherwise
3553: */
3554: public static boolean isTitleCase(int codePoint) {
3555: return UCharacter.isTitleCase(codePoint);
3556: }
3557:
3558: /**
3559: * Answers whether the character is valid as part of a Unicode identifier as
3560: * other than the first character.
3561: *
3562: * @param c
3563: * the character
3564: * @return true when the character is valid as part of a Unicode identifier,
3565: * false otherwise
3566: */
3567: public static boolean isUnicodeIdentifierPart(char c) {
3568: int type = getType(c);
3569: return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3570: || type == CONNECTOR_PUNCTUATION
3571: || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
3572: || type == NON_SPACING_MARK
3573: || type == COMBINING_SPACING_MARK
3574: || isIdentifierIgnorable(c);
3575: }
3576:
3577: /**
3578: * Answers whether the character is valid as part of a Unicode identifier as
3579: * other than the first character.
3580: *
3581: * @param codePoint
3582: * the character, including supplementary characters
3583: * @return true when the character is valid as part of a Unicode identifier,
3584: * false otherwise
3585: */
3586: public static boolean isUnicodeIdentifierPart(int codePoint) {
3587: return UCharacter.isUnicodeIdentifierPart(codePoint);
3588: }
3589:
3590: /**
3591: * Answers whether the character is a valid initial character for a Unicode
3592: * identifier.
3593: *
3594: * @param c
3595: * the character
3596: * @return true when the character is a valid start of a Unicode identifier,
3597: * false otherwise
3598: */
3599: public static boolean isUnicodeIdentifierStart(char c) {
3600: int type = getType(c);
3601: return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
3602: || type == LETTER_NUMBER;
3603: }
3604:
3605: /**
3606: * Answers whether the character is a valid initial character for a Unicode
3607: * identifier.
3608: *
3609: * @param codePoint
3610: * the character, including supplementary characters
3611: * @return true when the character is a valid start of a Unicode identifier,
3612: * false otherwise
3613: */
3614: public static boolean isUnicodeIdentifierStart(int codePoint) {
3615: return UCharacter.isUnicodeIdentifierStart(codePoint);
3616: }
3617:
3618: /**
3619: * Answers whether the character is an upper case letter.
3620: *
3621: * @param c
3622: * the character
3623: * @return true when the character is a upper case letter, false otherwise
3624: */
3625: public static boolean isUpperCase(char c) {
3626: // Optimized case for ASCII
3627: if ('A' <= c && c <= 'Z') {
3628: return true;
3629: }
3630: if (c < 128) {
3631: return false;
3632: }
3633:
3634: return getType(c) == UPPERCASE_LETTER;
3635: }
3636:
3637: /**
3638: * Answers whether the character is an upper case letter.
3639: *
3640: * @param codePoint
3641: * the character, including supplementary characters
3642: * @return true when the character is a upper case letter, false otherwise
3643: */
3644: public static boolean isUpperCase(int codePoint) {
3645: return UCharacter.isUpperCase(codePoint);
3646: }
3647:
3648: /**
3649: * Answers whether the character is a whitespace character in Java.
3650: *
3651: * @param c
3652: * the character
3653: * @return true if the supplied <code>c</code> is a whitespace character
3654: * in Java, otherwise false.
3655: */
3656: public static boolean isWhitespace(char c) {
3657: // Optimized case for ASCII
3658: if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) {
3659: return true;
3660: }
3661: if (c == 0x1680) {
3662: return true;
3663: }
3664: if (c < 0x2000 || c == 0x2007) {
3665: return false;
3666: }
3667: return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000;
3668: }
3669:
3670: /**
3671: * Answers whether the character is a whitespace character in Java.
3672: *
3673: * @param codePoint
3674: * the character, including supplementary characters
3675: * @return true if the supplied <code>c</code> is a whitespace character
3676: * in Java, otherwise false.
3677: */
3678: public static boolean isWhitespace(int codePoint) {
3679: //FIXME depends on ICU when the codePoint is '\u2007'
3680: return UCharacter.isWhitespace(codePoint);
3681:
3682: }
3683:
3684: /**
3685: * Reverse the order of the first and second bytes in character
3686: * @param c
3687: * the character
3688: * @return the character with reordered bytes.
3689: */
3690: public static char reverseBytes(char c) {
3691: return (char) ((c << 8) | (c >> 8));
3692: }
3693:
3694: /**
3695: * Answers the lower case equivalent for the character when the character is
3696: * an upper case letter, otherwise answers the character.
3697: *
3698: * @param c
3699: * the character
3700: * @return if c is not a lower case character then its lower case
3701: * counterpart, otherwise just c
3702: */
3703: public static char toLowerCase(char c) {
3704: // Optimized case for ASCII
3705: if ('A' <= c && c <= 'Z') {
3706: return (char) (c + ('a' - 'A'));
3707: }
3708: if (c < 192) {// || c == 215 || (c > 222 && c < 256)) {
3709: return c;
3710: }
3711: if (c < 1000) {
3712: return (char) lowercaseValuesCache[c - 192];
3713: }
3714:
3715: int result = BinarySearch.binarySearchRange(lowercaseKeys, c);
3716: if (result >= 0) {
3717: boolean by2 = false;
3718: char start = lowercaseKeys.charAt(result);
3719: char end = lowercaseValues[result * 2];
3720: if ((start & 0x8000) != (end & 0x8000)) {
3721: end ^= 0x8000;
3722: by2 = true;
3723: }
3724: if (c <= end) {
3725: if (by2 && (c & 1) != (start & 1)) {
3726: return c;
3727: }
3728: char mapping = lowercaseValues[result * 2 + 1];
3729: return (char) (c + mapping);
3730: }
3731: }
3732: return c;
3733: }
3734:
3735: /**
3736: * Answers the lower case equivalent for the character when the character is
3737: * an upper case letter, otherwise answers the character.
3738: *
3739: * @param codePoint
3740: * the character, including supplementary characters
3741: * @return if codePoint is not a lower case character then its lower case
3742: * counterpart, otherwise just codePoint
3743: */
3744: public static int toLowerCase(int codePoint) {
3745: return UCharacter.toLowerCase(codePoint);
3746: }
3747:
3748: /**
3749: * Answers a string containing a concise, human-readable description of the
3750: * receiver.
3751: *
3752: * @return a printable representation for the receiver.
3753: */
3754: @Override
3755: public String toString() {
3756: return String.valueOf(value);
3757: }
3758:
3759: /**
3760: * Converts the specified character to its string representation.
3761: *
3762: * @param value
3763: * the character
3764: * @return the character converted to a string
3765: */
3766: public static String toString(char value) {
3767: return String.valueOf(value);
3768: }
3769:
3770: /**
3771: * Answers the title case equivalent for the character, otherwise answers the
3772: * character.
3773: *
3774: * @param c
3775: * the character
3776: * @return the title case equivalent of the character
3777: */
3778: public static char toTitleCase(char c) {
3779: if (isTitleCase(c)) {
3780: return c;
3781: }
3782: int result = BinarySearch.binarySearch(titlecaseKeys, c);
3783: if (result >= 0) {
3784: return titlecaseValues[result];
3785: }
3786: return toUpperCase(c);
3787: }
3788:
3789: /**
3790: * Answers the title case equivalent for the character, otherwise answers the
3791: * character.
3792: *
3793: * @param codePoint
3794: * the character
3795: * @return the title case equivalent of the character
3796: */
3797: public static int toTitleCase(int codePoint) {
3798: return UCharacter.toTitleCase(codePoint);
3799: }
3800:
3801: /**
3802: * Answers the upper case equivalent for the character when the character is
3803: * a lower case letter, otherwise answers the character.
3804: *
3805: * @param c
3806: * the character
3807: * @return if c is not an upper case character then its upper case
3808: * counterpart, otherwise just c
3809: */
3810: public static char toUpperCase(char c) {
3811: // Optimized case for ASCII
3812: if ('a' <= c && c <= 'z') {
3813: return (char) (c - ('a' - 'A'));
3814: }
3815: if (c < 181) {
3816: return c;
3817: }
3818: if (c < 1000) {
3819: return (char) uppercaseValuesCache[(int) c - 181];
3820: }
3821: int result = BinarySearch.binarySearchRange(uppercaseKeys, c);
3822: if (result >= 0) {
3823: boolean by2 = false;
3824: char start = uppercaseKeys.charAt(result);
3825: char end = uppercaseValues[result * 2];
3826: if ((start & 0x8000) != (end & 0x8000)) {
3827: end ^= 0x8000;
3828: by2 = true;
3829: }
3830: if (c <= end) {
3831: if (by2 && (c & 1) != (start & 1)) {
3832: return c;
3833: }
3834: char mapping = uppercaseValues[result * 2 + 1];
3835: return (char) (c + mapping);
3836: }
3837: }
3838: return c;
3839: }
3840:
3841: /**
3842: * Answers the upper case equivalent for the character when the character is
3843: * a lower case letter, otherwise answers the character.
3844: *
3845: * @param codePoint
3846: * the character, including supplementary characters
3847: * @return if codePoint is not an upper case character then its upper case
3848: * counterpart, otherwise just codePoint
3849: */
3850: public static int toUpperCase(int codePoint) {
3851: return UCharacter.toUpperCase(codePoint);
3852: }
3853:
3854: }
|