Source Code Cross Referenced for UCharacter.java in » Internationalization-Localization » icu4j » com » ibm » icu » lang » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.lang
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        //##header
0002:        /**
0003:         *******************************************************************************
0004:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
0005:         * others. All Rights Reserved.                                                *
0006:         *******************************************************************************
0007:         */package com.ibm.icu.lang;
0008:
0009:        import java.io.IOException;
0010:        import java.lang.ref.SoftReference;
0011:        import java.util.HashMap;
0012:        import java.util.Locale;
0013:        import java.util.Map;
0014:        import java.util.MissingResourceException;
0015:
0016:        import com.ibm.icu.impl.UBiDiProps;
0017:        import com.ibm.icu.impl.UCaseProps;
0018:        import com.ibm.icu.impl.NormalizerImpl;
0019:        import com.ibm.icu.impl.UCharacterUtility;
0020:        import com.ibm.icu.impl.UCharacterName;
0021:        import com.ibm.icu.impl.UCharacterNameChoice;
0022:        import com.ibm.icu.impl.UPropertyAliases;
0023:        import com.ibm.icu.lang.UCharacterEnums.*;
0024:        import com.ibm.icu.text.BreakIterator;
0025:        import com.ibm.icu.text.UTF16;
0026:        import com.ibm.icu.impl.UCharacterProperty;
0027:        import com.ibm.icu.util.RangeValueIterator;
0028:        import com.ibm.icu.util.ULocale;
0029:        import com.ibm.icu.util.ValueIterator;
0030:        import com.ibm.icu.util.VersionInfo;
0031:
0032:        /**
0033:         * <p>
0034:         * The UCharacter class provides extensions to the 
0035:         * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
0036:         * java.lang.Character</a> class. These extensions provide support for 
0037:         * more Unicode properties and together with the <a href="../text/UTF16.html">UTF16</a> 
0038:         * class, provide support for supplementary characters (those with code 
0039:         * points above U+FFFF).
0040:         * Each ICU release supports the latest version of Unicode available at that time.
0041:         * </p>
0042:         * <p>
0043:         * Code points are represented in this API using ints. While it would be 
0044:         * more convenient in Java to have a separate primitive datatype for them, 
0045:         * ints suffice in the meantime.
0046:         * </p>
0047:         * <p>
0048:         * To use this class, please add the jar file icu4j.jar to the 
0049:         * class path, since it contains the data files that supply the information used 
0050:         * by this class.<br>
0051:         * E.g. on Windows: <br>
0052:         * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
0053:         * Otherwise, another method would be to copy the files uprops.dat and 
0054:         * unames.icu from the icu4j source subdirectory
0055:         * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 
0056:         * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
0057:         * </p>
0058:         * <p>
0059:         * Aside from the additions for UTF-16 support, and the updated Unicode
0060:         * properties, the main differences between UCharacter and Character are:
0061:         * <ul>
0062:         * <li> UCharacter is not designed to be a char wrapper and does not have 
0063:         *      APIs that involve the management of that single char.<br>
0064:         *      These include: 
0065:         *      <ul>
0066:         *        <li> char charValue(), 
0067:         *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
0068:         *      </ul>
0069:         * <li> UCharacter does not include Character APIs that are deprecated, nor
0070:         *      does it include the Java-specific character information, such as 
0071:         *      boolean isJavaIdentifierPart(char ch).
0072:         * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 
0073:         *      values '10' - '35'. UCharacter also does this in digit and
0074:         *      getNumericValue, to adhere to the java semantics of these
0075:         *      methods.  New methods unicodeDigit, and
0076:         *      getUnicodeNumericValue do not treat the above code points 
0077:         *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
0078:         * </ul>
0079:         * <p>
0080:         * Further detailed differences can be determined using the program 
0081:         *        <a href="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
0082:         *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
0083:         * </p>
0084:         * <p>
0085:         * In addition to Java compatibility functions, which calculate derived properties,
0086:         * this API provides low-level access to the Unicode Character Database.
0087:         * </p>
0088:         * <p>
0089:         * Unicode assigns each code point (not just each assigned character) values for
0090:         * many properties.
0091:         * Most of them are simple boolean flags, or constants from a small enumerated list.
0092:         * For some properties, values are strings or other relatively more complex types.
0093:         * </p>
0094:         * <p>
0095:         * For more information see
0096:         * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
0097:         * and the ICU User Guide chapter on Properties (http://icu.sourceforge.net/userguide/properties.html).
0098:         * </p>
0099:         * <p>
0100:         * There are also functions that provide easy migration from C/POSIX functions
0101:         * like isblank(). Their use is generally discouraged because the C/POSIX
0102:         * standards do not define their semantics beyond the ASCII range, which means
0103:         * that different implementations exhibit very different behavior.
0104:         * Instead, Unicode properties should be used directly.
0105:         * </p>
0106:         * <p>
0107:         * There are also only a few, broad C/POSIX character classes, and they tend
0108:         * to be used for conflicting purposes. For example, the "isalpha()" class
0109:         * is sometimes used to determine word boundaries, while a more sophisticated
0110:         * approach would at least distinguish initial letters from continuation
0111:         * characters (the latter including combining marks).
0112:         * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
0113:         * Another example: There is no "istitle()" class for titlecase characters.
0114:         * </p>
0115:         * <p>
0116:         * ICU 3.4 and later provide API access for all twelve C/POSIX character classes.
0117:         * ICU implements them according to the Standard Recommendations in
0118:         * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
0119:         * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
0120:         * </p>
0121:         * <p>
0122:         * API access for C/POSIX character classes is as follows:
0123:         * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
0124:         * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
0125:         * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
0126:         * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|(1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|(1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
0127:         * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
0128:         * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
0129:         * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
0130:         * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
0131:         * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
0132:         * - cntrl:     getType(c)==CONTROL
0133:         * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
0134:         * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)
0135:         * </p>
0136:         * <p>
0137:         * The C/POSIX character classes are also available in UnicodeSet patterns,
0138:         * using patterns like [:graph:] or \p{graph}.
0139:         * </p>
0140:         * <p>
0141:         * Note: There are several ICU (and Java) whitespace functions.
0142:         * Comparison:
0143:         * - isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
0144:         *       most of general categories "Z" (separators) + most whitespace ISO controls
0145:         *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
0146:         * - isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
0147:         * - isSpaceChar: just Z (including no-break spaces)
0148:         * </p>
0149:         * <p>
0150:         * This class is final and cannot be subclassed.
0151:         * </p>
0152:         * @author Syn Wee Quek
0153:         * @stable ICU 2.1
0154:         * @see com.ibm.icu.lang.UCharacterEnums
0155:         */
0156:
0157:        public final class UCharacter implements  ECharacterCategory,
0158:                ECharacterDirection {
0159:            // public inner classes ----------------------------------------------
0160:
0161:            /**
0162:             * A family of character subsets representing the character blocks in the 
0163:             * Unicode specification, generated from Unicode Data file Blocks.txt. 
0164:             * Character blocks generally define characters used for a specific script 
0165:             * or purpose. A character is contained by at most one Unicode block. 
0166:             * @stable ICU 2.4
0167:             */
0168:            public static final class UnicodeBlock extends Character.Subset {
0169:                // blocks objects ---------------------------------------------------
0170:
0171:                /** 
0172:                 * @stable ICU 2.6
0173:                 */
0174:                public static final UnicodeBlock NO_BLOCK = new UnicodeBlock(
0175:                        "NO_BLOCK", 0);
0176:
0177:                /** 
0178:                 * @stable ICU 2.4
0179:                 */
0180:                public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock(
0181:                        "BASIC_LATIN", 1);
0182:                /** 
0183:                 * @stable ICU 2.4 
0184:                 */
0185:                public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock(
0186:                        "LATIN_1_SUPPLEMENT", 2);
0187:                /** 
0188:                 * @stable ICU 2.4 
0189:                 */
0190:                public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock(
0191:                        "LATIN_EXTENDED_A", 3);
0192:                /** 
0193:                 * @stable ICU 2.4 
0194:                 */
0195:                public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock(
0196:                        "LATIN_EXTENDED_B", 4);
0197:                /** 
0198:                 * @stable ICU 2.4 
0199:                 */
0200:                public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock(
0201:                        "IPA_EXTENSIONS", 5);
0202:                /** 
0203:                 * @stable ICU 2.4 
0204:                 */
0205:                public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock(
0206:                        "SPACING_MODIFIER_LETTERS", 6);
0207:                /** 
0208:                 * @stable ICU 2.4 
0209:                 */
0210:                public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock(
0211:                        "COMBINING_DIACRITICAL_MARKS", 7);
0212:                /**
0213:                 * Unicode 3.2 renames this block to "Greek and Coptic".
0214:                 * @stable ICU 2.4
0215:                 */
0216:                public static final UnicodeBlock GREEK = new UnicodeBlock(
0217:                        "GREEK", 8);
0218:                /** 
0219:                 * @stable ICU 2.4 
0220:                 */
0221:                public static final UnicodeBlock CYRILLIC = new UnicodeBlock(
0222:                        "CYRILLIC", 9);
0223:                /** 
0224:                 * @stable ICU 2.4 
0225:                 */
0226:                public static final UnicodeBlock ARMENIAN = new UnicodeBlock(
0227:                        "ARMENIAN", 10);
0228:                /** 
0229:                 * @stable ICU 2.4 
0230:                 */
0231:                public static final UnicodeBlock HEBREW = new UnicodeBlock(
0232:                        "HEBREW", 11);
0233:                /** 
0234:                 * @stable ICU 2.4 
0235:                 */
0236:                public static final UnicodeBlock ARABIC = new UnicodeBlock(
0237:                        "ARABIC", 12);
0238:                /** 
0239:                 * @stable ICU 2.4 
0240:                 */
0241:                public static final UnicodeBlock SYRIAC = new UnicodeBlock(
0242:                        "SYRIAC", 13);
0243:                /** 
0244:                 * @stable ICU 2.4 
0245:                 */
0246:                public static final UnicodeBlock THAANA = new UnicodeBlock(
0247:                        "THAANA", 14);
0248:                /** 
0249:                 * @stable ICU 2.4 
0250:                 */
0251:                public static final UnicodeBlock DEVANAGARI = new UnicodeBlock(
0252:                        "DEVANAGARI", 15);
0253:                /** 
0254:                 * @stable ICU 2.4 
0255:                 */
0256:                public static final UnicodeBlock BENGALI = new UnicodeBlock(
0257:                        "BENGALI", 16);
0258:                /** 
0259:                 * @stable ICU 2.4 
0260:                 */
0261:                public static final UnicodeBlock GURMUKHI = new UnicodeBlock(
0262:                        "GURMUKHI", 17);
0263:                /** 
0264:                 * @stable ICU 2.4 
0265:                 */
0266:                public static final UnicodeBlock GUJARATI = new UnicodeBlock(
0267:                        "GUJARATI", 18);
0268:                /** 
0269:                 * @stable ICU 2.4 
0270:                 */
0271:                public static final UnicodeBlock ORIYA = new UnicodeBlock(
0272:                        "ORIYA", 19);
0273:                /** 
0274:                 * @stable ICU 2.4 
0275:                 */
0276:                public static final UnicodeBlock TAMIL = new UnicodeBlock(
0277:                        "TAMIL", 20);
0278:                /** 
0279:                 * @stable ICU 2.4 
0280:                 */
0281:                public static final UnicodeBlock TELUGU = new UnicodeBlock(
0282:                        "TELUGU", 21);
0283:                /** 
0284:                 * @stable ICU 2.4 
0285:                 */
0286:                public static final UnicodeBlock KANNADA = new UnicodeBlock(
0287:                        "KANNADA", 22);
0288:                /** 
0289:                 * @stable ICU 2.4 
0290:                 */
0291:                public static final UnicodeBlock MALAYALAM = new UnicodeBlock(
0292:                        "MALAYALAM", 23);
0293:                /** 
0294:                 * @stable ICU 2.4 
0295:                 */
0296:                public static final UnicodeBlock SINHALA = new UnicodeBlock(
0297:                        "SINHALA", 24);
0298:                /** 
0299:                 * @stable ICU 2.4 
0300:                 */
0301:                public static final UnicodeBlock THAI = new UnicodeBlock(
0302:                        "THAI", 25);
0303:                /** 
0304:                 * @stable ICU 2.4 
0305:                 */
0306:                public static final UnicodeBlock LAO = new UnicodeBlock("LAO",
0307:                        26);
0308:                /** 
0309:                 * @stable ICU 2.4 
0310:                 */
0311:                public static final UnicodeBlock TIBETAN = new UnicodeBlock(
0312:                        "TIBETAN", 27);
0313:                /** 
0314:                 * @stable ICU 2.4 
0315:                 */
0316:                public static final UnicodeBlock MYANMAR = new UnicodeBlock(
0317:                        "MYANMAR", 28);
0318:                /** 
0319:                 * @stable ICU 2.4 
0320:                 */
0321:                public static final UnicodeBlock GEORGIAN = new UnicodeBlock(
0322:                        "GEORGIAN", 29);
0323:                /** 
0324:                 * @stable ICU 2.4 
0325:                 */
0326:                public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock(
0327:                        "HANGUL_JAMO", 30);
0328:                /** 
0329:                 * @stable ICU 2.4 
0330:                 */
0331:                public static final UnicodeBlock ETHIOPIC = new UnicodeBlock(
0332:                        "ETHIOPIC", 31);
0333:                /** 
0334:                 * @stable ICU 2.4 
0335:                 */
0336:                public static final UnicodeBlock CHEROKEE = new UnicodeBlock(
0337:                        "CHEROKEE", 32);
0338:                /** 
0339:                 * @stable ICU 2.4 
0340:                 */
0341:                public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock(
0342:                        "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 33);
0343:                /** 
0344:                 * @stable ICU 2.4 
0345:                 */
0346:                public static final UnicodeBlock OGHAM = new UnicodeBlock(
0347:                        "OGHAM", 34);
0348:                /** 
0349:                 * @stable ICU 2.4 
0350:                 */
0351:                public static final UnicodeBlock RUNIC = new UnicodeBlock(
0352:                        "RUNIC", 35);
0353:                /** 
0354:                 * @stable ICU 2.4 
0355:                 */
0356:                public static final UnicodeBlock KHMER = new UnicodeBlock(
0357:                        "KHMER", 36);
0358:                /** 
0359:                 * @stable ICU 2.4 
0360:                 */
0361:                public static final UnicodeBlock MONGOLIAN = new UnicodeBlock(
0362:                        "MONGOLIAN", 37);
0363:                /** 
0364:                 * @stable ICU 2.4 
0365:                 */
0366:                public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock(
0367:                        "LATIN_EXTENDED_ADDITIONAL", 38);
0368:                /** 
0369:                 * @stable ICU 2.4 
0370:                 */
0371:                public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock(
0372:                        "GREEK_EXTENDED", 39);
0373:                /** 
0374:                 * @stable ICU 2.4 
0375:                 */
0376:                public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock(
0377:                        "GENERAL_PUNCTUATION", 40);
0378:                /** 
0379:                 * @stable ICU 2.4 
0380:                 */
0381:                public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock(
0382:                        "SUPERSCRIPTS_AND_SUBSCRIPTS", 41);
0383:                /** 
0384:                 * @stable ICU 2.4 
0385:                 */
0386:                public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock(
0387:                        "CURRENCY_SYMBOLS", 42);
0388:                /**
0389:                 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 
0390:                 * Symbols".
0391:                 * @stable ICU 2.4
0392:                 */
0393:                public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock(
0394:                        "COMBINING_MARKS_FOR_SYMBOLS", 43);
0395:                /** 
0396:                 * @stable ICU 2.4 
0397:                 */
0398:                public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock(
0399:                        "LETTERLIKE_SYMBOLS", 44);
0400:                /** 
0401:                 * @stable ICU 2.4 
0402:                 */
0403:                public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock(
0404:                        "NUMBER_FORMS", 45);
0405:                /** 
0406:                 * @stable ICU 2.4 
0407:                 */
0408:                public static final UnicodeBlock ARROWS = new UnicodeBlock(
0409:                        "ARROWS", 46);
0410:                /** 
0411:                 * @stable ICU 2.4 
0412:                 */
0413:                public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock(
0414:                        "MATHEMATICAL_OPERATORS", 47);
0415:                /** 
0416:                 * @stable ICU 2.4 
0417:                 */
0418:                public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock(
0419:                        "MISCELLANEOUS_TECHNICAL", 48);
0420:                /** 
0421:                 * @stable ICU 2.4 
0422:                 */
0423:                public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock(
0424:                        "CONTROL_PICTURES", 49);
0425:                /** 
0426:                 * @stable ICU 2.4 
0427:                 */
0428:                public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock(
0429:                        "OPTICAL_CHARACTER_RECOGNITION", 50);
0430:                /** 
0431:                 * @stable ICU 2.4 
0432:                 */
0433:                public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock(
0434:                        "ENCLOSED_ALPHANUMERICS", 51);
0435:                /** 
0436:                 * @stable ICU 2.4 
0437:                 */
0438:                public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock(
0439:                        "BOX_DRAWING", 52);
0440:                /** 
0441:                 * @stable ICU 2.4 
0442:                 */
0443:                public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock(
0444:                        "BLOCK_ELEMENTS", 53);
0445:                /** 
0446:                 * @stable ICU 2.4 
0447:                 */
0448:                public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock(
0449:                        "GEOMETRIC_SHAPES", 54);
0450:                /** 
0451:                 * @stable ICU 2.4 
0452:                 */
0453:                public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock(
0454:                        "MISCELLANEOUS_SYMBOLS", 55);
0455:                /** 
0456:                 * @stable ICU 2.4 
0457:                 */
0458:                public static final UnicodeBlock DINGBATS = new UnicodeBlock(
0459:                        "DINGBATS", 56);
0460:                /** 
0461:                 * @stable ICU 2.4 
0462:                 */
0463:                public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock(
0464:                        "BRAILLE_PATTERNS", 57);
0465:                /** 
0466:                 * @stable ICU 2.4 
0467:                 */
0468:                public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock(
0469:                        "CJK_RADICALS_SUPPLEMENT", 58);
0470:                /** 
0471:                 * @stable ICU 2.4 
0472:                 */
0473:                public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock(
0474:                        "KANGXI_RADICALS", 59);
0475:                /** 
0476:                 * @stable ICU 2.4 
0477:                 */
0478:                public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock(
0479:                        "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 60);
0480:                /** 
0481:                 * @stable ICU 2.4 
0482:                 */
0483:                public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock(
0484:                        "CJK_SYMBOLS_AND_PUNCTUATION", 61);
0485:                /** 
0486:                 * @stable ICU 2.4 
0487:                 */
0488:                public static final UnicodeBlock HIRAGANA = new UnicodeBlock(
0489:                        "HIRAGANA", 62);
0490:                /** 
0491:                 * @stable ICU 2.4 
0492:                 */
0493:                public static final UnicodeBlock KATAKANA = new UnicodeBlock(
0494:                        "KATAKANA", 63);
0495:                /** 
0496:                 * @stable ICU 2.4 
0497:                 */
0498:                public static final UnicodeBlock BOPOMOFO = new UnicodeBlock(
0499:                        "BOPOMOFO", 64);
0500:                /** 
0501:                 * @stable ICU 2.4 
0502:                 */
0503:                public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock(
0504:                        "HANGUL_COMPATIBILITY_JAMO", 65);
0505:                /** 
0506:                 * @stable ICU 2.4 
0507:                 */
0508:                public static final UnicodeBlock KANBUN = new UnicodeBlock(
0509:                        "KANBUN", 66);
0510:                /** 
0511:                 * @stable ICU 2.4 
0512:                 */
0513:                public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock(
0514:                        "BOPOMOFO_EXTENDED", 67);
0515:                /** 
0516:                 * @stable ICU 2.4 
0517:                 */
0518:                public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock(
0519:                        "ENCLOSED_CJK_LETTERS_AND_MONTHS", 68);
0520:                /** 
0521:                 * @stable ICU 2.4 
0522:                 */
0523:                public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock(
0524:                        "CJK_COMPATIBILITY", 69);
0525:                /** 
0526:                 * @stable ICU 2.4 
0527:                 */
0528:                public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock(
0529:                        "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 70);
0530:                /** 
0531:                 * @stable ICU 2.4 
0532:                 */
0533:                public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock(
0534:                        "CJK_UNIFIED_IDEOGRAPHS", 71);
0535:                /** 
0536:                 * @stable ICU 2.4 
0537:                 */
0538:                public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock(
0539:                        "YI_SYLLABLES", 72);
0540:                /** 
0541:                 * @stable ICU 2.4 
0542:                 */
0543:                public static final UnicodeBlock YI_RADICALS = new UnicodeBlock(
0544:                        "YI_RADICALS", 73);
0545:                /** 
0546:                 * @stable ICU 2.4 
0547:                 */
0548:                public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock(
0549:                        "HANGUL_SYLLABLES", 74);
0550:                /** 
0551:                 * @stable ICU 2.4 
0552:                 */
0553:                public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock(
0554:                        "HIGH_SURROGATES", 75);
0555:                /** 
0556:                 * @stable ICU 2.4 
0557:                 */
0558:                public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock(
0559:                        "HIGH_PRIVATE_USE_SURROGATES", 76);
0560:                /** 
0561:                 * @stable ICU 2.4 
0562:                 */
0563:                public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock(
0564:                        "LOW_SURROGATES", 77);
0565:                /**
0566:                 * Same block object as the PRIVATE_USE constant.
0567:                 * Until Unicode 3.1.1, the corresponding block name was "Private Use",
0568:                 * and multiple code point ranges had this block.
0569:                 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 
0570:                 * and adds separate blocks for the supplementary PUAs.
0571:                 * @stable ICU 2.4
0572:                 */
0573:                public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock(
0574:                        "PRIVATE_USE_AREA", 78);
0575:                /**
0576:                 * Alias for the PRIVATE_USE_AREA constant (refers to the same block object).
0577:                 * Until Unicode 3.1.1, the corresponding block name was "Private Use",
0578:                 * and multiple code point ranges had this block.
0579:                 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 
0580:                 * and adds separate blocks for the supplementary PUAs.
0581:                 * @stable ICU 2.4
0582:                 */
0583:                public static final UnicodeBlock PRIVATE_USE = PRIVATE_USE_AREA;
0584:                /** Block object for CJK_COMPATIBILITY_IDEOGRAPHS (block id 79).
0585:                 * @stable ICU 2.4 
0586:                 */
0587:                public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock(
0588:                        "CJK_COMPATIBILITY_IDEOGRAPHS", 79);
0589:                /** Block object for ALPHABETIC_PRESENTATION_FORMS (block id 80).
0590:                 * @stable ICU 2.4 
0591:                 */
0592:                public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock(
0593:                        "ALPHABETIC_PRESENTATION_FORMS", 80);
0594:                /** Block object for ARABIC_PRESENTATION_FORMS_A (block id 81).
0595:                 * @stable ICU 2.4 
0596:                 */
0597:                public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock(
0598:                        "ARABIC_PRESENTATION_FORMS_A", 81);
0599:                /** Block object for COMBINING_HALF_MARKS (block id 82).
0600:                 * @stable ICU 2.4 
0601:                 */
0602:                public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock(
0603:                        "COMBINING_HALF_MARKS", 82);
0604:                /** Block object for CJK_COMPATIBILITY_FORMS (block id 83).
0605:                 * @stable ICU 2.4 
0606:                 */
0607:                public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock(
0608:                        "CJK_COMPATIBILITY_FORMS", 83);
0609:                /** Block object for SMALL_FORM_VARIANTS (block id 84).
0610:                 * @stable ICU 2.4 
0611:                 */
0612:                public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock(
0613:                        "SMALL_FORM_VARIANTS", 84);
0614:                /** Block object for ARABIC_PRESENTATION_FORMS_B (block id 85).
0615:                 * @stable ICU 2.4 
0616:                 */
0617:                public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock(
0618:                        "ARABIC_PRESENTATION_FORMS_B", 85);
0619:                /** Block object for SPECIALS (block id 86).
0620:                 * @stable ICU 2.4 
0621:                 */
0622:                public static final UnicodeBlock SPECIALS = new UnicodeBlock(
0623:                        "SPECIALS", 86);
0624:                /** Block object for HALFWIDTH_AND_FULLWIDTH_FORMS (block id 87).
0625:                 * @stable ICU 2.4 
0626:                 */
0627:                public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock(
0628:                        "HALFWIDTH_AND_FULLWIDTH_FORMS", 87);
0629:                /** Block object for OLD_ITALIC (block id 88).
0630:                 * @stable ICU 2.4 
0631:                 */
0632:                public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock(
0633:                        "OLD_ITALIC", 88);
0634:                /** Block object for GOTHIC (block id 89).
0635:                 * @stable ICU 2.4 
0636:                 */
0637:                public static final UnicodeBlock GOTHIC = new UnicodeBlock(
0638:                        "GOTHIC", 89);
0639:                /** Block object for DESERET (block id 90).
0640:                 * @stable ICU 2.4 
0641:                 */
0642:                public static final UnicodeBlock DESERET = new UnicodeBlock(
0643:                        "DESERET", 90);
0644:                /** Block object for BYZANTINE_MUSICAL_SYMBOLS (block id 91).
0645:                 * @stable ICU 2.4 
0646:                 */
0647:                public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock(
0648:                        "BYZANTINE_MUSICAL_SYMBOLS", 91);
0649:                /** Block object for MUSICAL_SYMBOLS (block id 92).
0650:                 * @stable ICU 2.4 
0651:                 */
0652:                public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock(
0653:                        "MUSICAL_SYMBOLS", 92);
0654:                /** Block object for MATHEMATICAL_ALPHANUMERIC_SYMBOLS (block id 93).
0655:                 * @stable ICU 2.4 
0656:                 */
0657:                public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock(
0658:                        "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 93);
0659:                /** Block object for CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B (block id 94).
0660:                 * @stable ICU 2.4 
0661:                 */
0662:                public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock(
0663:                        "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 94);
0664:                /** Block object for CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT (block id 95).
0665:                 * @stable ICU 2.4 
0666:                 */
0667:                public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock(
0668:                        "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 95);
0669:                /** Block object for TAGS (block id 96).
0670:                 * @stable ICU 2.4 
0671:                 */
0672:                public static final UnicodeBlock TAGS = new UnicodeBlock(
0673:                        "TAGS", 96);
0674:
0675:                // New blocks in Unicode 3.2
0676:
0677:                /** Block object carrying the older name; built with the same block id (97) as CYRILLIC_SUPPLEMENT.
0678:                 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
0679:                 * @stable ICU 2.4 
0680:                 */
0681:                public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock(
0682:                        "CYRILLIC_SUPPLEMENTARY", 97);
0683:                /** Block object carrying the newer name; a separate instance with the same block id (97) as CYRILLIC_SUPPLEMENTARY.
0684:                 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
0685:                 * @stable ICU 3.0
0686:                 */
0687:                public static final UnicodeBlock CYRILLIC_SUPPLEMENT = new UnicodeBlock(
0688:                        "CYRILLIC_SUPPLEMENT", 97);
0689:                /** Block object for TAGALOG (block id 98).
0690:                 * @stable ICU 2.4 
0691:                 */
0692:                public static final UnicodeBlock TAGALOG = new UnicodeBlock(
0693:                        "TAGALOG", 98);
0694:                /** Block object for HANUNOO (block id 99).
0695:                 * @stable ICU 2.4 
0696:                 */
0697:                public static final UnicodeBlock HANUNOO = new UnicodeBlock(
0698:                        "HANUNOO", 99);
0699:                /** Block object for BUHID (block id 100).
0700:                 * @stable ICU 2.4 
0701:                 */
0702:                public static final UnicodeBlock BUHID = new UnicodeBlock(
0703:                        "BUHID", 100);
0704:                /** Block object for TAGBANWA (block id 101).
0705:                 * @stable ICU 2.4 
0706:                 */
0707:                public static final UnicodeBlock TAGBANWA = new UnicodeBlock(
0708:                        "TAGBANWA", 101);
0709:                /** Block object for MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A (block id 102).
0710:                 * @stable ICU 2.4 
0711:                 */
0712:                public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock(
0713:                        "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 102);
0714:                /** Block object for SUPPLEMENTAL_ARROWS_A (block id 103).
0715:                 * @stable ICU 2.4 
0716:                 */
0717:                public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock(
0718:                        "SUPPLEMENTAL_ARROWS_A", 103);
0719:                /** Block object for SUPPLEMENTAL_ARROWS_B (block id 104).
0720:                 * @stable ICU 2.4 
0721:                 */
0722:                public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock(
0723:                        "SUPPLEMENTAL_ARROWS_B", 104);
0724:                /** Block object for MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B (block id 105).
0725:                 * @stable ICU 2.4 
0726:                 */
0727:                public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock(
0728:                        "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 105);
0729:                /** Block object for SUPPLEMENTAL_MATHEMATICAL_OPERATORS (block id 106).
0730:                 * @stable ICU 2.4 
0731:                 */
0732:                public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock(
0733:                        "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 106);
0734:                /** Block object for KATAKANA_PHONETIC_EXTENSIONS (block id 107).
0735:                 * @stable ICU 2.4 
0736:                 */
0737:                public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock(
0738:                        "KATAKANA_PHONETIC_EXTENSIONS", 107);
0739:                /** Block object for VARIATION_SELECTORS (block id 108).
0740:                 * @stable ICU 2.4 
0741:                 */
0742:                public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock(
0743:                        "VARIATION_SELECTORS", 108);
0744:                /** Block object for SUPPLEMENTARY_PRIVATE_USE_AREA_A (block id 109).
0745:                 * @stable ICU 2.4 
0746:                 */
0747:                public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock(
0748:                        "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 109);
0749:                /** Block object for SUPPLEMENTARY_PRIVATE_USE_AREA_B (block id 110).
0750:                 * @stable ICU 2.4 
0751:                 */
0752:                public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock(
0753:                        "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 110);
0754:
0755:                /** Block object for LIMBU (block id 111).
0756:                 * @stable ICU 2.6 
0757:                 */
0758:                public static final UnicodeBlock LIMBU = new UnicodeBlock(
0759:                        "LIMBU", 111);
0760:                /** Block object for TAI_LE (block id 112); the name uses an underscore like the sibling block names.
0761:                 * @stable ICU 2.6 
0762:                 */
0763:                public static final UnicodeBlock TAI_LE = new UnicodeBlock(
0764:                        "TAI_LE", 112);
0765:                /** Block object for KHMER_SYMBOLS (block id 113); the name uses an underscore like the sibling block names.
0766:                 * @stable ICU 2.6 
0767:                 */
0768:                public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock(
0769:                        "KHMER_SYMBOLS", 113);
0770:
0771:                /** Block object for PHONETIC_EXTENSIONS (block id 114); the name uses an underscore like the sibling block names.
0772:                 * @stable ICU 2.6 
0773:                 */
0774:                public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock(
0775:                        "PHONETIC_EXTENSIONS", 114);
0776:
0777:                /** Block object for MISCELLANEOUS_SYMBOLS_AND_ARROWS (block id 115).
0778:                 * @stable ICU 2.6 
0779:                 */
0780:                public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock(
0781:                        "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 115);
0782:                /** Block object for YIJING_HEXAGRAM_SYMBOLS (block id 116).
0783:                 * @stable ICU 2.6 
0784:                 */
0785:                public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock(
0786:                        "YIJING_HEXAGRAM_SYMBOLS", 116);
0787:                /** Block object for LINEAR_B_SYLLABARY (block id 117).
0788:                 * @stable ICU 2.6 
0789:                 */
0790:                public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock(
0791:                        "LINEAR_B_SYLLABARY", 117);
0792:                /** Block object for LINEAR_B_IDEOGRAMS (block id 118).
0793:                 * @stable ICU 2.6 
0794:                 */
0795:                public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock(
0796:                        "LINEAR_B_IDEOGRAMS", 118);
0797:                /** Block object for AEGEAN_NUMBERS (block id 119).
0798:                 * @stable ICU 2.6 
0799:                 */
0800:                public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock(
0801:                        "AEGEAN_NUMBERS", 119);
0802:                /** Block object for UGARITIC (block id 120).
0803:                 * @stable ICU 2.6 
0804:                 */
0805:                public static final UnicodeBlock UGARITIC = new UnicodeBlock(
0806:                        "UGARITIC", 120);
0807:                /** Block object for SHAVIAN (block id 121).
0808:                 * @stable ICU 2.6 
0809:                 */
0810:                public static final UnicodeBlock SHAVIAN = new UnicodeBlock(
0811:                        "SHAVIAN", 121);
0812:                /** Block object for OSMANYA (block id 122).
0813:                 * @stable ICU 2.6 
0814:                 */
0815:                public static final UnicodeBlock OSMANYA = new UnicodeBlock(
0816:                        "OSMANYA", 122);
0817:                /** Block object for CYPRIOT_SYLLABARY (block id 123).
0818:                 * @stable ICU 2.6 
0819:                 */
0820:                public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock(
0821:                        "CYPRIOT_SYLLABARY", 123);
0822:                /** Block object for TAI_XUAN_JING_SYMBOLS (block id 124).
0823:                 * @stable ICU 2.6 
0824:                 */
0825:                public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock(
0826:                        "TAI_XUAN_JING_SYMBOLS", 124);
0827:
0828:                /** Block object for VARIATION_SELECTORS_SUPPLEMENT (block id 125).
0829:                 * @stable ICU 2.6 
0830:                 */
0831:                public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock(
0832:                        "VARIATION_SELECTORS_SUPPLEMENT", 125);
0833:
0834:                /* New blocks in Unicode 4.1 */
0835:
0836:                /** Block object for ANCIENT_GREEK_MUSICAL_NOTATION (block id 126). NOTE(review): the trailing bracketed hex is presumably the block's first code point -- confirm.
0837:                 * @draft ICU 3.4
0838:                 * @provisional This API might change or be removed in a future release.
0839:                 */
0840:                public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock(
0841:                        "ANCIENT_GREEK_MUSICAL_NOTATION", 126); /*[1D200]*/
0842:
0843:                /** Block object for ANCIENT_GREEK_NUMBERS (block id 127).
0844:                 * @draft ICU 3.4
0845:                 * @provisional This API might change or be removed in a future release.
0846:                 */
0847:                public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock(
0848:                        "ANCIENT_GREEK_NUMBERS", 127); /*[10140]*/
0849:
0850:                /** Block object for ARABIC_SUPPLEMENT (block id 128).
0851:                 * @draft ICU 3.4
0852:                 * @provisional This API might change or be removed in a future release.
0853:                 */
0854:                public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock(
0855:                        "ARABIC_SUPPLEMENT", 128); /*[0750]*/
0856:
0857:                /** Block object for BUGINESE (block id 129).
0858:                 * @draft ICU 3.4
0859:                 * @provisional This API might change or be removed in a future release.
0860:                 */
0861:                public static final UnicodeBlock BUGINESE = new UnicodeBlock(
0862:                        "BUGINESE", 129); /*[1A00]*/
0863:
0864:                /** Block object for CJK_STROKES (block id 130).
0865:                 * @draft ICU 3.4
0866:                 * @provisional This API might change or be removed in a future release.
0867:                 */
0868:                public static final UnicodeBlock CJK_STROKES = new UnicodeBlock(
0869:                        "CJK_STROKES", 130); /*[31C0]*/
0870:
0871:                /** Block object for COMBINING_DIACRITICAL_MARKS_SUPPLEMENT (block id 131).
0872:                 * @draft ICU 3.4
0873:                 * @provisional This API might change or be removed in a future release.
0874:                 */
0875:                public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock(
0876:                        "COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 131); /*[1DC0]*/
0877:
0878:                /** Block object for COPTIC (block id 132).
0879:                 * @draft ICU 3.4
0880:                 * @provisional This API might change or be removed in a future release.
0881:                 */
0882:                public static final UnicodeBlock COPTIC = new UnicodeBlock(
0883:                        "COPTIC", 132); /*[2C80]*/
0884:
0885:                /** Block object for ETHIOPIC_EXTENDED (block id 133).
0886:                 * @draft ICU 3.4
0887:                 * @provisional This API might change or be removed in a future release.
0888:                 */
0889:                public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock(
0890:                        "ETHIOPIC_EXTENDED", 133); /*[2D80]*/
0891:
0892:                /** Block object for ETHIOPIC_SUPPLEMENT (block id 134).
0893:                 * @draft ICU 3.4
0894:                 * @provisional This API might change or be removed in a future release.
0895:                 */
0896:                public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock(
0897:                        "ETHIOPIC_SUPPLEMENT", 134); /*[1380]*/
0898:
0899:                /** Block object for GEORGIAN_SUPPLEMENT (block id 135).
0900:                 * @draft ICU 3.4
0901:                 * @provisional This API might change or be removed in a future release.
0902:                 */
0903:                public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock(
0904:                        "GEORGIAN_SUPPLEMENT", 135); /*[2D00]*/
0905:
0906:                /** Block object for GLAGOLITIC (block id 136). NOTE(review): the trailing bracketed hex is presumably the block's first code point -- confirm.
0907:                 * @draft ICU 3.4
0908:                 * @provisional This API might change or be removed in a future release.
0909:                 */
0910:                public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock(
0911:                        "GLAGOLITIC", 136); /*[2C00]*/
0912:
0913:                /** Block object for KHAROSHTHI (block id 137).
0914:                 * @draft ICU 3.4
0915:                 * @provisional This API might change or be removed in a future release.
0916:                 */
0917:                public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock(
0918:                        "KHAROSHTHI", 137); /*[10A00]*/
0919:
0920:                /** Block object for MODIFIER_TONE_LETTERS (block id 138).
0921:                 * @draft ICU 3.4
0922:                 * @provisional This API might change or be removed in a future release.
0923:                 */
0924:                public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock(
0925:                        "MODIFIER_TONE_LETTERS", 138); /*[A700]*/
0926:
0927:                /** Block object for NEW_TAI_LUE (block id 139).
0928:                 * @draft ICU 3.4
0929:                 * @provisional This API might change or be removed in a future release.
0930:                 */
0931:                public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock(
0932:                        "NEW_TAI_LUE", 139); /*[1980]*/
0933:
0934:                /** Block object for OLD_PERSIAN (block id 140).
0935:                 * @draft ICU 3.4
0936:                 * @provisional This API might change or be removed in a future release.
0937:                 */
0938:                public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock(
0939:                        "OLD_PERSIAN", 140); /*[103A0]*/
0940:
0941:                /** Block object for PHONETIC_EXTENSIONS_SUPPLEMENT (block id 141).
0942:                 * @draft ICU 3.4
0943:                 * @provisional This API might change or be removed in a future release.
0944:                 */
0945:                public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock(
0946:                        "PHONETIC_EXTENSIONS_SUPPLEMENT", 141); /*[1D80]*/
0947:
0948:                /** Block object for SUPPLEMENTAL_PUNCTUATION (block id 142).
0949:                 * @draft ICU 3.4
0950:                 * @provisional This API might change or be removed in a future release.
0951:                 */
0952:                public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock(
0953:                        "SUPPLEMENTAL_PUNCTUATION", 142); /*[2E00]*/
0954:
0955:                /** Block object for SYLOTI_NAGRI (block id 143).
0956:                 * @draft ICU 3.4
0957:                 * @provisional This API might change or be removed in a future release.
0958:                 */
0959:                public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock(
0960:                        "SYLOTI_NAGRI", 143); /*[A800]*/
0961:
0962:                /** Block object for TIFINAGH (block id 144).
0963:                 * @draft ICU 3.4
0964:                 * @provisional This API might change or be removed in a future release.
0965:                 */
0966:                public static final UnicodeBlock TIFINAGH = new UnicodeBlock(
0967:                        "TIFINAGH", 144); /*[2D30]*/
0968:
0969:                /** Block object for VERTICAL_FORMS (block id 145).
0970:                 * @draft ICU 3.4
0971:                 * @provisional This API might change or be removed in a future release.
0972:                 */
0973:                public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock(
0974:                        "VERTICAL_FORMS", 145); /*[FE10]*/
0975:
0976:                /** Block object for NKO (block id 146). NOTE(review): the trailing bracketed hex is presumably the block's first code point -- confirm.
0977:                 * @draft ICU 3.6 
0978:                 * @provisional This API might change or be removed in a future release.
0979:                 */
0980:                public static final UnicodeBlock NKO = new UnicodeBlock("NKO",
0981:                        146); /*[07C0]*/
0982:                /** Block object for BALINESE (block id 147).
0983:                 * @draft ICU 3.6 
0984:                 * @provisional This API might change or be removed in a future release.
0985:                 */
0986:                public static final UnicodeBlock BALINESE = new UnicodeBlock(
0987:                        "BALINESE", 147); /*[1B00]*/
0988:                /** Block object for LATIN_EXTENDED_C (block id 148).
0989:                 * @draft ICU 3.6 
0990:                 * @provisional This API might change or be removed in a future release.
0991:                 */
0992:                public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock(
0993:                        "LATIN_EXTENDED_C", 148); /*[2C60]*/
0994:                /** Block object for LATIN_EXTENDED_D (block id 149).
0995:                 * @draft ICU 3.6 
0996:                 * @provisional This API might change or be removed in a future release. 
0997:                 */
0998:                public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock(
0999:                        "LATIN_EXTENDED_D", 149); /*[A720]*/
1000:                /** Block object for PHAGS_PA (block id 150).
1001:                 * @draft ICU 3.6 
1002:                 * @provisional This API might change or be removed in a future release.
1003:                 */
1004:                public static final UnicodeBlock PHAGS_PA = new UnicodeBlock(
1005:                        "PHAGS_PA", 150); /*[A840]*/
1006:                /** Block object for PHOENICIAN (block id 151).
1007:                 * @draft ICU 3.6 
1008:                 * @provisional This API might change or be removed in a future release.
1009:                 */
1010:                public static final UnicodeBlock PHOENICIAN = new UnicodeBlock(
1011:                        "PHOENICIAN", 151); /*[10900]*/
1012:                /** Block object for CUNEIFORM (block id 152).
1013:                 * @draft ICU 3.6 
1014:                 * @provisional This API might change or be removed in a future release.
1015:                 */
1016:                public static final UnicodeBlock CUNEIFORM = new UnicodeBlock(
1017:                        "CUNEIFORM", 152); /*[12000]*/
1018:                /** Block object for CUNEIFORM_NUMBERS_AND_PUNCTUATION (block id 153).
1019:                 * @draft ICU 3.6 
1020:                 * @provisional This API might change or be removed in a future release.
1021:                 */
1022:                public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock(
1023:                        "CUNEIFORM_NUMBERS_AND_PUNCTUATION", 153); /*[12400]*/
1024:                /** Block object for COUNTING_ROD_NUMERALS (block id 154).
1025:                 * @draft ICU 3.6 
1026:                 * @provisional This API might change or be removed in a future release.
1027:                 */
1028:                public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock(
1029:                        "COUNTING_ROD_NUMERALS", 154); /*[1D360]*/
1030:
1031:                /** Block object named "INVALID_CODE", constructed with id -1 (the same value as INVALID_CODE_ID).
1032:                 * @stable ICU 2.4 
1033:                 */
1034:                public static final UnicodeBlock INVALID_CODE = new UnicodeBlock(
1035:                        "INVALID_CODE", -1);
1036:
1037:                // block id corresponding to icu4c -----------------------------------
1038:
1039:                /** Numeric id -1, the same value the INVALID_CODE block object is constructed with.
1040:                 * @stable ICU 2.4 
1041:                 */
1042:                public static final int INVALID_CODE_ID = -1;
1043:                /** Numeric block id for BASIC_LATIN.
1044:                 * @stable ICU 2.4
1045:                 */
1046:                public static final int BASIC_LATIN_ID = 1;
1047:                /** Numeric block id for LATIN_1_SUPPLEMENT.
1048:                 * @stable ICU 2.4 
1049:                 */
1050:                public static final int LATIN_1_SUPPLEMENT_ID = 2;
1051:                /** Numeric block id for LATIN_EXTENDED_A.
1052:                 * @stable ICU 2.4 
1053:                 */
1054:                public static final int LATIN_EXTENDED_A_ID = 3;
1055:                /** Numeric block id for LATIN_EXTENDED_B.
1056:                 * @stable ICU 2.4 
1057:                 */
1058:                public static final int LATIN_EXTENDED_B_ID = 4;
1059:                /** Numeric block id for IPA_EXTENSIONS.
1060:                 * @stable ICU 2.4 
1061:                 */
1062:                public static final int IPA_EXTENSIONS_ID = 5;
1063:                /** Numeric block id for SPACING_MODIFIER_LETTERS.
1064:                 * @stable ICU 2.4 
1065:                 */
1066:                public static final int SPACING_MODIFIER_LETTERS_ID = 6;
1067:                /** Numeric block id for COMBINING_DIACRITICAL_MARKS.
1068:                 * @stable ICU 2.4 
1069:                 */
1070:                public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
1071:                /** Numeric block id for GREEK.
1072:                 * Unicode 3.2 renames this block to "Greek and Coptic".
1073:                 * @stable ICU 2.4
1074:                 */
1075:                public static final int GREEK_ID = 8;
1076:                /** Numeric block id for CYRILLIC.
1077:                 * @stable ICU 2.4 
1078:                 */
1079:                public static final int CYRILLIC_ID = 9;
1080:                /** Numeric block id for ARMENIAN.
1081:                 * @stable ICU 2.4 
1082:                 */
1083:                public static final int ARMENIAN_ID = 10;
1084:                /** Numeric block id for HEBREW.
1085:                 * @stable ICU 2.4 
1086:                 */
1087:                public static final int HEBREW_ID = 11;
1088:                /** Numeric block id for ARABIC.
1089:                 * @stable ICU 2.4 
1090:                 */
1091:                public static final int ARABIC_ID = 12;
1092:                /** Numeric block id for SYRIAC.
1093:                 * @stable ICU 2.4 
1094:                 */
1095:                public static final int SYRIAC_ID = 13;
1096:                /** Numeric block id for THAANA.
1097:                 * @stable ICU 2.4 
1098:                 */
1099:                public static final int THAANA_ID = 14;
1100:                /** Numeric block id for DEVANAGARI.
1101:                 * @stable ICU 2.4 
1102:                 */
1103:                public static final int DEVANAGARI_ID = 15;
1104:                /** Numeric block id for BENGALI.
1105:                 * @stable ICU 2.4 
1106:                 */
1107:                public static final int BENGALI_ID = 16;
1108:                /** Numeric block id for GURMUKHI.
1109:                 * @stable ICU 2.4 
1110:                 */
1111:                public static final int GURMUKHI_ID = 17;
1112:                /** Numeric block id for GUJARATI.
1113:                 * @stable ICU 2.4 
1114:                 */
1115:                public static final int GUJARATI_ID = 18;
1116:                /** Numeric block id for ORIYA.
1117:                 * @stable ICU 2.4 
1118:                 */
1119:                public static final int ORIYA_ID = 19;
1120:                /** Numeric block id for TAMIL.
1121:                 * @stable ICU 2.4 
1122:                 */
1123:                public static final int TAMIL_ID = 20;
1124:                /** Numeric block id for TELUGU.
1125:                 * @stable ICU 2.4 
1126:                 */
1127:                public static final int TELUGU_ID = 21;
1128:                /** Numeric block id for KANNADA.
1129:                 * @stable ICU 2.4 
1130:                 */
1131:                public static final int KANNADA_ID = 22;
1132:                /** Numeric block id for MALAYALAM.
1133:                 * @stable ICU 2.4 
1134:                 */
1135:                public static final int MALAYALAM_ID = 23;
1136:                /** Numeric block id for SINHALA.
1137:                 * @stable ICU 2.4 
1138:                 */
1139:                public static final int SINHALA_ID = 24;
1140:                /** Numeric block id for THAI.
1141:                 * @stable ICU 2.4 
1142:                 */
1143:                public static final int THAI_ID = 25;
1144:                /** Numeric block id for LAO.
1145:                 * @stable ICU 2.4 
1146:                 */
1147:                public static final int LAO_ID = 26;
1148:                /** Numeric block id for TIBETAN.
1149:                 * @stable ICU 2.4 
1150:                 */
1151:                public static final int TIBETAN_ID = 27;
1152:                /** Numeric block id for MYANMAR.
1153:                 * @stable ICU 2.4 
1154:                 */
1155:                public static final int MYANMAR_ID = 28;
1156:                /** Numeric block id for GEORGIAN.
1157:                 * @stable ICU 2.4 
1158:                 */
1159:                public static final int GEORGIAN_ID = 29;
1160:                /** Numeric block id for HANGUL_JAMO.
1161:                 * @stable ICU 2.4 
1162:                 */
1163:                public static final int HANGUL_JAMO_ID = 30;
1164:                /** Numeric block id for ETHIOPIC.
1165:                 * @stable ICU 2.4 
1166:                 */
1167:                public static final int ETHIOPIC_ID = 31;
1168:                /** Numeric block id for CHEROKEE.
1169:                 * @stable ICU 2.4 
1170:                 */
1171:                public static final int CHEROKEE_ID = 32;
1172:                /** Numeric block id for UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.
1173:                 * @stable ICU 2.4 
1174:                 */
1175:                public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
1176:                /** Numeric block id for OGHAM.
1177:                 * @stable ICU 2.4 
1178:                 */
1179:                public static final int OGHAM_ID = 34;
1180:                /** Numeric block id for RUNIC.
1181:                 * @stable ICU 2.4 
1182:                 */
1183:                public static final int RUNIC_ID = 35;
1184:                /** Numeric block id for KHMER.
1185:                 * @stable ICU 2.4 
1186:                 */
1187:                public static final int KHMER_ID = 36;
1188:                /** Numeric block id for MONGOLIAN.
1189:                 * @stable ICU 2.4 
1190:                 */
1191:                public static final int MONGOLIAN_ID = 37;
1192:                /** Numeric block id for LATIN_EXTENDED_ADDITIONAL.
1193:                 * @stable ICU 2.4 
1194:                 */
1195:                public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
1196:                /** Numeric block id for GREEK_EXTENDED.
1197:                 * @stable ICU 2.4 
1198:                 */
1199:                public static final int GREEK_EXTENDED_ID = 39;
1200:                /** Numeric block id for GENERAL_PUNCTUATION.
1201:                 * @stable ICU 2.4 
1202:                 */
1203:                public static final int GENERAL_PUNCTUATION_ID = 40;
1204:                /** Numeric block id for SUPERSCRIPTS_AND_SUBSCRIPTS.
1205:                 * @stable ICU 2.4 
1206:                 */
1207:                public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
1208:                /** Numeric block id for CURRENCY_SYMBOLS.
1209:                 * @stable ICU 2.4 
1210:                 */
1211:                public static final int CURRENCY_SYMBOLS_ID = 42;
1212:                /** Numeric block id for COMBINING_MARKS_FOR_SYMBOLS.
1213:                 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 
1214:                 * Symbols".
1215:                 * @stable ICU 2.4
1216:                 */
1217:                public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
1218:                /** Numeric block id for LETTERLIKE_SYMBOLS.
1219:                 * @stable ICU 2.4 
1220:                 */
1221:                public static final int LETTERLIKE_SYMBOLS_ID = 44;
1222:                /** Numeric block id for NUMBER_FORMS.
1223:                 * @stable ICU 2.4 
1224:                 */
1225:                public static final int NUMBER_FORMS_ID = 45;
1226:                /** Numeric block id for ARROWS.
1227:                 * @stable ICU 2.4 
1228:                 */
1229:                public static final int ARROWS_ID = 46;
1230:                /** Numeric block id for MATHEMATICAL_OPERATORS.
1231:                 * @stable ICU 2.4 
1232:                 */
1233:                public static final int MATHEMATICAL_OPERATORS_ID = 47;
1234:                /** Numeric block id for MISCELLANEOUS_TECHNICAL.
1235:                 * @stable ICU 2.4 
1236:                 */
1237:                public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
1238:                /** Numeric block id for CONTROL_PICTURES.
1239:                 * @stable ICU 2.4 
1240:                 */
1241:                public static final int CONTROL_PICTURES_ID = 49;
1242:                /** Numeric block id for OPTICAL_CHARACTER_RECOGNITION.
1243:                 * @stable ICU 2.4 
1244:                 */
1245:                public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
1246:                /** Numeric block id for ENCLOSED_ALPHANUMERICS.
1247:                 * @stable ICU 2.4 
1248:                 */
1249:                public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
1250:                /** Numeric block id for BOX_DRAWING.
1251:                 * @stable ICU 2.4 
1252:                 */
1253:                public static final int BOX_DRAWING_ID = 52;
1254:                /** Numeric block id for BLOCK_ELEMENTS.
1255:                 * @stable ICU 2.4 
1256:                 */
1257:                public static final int BLOCK_ELEMENTS_ID = 53;
1258:                /** Numeric block id for GEOMETRIC_SHAPES.
1259:                 * @stable ICU 2.4 
1260:                 */
1261:                public static final int GEOMETRIC_SHAPES_ID = 54;
1262:                /** Numeric block id for MISCELLANEOUS_SYMBOLS.
1263:                 * @stable ICU 2.4 
1264:                 */
1265:                public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
1266:                /** Numeric block id for DINGBATS.
1267:                 * @stable ICU 2.4 
1268:                 */
1269:                public static final int DINGBATS_ID = 56;
1270:                /** Numeric block id for BRAILLE_PATTERNS.
1271:                 * @stable ICU 2.4 
1272:                 */
1273:                public static final int BRAILLE_PATTERNS_ID = 57;
1274:                /** Numeric block id for CJK_RADICALS_SUPPLEMENT.
1275:                 * @stable ICU 2.4 
1276:                 */
1277:                public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
1278:                /** Numeric block id for KANGXI_RADICALS.
1279:                 * @stable ICU 2.4 
1280:                 */
1281:                public static final int KANGXI_RADICALS_ID = 59;
1282:                /** Numeric block id for IDEOGRAPHIC_DESCRIPTION_CHARACTERS.
1283:                 * @stable ICU 2.4 
1284:                 */
1285:                public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
1286:                /** Numeric block id for CJK_SYMBOLS_AND_PUNCTUATION.
1287:                 * @stable ICU 2.4 
1288:                 */
1289:                public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
1290:                /** Numeric block id for HIRAGANA.
1291:                 * @stable ICU 2.4 
1292:                 */
1293:                public static final int HIRAGANA_ID = 62;
1294:                /** Numeric block id for KATAKANA.
1295:                 * @stable ICU 2.4 
1296:                 */
1297:                public static final int KATAKANA_ID = 63;
1298:                /** Numeric block id for BOPOMOFO.
1299:                 * @stable ICU 2.4 
1300:                 */
1301:                public static final int BOPOMOFO_ID = 64;
1302:                /** Numeric block id for HANGUL_COMPATIBILITY_JAMO.
1303:                 * @stable ICU 2.4 
1304:                 */
1305:                public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
1306:                /** Numeric block id for KANBUN.
1307:                 * @stable ICU 2.4 
1308:                 */
1309:                public static final int KANBUN_ID = 66;
1310:                /** Numeric block id for BOPOMOFO_EXTENDED.
1311:                 * @stable ICU 2.4 
1312:                 */
1313:                public static final int BOPOMOFO_EXTENDED_ID = 67;
1314:                /** Numeric block id for ENCLOSED_CJK_LETTERS_AND_MONTHS.
1315:                 * @stable ICU 2.4 
1316:                 */
1317:                public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
1318:                /** Numeric block id for CJK_COMPATIBILITY.
1319:                 * @stable ICU 2.4 
1320:                 */
1321:                public static final int CJK_COMPATIBILITY_ID = 69;
1322:                /** Numeric block id for CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A.
1323:                 * @stable ICU 2.4 
1324:                 */
1325:                public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
1326:                /** Numeric block id for CJK_UNIFIED_IDEOGRAPHS.
1327:                 * @stable ICU 2.4 
1328:                 */
1329:                public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
1330:                /** 
1331:                 * @stable ICU 2.4 
1332:                 */
1333:                public static final int YI_SYLLABLES_ID = 72;
1334:                /** 
1335:                 * @stable ICU 2.4 
1336:                 */
1337:                public static final int YI_RADICALS_ID = 73;
1338:                /** 
1339:                 * @stable ICU 2.4 
1340:                 */
1341:                public static final int HANGUL_SYLLABLES_ID = 74;
1342:                /** 
1343:                 * @stable ICU 2.4 
1344:                 */
1345:                public static final int HIGH_SURROGATES_ID = 75;
1346:                /** 
1347:                 * @stable ICU 2.4 
1348:                 */
1349:                public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
1350:                /** 
1351:                 * @stable ICU 2.4 
1352:                 */
1353:                public static final int LOW_SURROGATES_ID = 77;
                /**
                 * ID of the PRIVATE_USE_AREA block; same value as
                 * {@link #PRIVATE_USE_ID}.
                 * Until Unicode 3.1.1, the corresponding block name was "Private Use",
                 * and multiple code point ranges had this block.
                 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
                 * and adds separate blocks for the supplementary PUAs.
                 * @stable ICU 2.4
                 */
                public static final int PRIVATE_USE_AREA_ID = 78;
                /**
                 * Alias kept for the pre-Unicode-3.2 block name; same value as
                 * {@link #PRIVATE_USE_AREA_ID}.
                 * Until Unicode 3.1.1, the corresponding block name was "Private Use",
                 * and multiple code point ranges had this block.
                 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
                 * and adds separate blocks for the supplementary PUAs.
                 * @stable ICU 2.4
                 */
                public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
                // Integer block IDs 79-96. Each value is the index of the same-named
                // UnicodeBlock constant in BLOCKS_, i.e. the argument that
                // getInstance(int) maps back to that block.

                /**
                 * ID of the CJK_COMPATIBILITY_IDEOGRAPHS block.
                 * @stable ICU 2.4
                 */
                public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
                /**
                 * ID of the ALPHABETIC_PRESENTATION_FORMS block.
                 * @stable ICU 2.4
                 */
                public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
                /**
                 * ID of the ARABIC_PRESENTATION_FORMS_A block.
                 * @stable ICU 2.4
                 */
                public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
                /**
                 * ID of the COMBINING_HALF_MARKS block.
                 * @stable ICU 2.4
                 */
                public static final int COMBINING_HALF_MARKS_ID = 82;
                /**
                 * ID of the CJK_COMPATIBILITY_FORMS block.
                 * @stable ICU 2.4
                 */
                public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
                /**
                 * ID of the SMALL_FORM_VARIANTS block.
                 * @stable ICU 2.4
                 */
                public static final int SMALL_FORM_VARIANTS_ID = 84;
                /**
                 * ID of the ARABIC_PRESENTATION_FORMS_B block.
                 * @stable ICU 2.4
                 */
                public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
                /**
                 * ID of the SPECIALS block.
                 * @stable ICU 2.4
                 */
                public static final int SPECIALS_ID = 86;
                /**
                 * ID of the HALFWIDTH_AND_FULLWIDTH_FORMS block.
                 * @stable ICU 2.4
                 */
                public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
                /**
                 * ID of the OLD_ITALIC block.
                 * @stable ICU 2.4
                 */
                public static final int OLD_ITALIC_ID = 88;
                /**
                 * ID of the GOTHIC block.
                 * @stable ICU 2.4
                 */
                public static final int GOTHIC_ID = 89;
                /**
                 * ID of the DESERET block.
                 * @stable ICU 2.4
                 */
                public static final int DESERET_ID = 90;
                /**
                 * ID of the BYZANTINE_MUSICAL_SYMBOLS block.
                 * @stable ICU 2.4
                 */
                public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
                /**
                 * ID of the MUSICAL_SYMBOLS block.
                 * @stable ICU 2.4
                 */
                public static final int MUSICAL_SYMBOLS_ID = 92;
                /**
                 * ID of the MATHEMATICAL_ALPHANUMERIC_SYMBOLS block.
                 * @stable ICU 2.4
                 */
                public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
                /**
                 * ID of the CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B block.
                 * @stable ICU 2.4
                 */
                public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
                /**
                 * ID of the CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT block.
                 * @stable ICU 2.4
                 */
                public static final int CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
                /**
                 * ID of the TAGS block.
                 * @stable ICU 2.4
                 */
                public static final int TAGS_ID = 96;
1444:
                // New blocks in Unicode 3.2 (IDs 97-110). Each value is the index of
                // the corresponding UnicodeBlock constant in BLOCKS_.

                /**
                 * Older name for {@link #CYRILLIC_SUPPLEMENT_ID}; both constants have
                 * the value 97.
                 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
                 * @stable ICU 2.4
                 */
                public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
                /**
                 * ID of the CYRILLIC_SUPPLEMENT block; same value as
                 * {@link #CYRILLIC_SUPPLEMENTARY_ID}.
                 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
                 * @stable ICU 3.0
                 */
                public static final int CYRILLIC_SUPPLEMENT_ID = 97;
                /**
                 * ID of the TAGALOG block.
                 * @stable ICU 2.4
                 */
                public static final int TAGALOG_ID = 98;
                /**
                 * ID of the HANUNOO block.
                 * @stable ICU 2.4
                 */
                public static final int HANUNOO_ID = 99;
                /**
                 * ID of the BUHID block.
                 * @stable ICU 2.4
                 */
                public static final int BUHID_ID = 100;
                /**
                 * ID of the TAGBANWA block.
                 * @stable ICU 2.4
                 */
                public static final int TAGBANWA_ID = 101;
                /**
                 * ID of the MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A block.
                 * @stable ICU 2.4
                 */
                public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
                /**
                 * ID of the SUPPLEMENTAL_ARROWS_A block.
                 * @stable ICU 2.4
                 */
                public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
                /**
                 * ID of the SUPPLEMENTAL_ARROWS_B block.
                 * @stable ICU 2.4
                 */
                public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
                /**
                 * ID of the MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B block.
                 * @stable ICU 2.4
                 */
                public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
                /**
                 * ID of the SUPPLEMENTAL_MATHEMATICAL_OPERATORS block.
                 * @stable ICU 2.4
                 */
                public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
                /**
                 * ID of the KATAKANA_PHONETIC_EXTENSIONS block.
                 * @stable ICU 2.4
                 */
                public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
                /**
                 * ID of the VARIATION_SELECTORS block.
                 * @stable ICU 2.4
                 */
                public static final int VARIATION_SELECTORS_ID = 108;
                /**
                 * ID of the SUPPLEMENTARY_PRIVATE_USE_AREA_A block.
                 * @stable ICU 2.4
                 */
                public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
                /**
                 * ID of the SUPPLEMENTARY_PRIVATE_USE_AREA_B block.
                 * @stable ICU 2.4
                 */
                public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
1510:
                // Block IDs 111-125, first marked stable in ICU 2.6. Each value is the
                // index of the corresponding UnicodeBlock constant in BLOCKS_.
                // NOTE(review): the bracketed hex value after each constant looks like
                // the block's first code point -- confirm against the Unicode block data.

                /**
                 * ID of the LIMBU block.
                 * @stable ICU 2.6
                 */
                public static final int LIMBU_ID = 111; /*[1900]*/
                /**
                 * ID of the TAI_LE block.
                 * @stable ICU 2.6
                 */
                public static final int TAI_LE_ID = 112; /*[1950]*/
                /**
                 * ID of the KHMER_SYMBOLS block.
                 * @stable ICU 2.6
                 */
                public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
                /**
                 * ID of the PHONETIC_EXTENSIONS block.
                 * @stable ICU 2.6
                 */
                public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
                /**
                 * ID of the MISCELLANEOUS_SYMBOLS_AND_ARROWS block.
                 * @stable ICU 2.6
                 */
                public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
                /**
                 * ID of the YIJING_HEXAGRAM_SYMBOLS block.
                 * @stable ICU 2.6
                 */
                public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
                /**
                 * ID of the LINEAR_B_SYLLABARY block.
                 * @stable ICU 2.6
                 */
                public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
                /**
                 * ID of the LINEAR_B_IDEOGRAMS block.
                 * @stable ICU 2.6
                 */
                public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
                /**
                 * ID of the AEGEAN_NUMBERS block.
                 * @stable ICU 2.6
                 */
                public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
                /**
                 * ID of the UGARITIC block.
                 * @stable ICU 2.6
                 */
                public static final int UGARITIC_ID = 120; /*[10380]*/
                /**
                 * ID of the SHAVIAN block.
                 * @stable ICU 2.6
                 */
                public static final int SHAVIAN_ID = 121; /*[10450]*/
                /**
                 * ID of the OSMANYA block.
                 * @stable ICU 2.6
                 */
                public static final int OSMANYA_ID = 122; /*[10480]*/
                /**
                 * ID of the CYPRIOT_SYLLABARY block.
                 * @stable ICU 2.6
                 */
                public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
                /**
                 * ID of the TAI_XUAN_JING_SYMBOLS block.
                 * @stable ICU 2.6
                 */
                public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
                /**
                 * ID of the VARIATION_SELECTORS_SUPPLEMENT block.
                 * @stable ICU 2.6
                 */
                public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
1571:
                /* New blocks in Unicode 4.1 (IDs 126-145). */
                // Each value is the index of the corresponding UnicodeBlock constant
                // in BLOCKS_.
                // NOTE(review): the bracketed hex value after each constant looks like
                // the block's first code point -- confirm against the Unicode block data.

                /**
                 * ID of the ANCIENT_GREEK_MUSICAL_NOTATION block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/

                /**
                 * ID of the ANCIENT_GREEK_NUMBERS block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/

                /**
                 * ID of the ARABIC_SUPPLEMENT block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/

                /**
                 * ID of the BUGINESE block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int BUGINESE_ID = 129; /*[1A00]*/

                /**
                 * ID of the CJK_STROKES block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int CJK_STROKES_ID = 130; /*[31C0]*/

                /**
                 * ID of the COMBINING_DIACRITICAL_MARKS_SUPPLEMENT block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/

                /**
                 * ID of the COPTIC block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int COPTIC_ID = 132; /*[2C80]*/

                /**
                 * ID of the ETHIOPIC_EXTENDED block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/

                /**
                 * ID of the ETHIOPIC_SUPPLEMENT block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/

                /**
                 * ID of the GEORGIAN_SUPPLEMENT block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/

                /**
                 * ID of the GLAGOLITIC block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int GLAGOLITIC_ID = 136; /*[2C00]*/

                /**
                 * ID of the KHAROSHTHI block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int KHAROSHTHI_ID = 137; /*[10A00]*/

                /**
                 * ID of the MODIFIER_TONE_LETTERS block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/

                /**
                 * ID of the NEW_TAI_LUE block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/

                /**
                 * ID of the OLD_PERSIAN block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/

                /**
                 * ID of the PHONETIC_EXTENSIONS_SUPPLEMENT block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/

                /**
                 * ID of the SUPPLEMENTAL_PUNCTUATION block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/

                /**
                 * ID of the SYLOTI_NAGRI block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/

                /**
                 * ID of the TIFINAGH block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int TIFINAGH_ID = 144; /*[2D30]*/

                /**
                 * ID of the VERTICAL_FORMS block.
                 * @draft ICU 3.4
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
1693:
                /* New blocks in Unicode 5.0 (IDs 146-154). */
                // Each value is the index of the corresponding UnicodeBlock constant
                // in BLOCKS_.
                // NOTE(review): the bracketed hex value after each constant looks like
                // the block's first code point -- confirm against the Unicode block data.

                /**
                 * ID of the NKO block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int NKO_ID = 146; /*[07C0]*/
                /**
                 * ID of the BALINESE block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int BALINESE_ID = 147; /*[1B00]*/
                /**
                 * ID of the LATIN_EXTENDED_C block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
                /**
                 * ID of the LATIN_EXTENDED_D block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
                /**
                 * ID of the PHAGS_PA block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int PHAGS_PA_ID = 150; /*[A840]*/
                /**
                 * ID of the PHOENICIAN block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int PHOENICIAN_ID = 151; /*[10900]*/
                /**
                 * ID of the CUNEIFORM block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int CUNEIFORM_ID = 152; /*[12000]*/
                /**
                 * ID of the CUNEIFORM_NUMBERS_AND_PUNCTUATION block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
                /**
                 * ID of the COUNTING_ROD_NUMERALS block.
                 * @draft ICU 3.6
                 * @provisional This API might change or be removed in a future release.
                 */
                public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
1741:
                /**
                 * Number of block IDs, one more than the largest ID defined above.
                 * The class's static initializer throws IllegalStateException if this
                 * differs from the length of the internal BLOCKS_ table, so it must be
                 * updated whenever a block is added.
                 * @stable ICU 2.4
                 */
                public static final int COUNT = 155;
1746:
1747:                // public methods --------------------------------------------------
1748:
1749:                /** 
1750:                 * Gets the only instance of the UnicodeBlock with the argument ID.
1751:                 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
1752:                 * @param id UnicodeBlock ID
1753:                 * @return the only instance of the UnicodeBlock with the argument ID
1754:                 *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be 
1755:                 *         returned.
1756:                 * @stable ICU 2.4
1757:                 */
1758:                public static UnicodeBlock getInstance(int id) {
1759:                    if (id >= 0 && id < BLOCKS_.length) {
1760:                        return BLOCKS_[id];
1761:                    }
1762:                    return INVALID_CODE;
1763:                }
1764:
1765:                /**
1766:                 * Returns the Unicode allocation block that contains the code point,
1767:                 * or null if the code point is not a member of a defined block.
1768:                 * @param ch code point to be tested
1769:                 * @return the Unicode allocation block that contains the code point
1770:                 * @stable ICU 2.4
1771:                 */
1772:                public static UnicodeBlock of(int ch) {
1773:                    if (ch > MAX_VALUE) {
1774:                        return INVALID_CODE;
1775:                    }
1776:
1777:                    return UnicodeBlock.getInstance((PROPERTY_.getAdditional(
1778:                            ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_);
1779:                }
1780:
1781:                /**
1782:                 * Internal function returning of(ch).getID().
1783:                 *
1784:                 * @param ch
1785:                 * @return numeric block value
1786:                 * @internal
1787:                 */
1788:                static int idOf(int ch) {
1789:                    if (ch < 0 || ch > MAX_VALUE) {
1790:                        return -1;
1791:                    }
1792:
1793:                    return (PROPERTY_.getAdditional(ch, 0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
1794:                }
1795:
1796:                /**
1797:                 * Cover the JDK 1.5 API.  Return the Unicode block with the
1798:                 * given name.  <br/><b>Note</b>: Unlike JDK 1.5, this only matches
1799:                 * against the official UCD name and the Java block name
1800:                 * (ignoring case).
1801:                 * @param blockName the name of the block to match
1802:                 * @return the UnicodeBlock with that name
1803:                 * @throws IllegalArgumentException if the blockName could not be matched
1804:                 * @stable ICU 3.0
1805:                 */
1806:                public static final UnicodeBlock forName(String blockName) {
1807:                    Map m = null;
1808:                    if (mref != null) {
1809:                        m = (Map) mref.get();
1810:                    }
1811:                    if (m == null) {
1812:                        m = new HashMap(BLOCKS_.length);
1813:                        for (int i = 0; i < BLOCKS_.length; ++i) {
1814:                            UnicodeBlock b = BLOCKS_[i];
1815:                            String name = getPropertyValueName(UProperty.BLOCK,
1816:                                    b.getID(), UProperty.NameChoice.LONG);
1817:                            m.put(name.toUpperCase(), b);
1818:                            m.put(name.replace('_', ' ').toUpperCase(), b);
1819:                            m.put(b.toString().toUpperCase(), b);
1820:                        }
1821:                        mref = new SoftReference(m);
1822:                    }
1823:                    UnicodeBlock b = (UnicodeBlock) m.get(blockName
1824:                            .toUpperCase());
1825:                    if (b == null) {
1826:                        throw new IllegalArgumentException();
1827:                    }
1828:                    return b;
1829:                }
1830:
1831:                private static SoftReference mref;
1832:
1833:                /**
1834:                 * Returns the type ID of this Unicode block
1835:                 * @return integer type ID of this Unicode block
1836:                 * @stable ICU 2.4
1837:                 */
1838:                public int getID() {
1839:                    return m_id_;
1840:                }
1841:
1842:                // private data members ---------------------------------------------
1843:
1844:                /**
1845:                 * Array of UnicodeBlocks, for easy access in getInstance(int)
1846:                 */
1847:                private final static UnicodeBlock BLOCKS_[] = { NO_BLOCK,
1848:                        BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A,
1849:                        LATIN_EXTENDED_B, IPA_EXTENSIONS,
1850:                        SPACING_MODIFIER_LETTERS, COMBINING_DIACRITICAL_MARKS,
1851:                        GREEK, CYRILLIC, ARMENIAN, HEBREW, ARABIC, SYRIAC,
1852:                        THAANA, DEVANAGARI, BENGALI, GURMUKHI, GUJARATI, ORIYA,
1853:                        TAMIL, TELUGU, KANNADA, MALAYALAM, SINHALA, THAI, LAO,
1854:                        TIBETAN, MYANMAR, GEORGIAN, HANGUL_JAMO, ETHIOPIC,
1855:                        CHEROKEE, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM,
1856:                        RUNIC, KHMER, MONGOLIAN, LATIN_EXTENDED_ADDITIONAL,
1857:                        GREEK_EXTENDED, GENERAL_PUNCTUATION,
1858:                        SUPERSCRIPTS_AND_SUBSCRIPTS, CURRENCY_SYMBOLS,
1859:                        COMBINING_MARKS_FOR_SYMBOLS, LETTERLIKE_SYMBOLS,
1860:                        NUMBER_FORMS, ARROWS, MATHEMATICAL_OPERATORS,
1861:                        MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES,
1862:                        OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS,
1863:                        BOX_DRAWING, BLOCK_ELEMENTS, GEOMETRIC_SHAPES,
1864:                        MISCELLANEOUS_SYMBOLS, DINGBATS, BRAILLE_PATTERNS,
1865:                        CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS,
1866:                        IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1867:                        CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA,
1868:                        BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, KANBUN,
1869:                        BOPOMOFO_EXTENDED, ENCLOSED_CJK_LETTERS_AND_MONTHS,
1870:                        CJK_COMPATIBILITY, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1871:                        CJK_UNIFIED_IDEOGRAPHS, YI_SYLLABLES, YI_RADICALS,
1872:                        HANGUL_SYLLABLES, HIGH_SURROGATES,
1873:                        HIGH_PRIVATE_USE_SURROGATES, LOW_SURROGATES,
1874:                        PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS,
1875:                        ALPHABETIC_PRESENTATION_FORMS,
1876:                        ARABIC_PRESENTATION_FORMS_A, COMBINING_HALF_MARKS,
1877:                        CJK_COMPATIBILITY_FORMS, SMALL_FORM_VARIANTS,
1878:                        ARABIC_PRESENTATION_FORMS_B, SPECIALS,
1879:                        HALFWIDTH_AND_FULLWIDTH_FORMS, OLD_ITALIC, GOTHIC,
1880:                        DESERET, BYZANTINE_MUSICAL_SYMBOLS, MUSICAL_SYMBOLS,
1881:                        MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1882:                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1883:                        CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, TAGS,
1884:                        CYRILLIC_SUPPLEMENT, TAGALOG, HANUNOO, BUHID, TAGBANWA,
1885:                        MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1886:                        SUPPLEMENTAL_ARROWS_A, SUPPLEMENTAL_ARROWS_B,
1887:                        MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1888:                        SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1889:                        KATAKANA_PHONETIC_EXTENSIONS, VARIATION_SELECTORS,
1890:                        SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1891:                        SUPPLEMENTARY_PRIVATE_USE_AREA_B, LIMBU, TAI_LE,
1892:                        KHMER_SYMBOLS, PHONETIC_EXTENSIONS,
1893:                        MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1894:                        YIJING_HEXAGRAM_SYMBOLS, LINEAR_B_SYLLABARY,
1895:                        LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS, UGARITIC, SHAVIAN,
1896:                        OSMANYA, CYPRIOT_SYLLABARY, TAI_XUAN_JING_SYMBOLS,
1897:                        VARIATION_SELECTORS_SUPPLEMENT,
1898:
1899:                        /* New blocks in Unicode 4.1 */
1900:                        ANCIENT_GREEK_MUSICAL_NOTATION, ANCIENT_GREEK_NUMBERS,
1901:                        ARABIC_SUPPLEMENT, BUGINESE, CJK_STROKES,
1902:                        COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, COPTIC,
1903:                        ETHIOPIC_EXTENDED, ETHIOPIC_SUPPLEMENT,
1904:                        GEORGIAN_SUPPLEMENT, GLAGOLITIC, KHAROSHTHI,
1905:                        MODIFIER_TONE_LETTERS, NEW_TAI_LUE, OLD_PERSIAN,
1906:                        PHONETIC_EXTENSIONS_SUPPLEMENT,
1907:                        SUPPLEMENTAL_PUNCTUATION, SYLOTI_NAGRI, TIFINAGH,
1908:                        VERTICAL_FORMS, NKO, BALINESE, LATIN_EXTENDED_C,
1909:                        LATIN_EXTENDED_D, PHAGS_PA, PHOENICIAN, CUNEIFORM,
1910:                        CUNEIFORM_NUMBERS_AND_PUNCTUATION,
1911:                        COUNTING_ROD_NUMERALS };
1912:
1913:                static {
1914:                    if (COUNT != BLOCKS_.length) {
1915:                        throw new java.lang.IllegalStateException(
1916:                                "UnicodeBlock fields are inconsistent!");
1917:                    }
1918:                }
1919:                /**
1920:                 * Identification code for this UnicodeBlock
1921:                 */
1922:                private int m_id_;
1923:
1924:                // private constructor ----------------------------------------------
1925:
1926:                /**
1927:                 * UnicodeBlock constructor
1928:                 * @param name name of this UnicodeBlock
1929:                 * @param id unique id of this UnicodeBlock
1930:                 * @exception NullPointerException if name is <code>null</code>
1931:                 */
1932:                private UnicodeBlock(String name, int id) {
1933:                    super (name);
1934:                    m_id_ = id;
1935:                }
1936:            }
1937:
1938:            /**
1939:             * East Asian Width constants: int codes 0 (NEUTRAL) through 5 (WIDE), plus COUNT.
1940:             * @see UProperty#EAST_ASIAN_WIDTH
1941:             * @see UCharacter#getIntPropertyValue
1942:             * @stable ICU 2.4
1943:             */
1944:            public static interface EastAsianWidth {
1945:                /**
1946:                 * @stable ICU 2.4
1947:                 */
1948:                public static final int NEUTRAL = 0;
1949:                /**
1950:                 * @stable ICU 2.4
1951:                 */
1952:                public static final int AMBIGUOUS = 1;
1953:                /**
1954:                 * @stable ICU 2.4
1955:                 */
1956:                public static final int HALFWIDTH = 2;
1957:                /**
1958:                 * @stable ICU 2.4
1959:                 */
1960:                public static final int FULLWIDTH = 3;
1961:                /**
1962:                 * @stable ICU 2.4
1963:                 */
1964:                public static final int NARROW = 4;
1965:                /**
1966:                 * @stable ICU 2.4
1967:                 */
1968:                public static final int WIDE = 5;
1969:                /** Number of East Asian Width codes declared above (one more than the largest, WIDE).
1970:                 * @stable ICU 2.4
1971:                 */
1972:                public static final int COUNT = 6;
1973:            }
1974:
1975:            /**
1976:             * Decomposition Type constants: int codes 0 (NONE) through 17 (WIDE), plus COUNT.
1977:             * @see UProperty#DECOMPOSITION_TYPE
1978:             * @stable ICU 2.4
1979:             */
1980:            public static interface DecompositionType {
1981:                /**
1982:                 * @stable ICU 2.4
1983:                 */
1984:                public static final int NONE = 0;
1985:                /**
1986:                 * @stable ICU 2.4
1987:                 */
1988:                public static final int CANONICAL = 1;
1989:                /**
1990:                 * @stable ICU 2.4
1991:                 */
1992:                public static final int COMPAT = 2;
1993:                /**
1994:                 * @stable ICU 2.4
1995:                 */
1996:                public static final int CIRCLE = 3;
1997:                /**
1998:                 * @stable ICU 2.4
1999:                 */
2000:                public static final int FINAL = 4;
2001:                /**
2002:                 * @stable ICU 2.4
2003:                 */
2004:                public static final int FONT = 5;
2005:                /**
2006:                 * @stable ICU 2.4
2007:                 */
2008:                public static final int FRACTION = 6;
2009:                /**
2010:                 * @stable ICU 2.4
2011:                 */
2012:                public static final int INITIAL = 7;
2013:                /**
2014:                 * @stable ICU 2.4
2015:                 */
2016:                public static final int ISOLATED = 8;
2017:                /**
2018:                 * @stable ICU 2.4
2019:                 */
2020:                public static final int MEDIAL = 9;
2021:                /**
2022:                 * @stable ICU 2.4
2023:                 */
2024:                public static final int NARROW = 10;
2025:                /**
2026:                 * @stable ICU 2.4
2027:                 */
2028:                public static final int NOBREAK = 11;
2029:                /**
2030:                 * @stable ICU 2.4
2031:                 */
2032:                public static final int SMALL = 12;
2033:                /**
2034:                 * @stable ICU 2.4
2035:                 */
2036:                public static final int SQUARE = 13;
2037:                /**
2038:                 * @stable ICU 2.4
2039:                 */
2040:                public static final int SUB = 14;
2041:                /**
2042:                 * @stable ICU 2.4
2043:                 */
2044:                public static final int SUPER = 15;
2045:                /**
2046:                 * @stable ICU 2.4
2047:                 */
2048:                public static final int VERTICAL = 16;
2049:                /**
2050:                 * @stable ICU 2.4
2051:                 */
2052:                public static final int WIDE = 17;
2053:                /** Number of Decomposition Type codes declared above (one more than the largest, WIDE).
2054:                 * @stable ICU 2.4
2055:                 */
2056:                public static final int COUNT = 18;
2057:            }
2058:
2059:            /**
2060:             * Joining Type constants: int codes 0 (NON_JOINING) through 5 (TRANSPARENT), plus COUNT.
2061:             * @see UProperty#JOINING_TYPE
2062:             * @stable ICU 2.4
2063:             */
2064:            public static interface JoiningType {
2065:                /**
2066:                 * @stable ICU 2.4
2067:                 */
2068:                public static final int NON_JOINING = 0;
2069:                /**
2070:                 * @stable ICU 2.4
2071:                 */
2072:                public static final int JOIN_CAUSING = 1;
2073:                /**
2074:                 * @stable ICU 2.4
2075:                 */
2076:                public static final int DUAL_JOINING = 2;
2077:                /**
2078:                 * @stable ICU 2.4
2079:                 */
2080:                public static final int LEFT_JOINING = 3;
2081:                /**
2082:                 * @stable ICU 2.4
2083:                 */
2084:                public static final int RIGHT_JOINING = 4;
2085:                /**
2086:                 * @stable ICU 2.4
2087:                 */
2088:                public static final int TRANSPARENT = 5;
2089:                /** Number of Joining Type codes declared above (one more than the largest, TRANSPARENT).
2090:                 * @stable ICU 2.4
2091:                 */
2092:                public static final int COUNT = 6;
2093:            }
2094:
2095:            /**
2096:             * Joining Group constants: int codes 0 (NO_JOINING_GROUP) through 53 (ZHAIN), plus COUNT.
2097:             * @see UProperty#JOINING_GROUP
2098:             * @stable ICU 2.4
2099:             */
2100:            public static interface JoiningGroup {
2101:                /**
2102:                 * @stable ICU 2.4
2103:                 */
2104:                public static final int NO_JOINING_GROUP = 0;
2105:                /**
2106:                 * @stable ICU 2.4
2107:                 */
2108:                public static final int AIN = 1;
2109:                /**
2110:                 * @stable ICU 2.4
2111:                 */
2112:                public static final int ALAPH = 2;
2113:                /**
2114:                 * @stable ICU 2.4
2115:                 */
2116:                public static final int ALEF = 3;
2117:                /**
2118:                 * @stable ICU 2.4
2119:                 */
2120:                public static final int BEH = 4;
2121:                /**
2122:                 * @stable ICU 2.4
2123:                 */
2124:                public static final int BETH = 5;
2125:                /**
2126:                 * @stable ICU 2.4
2127:                 */
2128:                public static final int DAL = 6;
2129:                /**
2130:                 * @stable ICU 2.4
2131:                 */
2132:                public static final int DALATH_RISH = 7;
2133:                /**
2134:                 * @stable ICU 2.4
2135:                 */
2136:                public static final int E = 8;
2137:                /**
2138:                 * @stable ICU 2.4
2139:                 */
2140:                public static final int FEH = 9;
2141:                /**
2142:                 * @stable ICU 2.4
2143:                 */
2144:                public static final int FINAL_SEMKATH = 10;
2145:                /**
2146:                 * @stable ICU 2.4
2147:                 */
2148:                public static final int GAF = 11;
2149:                /**
2150:                 * @stable ICU 2.4
2151:                 */
2152:                public static final int GAMAL = 12;
2153:                /**
2154:                 * @stable ICU 2.4
2155:                 */
2156:                public static final int HAH = 13;
2157:                /**
2158:                 * @stable ICU 2.4
2159:                 */
2160:                public static final int HAMZA_ON_HEH_GOAL = 14;
2161:                /**
2162:                 * @stable ICU 2.4
2163:                 */
2164:                public static final int HE = 15;
2165:                /**
2166:                 * @stable ICU 2.4
2167:                 */
2168:                public static final int HEH = 16;
2169:                /**
2170:                 * @stable ICU 2.4
2171:                 */
2172:                public static final int HEH_GOAL = 17;
2173:                /**
2174:                 * @stable ICU 2.4
2175:                 */
2176:                public static final int HETH = 18;
2177:                /**
2178:                 * @stable ICU 2.4
2179:                 */
2180:                public static final int KAF = 19;
2181:                /**
2182:                 * @stable ICU 2.4
2183:                 */
2184:                public static final int KAPH = 20;
2185:                /**
2186:                 * @stable ICU 2.4
2187:                 */
2188:                public static final int KNOTTED_HEH = 21;
2189:                /**
2190:                 * @stable ICU 2.4
2191:                 */
2192:                public static final int LAM = 22;
2193:                /**
2194:                 * @stable ICU 2.4
2195:                 */
2196:                public static final int LAMADH = 23;
2197:                /**
2198:                 * @stable ICU 2.4
2199:                 */
2200:                public static final int MEEM = 24;
2201:                /**
2202:                 * @stable ICU 2.4
2203:                 */
2204:                public static final int MIM = 25;
2205:                /**
2206:                 * @stable ICU 2.4
2207:                 */
2208:                public static final int NOON = 26;
2209:                /**
2210:                 * @stable ICU 2.4
2211:                 */
2212:                public static final int NUN = 27;
2213:                /**
2214:                 * @stable ICU 2.4
2215:                 */
2216:                public static final int PE = 28;
2217:                /**
2218:                 * @stable ICU 2.4
2219:                 */
2220:                public static final int QAF = 29;
2221:                /**
2222:                 * @stable ICU 2.4
2223:                 */
2224:                public static final int QAPH = 30;
2225:                /**
2226:                 * @stable ICU 2.4
2227:                 */
2228:                public static final int REH = 31;
2229:                /**
2230:                 * @stable ICU 2.4
2231:                 */
2232:                public static final int REVERSED_PE = 32;
2233:                /**
2234:                 * @stable ICU 2.4
2235:                 */
2236:                public static final int SAD = 33;
2237:                /**
2238:                 * @stable ICU 2.4
2239:                 */
2240:                public static final int SADHE = 34;
2241:                /**
2242:                 * @stable ICU 2.4
2243:                 */
2244:                public static final int SEEN = 35;
2245:                /**
2246:                 * @stable ICU 2.4
2247:                 */
2248:                public static final int SEMKATH = 36;
2249:                /**
2250:                 * @stable ICU 2.4
2251:                 */
2252:                public static final int SHIN = 37;
2253:                /**
2254:                 * @stable ICU 2.4
2255:                 */
2256:                public static final int SWASH_KAF = 38;
2257:                /**
2258:                 * @stable ICU 2.4
2259:                 */
2260:                public static final int SYRIAC_WAW = 39;
2261:                /**
2262:                 * @stable ICU 2.4
2263:                 */
2264:                public static final int TAH = 40;
2265:                /**
2266:                 * @stable ICU 2.4
2267:                 */
2268:                public static final int TAW = 41;
2269:                /**
2270:                 * @stable ICU 2.4
2271:                 */
2272:                public static final int TEH_MARBUTA = 42;
2273:                /**
2274:                 * @stable ICU 2.4
2275:                 */
2276:                public static final int TETH = 43;
2277:                /**
2278:                 * @stable ICU 2.4
2279:                 */
2280:                public static final int WAW = 44;
2281:                /**
2282:                 * @stable ICU 2.4
2283:                 */
2284:                public static final int YEH = 45;
2285:                /**
2286:                 * @stable ICU 2.4
2287:                 */
2288:                public static final int YEH_BARREE = 46;
2289:                /**
2290:                 * @stable ICU 2.4
2291:                 */
2292:                public static final int YEH_WITH_TAIL = 47;
2293:                /**
2294:                 * @stable ICU 2.4
2295:                 */
2296:                public static final int YUDH = 48;
2297:                /**
2298:                 * @stable ICU 2.4
2299:                 */
2300:                public static final int YUDH_HE = 49;
2301:                /**
2302:                 * @stable ICU 2.4
2303:                 */
2304:                public static final int ZAIN = 50;
2305:                /**
2306:                 * @stable ICU 2.6
2307:                 */
2308:                public static final int FE = 51;
2309:                /**
2310:                 * @stable ICU 2.6
2311:                 */
2312:                public static final int KHAPH = 52;
2313:                /**
2314:                 * @stable ICU 2.6
2315:                 */
2316:                public static final int ZHAIN = 53;
2317:                /** Number of Joining Group codes declared above (one more than the largest, ZHAIN).
2318:                 * @stable ICU 2.4
2319:                 */
2320:                public static final int COUNT = 54;
2321:            }
2322:
2323:            /**
2324:             * Grapheme Cluster Break constants: int codes 0 (OTHER) through 9 (V), plus COUNT.
2325:             * @see UProperty#GRAPHEME_CLUSTER_BREAK
2326:             * @draft ICU 3.4
2327:             * @provisional This API might change or be removed in a future release.
2328:             */
2329:            public static interface GraphemeClusterBreak {
2330:                /**
2331:                 * @draft ICU 3.4
2332:                 * @provisional This API might change or be removed in a future release.
2333:                 */
2334:                public static final int OTHER = 0;
2335:                /**
2336:                 * @draft ICU 3.4
2337:                 * @provisional This API might change or be removed in a future release.
2338:                 */
2339:                public static final int CONTROL = 1;
2340:                /**
2341:                 * @draft ICU 3.4
2342:                 * @provisional This API might change or be removed in a future release.
2343:                 */
2344:                public static final int CR = 2;
2345:                /**
2346:                 * @draft ICU 3.4
2347:                 * @provisional This API might change or be removed in a future release.
2348:                 */
2349:                public static final int EXTEND = 3;
2350:                /**
2351:                 * @draft ICU 3.4
2352:                 * @provisional This API might change or be removed in a future release.
2353:                 */
2354:                public static final int L = 4;
2355:                /**
2356:                 * @draft ICU 3.4
2357:                 * @provisional This API might change or be removed in a future release.
2358:                 */
2359:                public static final int LF = 5;
2360:                /**
2361:                 * @draft ICU 3.4
2362:                 * @provisional This API might change or be removed in a future release.
2363:                 */
2364:                public static final int LV = 6;
2365:                /**
2366:                 * @draft ICU 3.4
2367:                 * @provisional This API might change or be removed in a future release.
2368:                 */
2369:                public static final int LVT = 7;
2370:                /**
2371:                 * @draft ICU 3.4
2372:                 * @provisional This API might change or be removed in a future release.
2373:                 */
2374:                public static final int T = 8;
2375:                /**
2376:                 * @draft ICU 3.4
2377:                 * @provisional This API might change or be removed in a future release.
2378:                 */
2379:                public static final int V = 9;
2380:                /** Number of Grapheme Cluster Break codes declared above (one more than the largest, V).
2381:                 * @draft ICU 3.4
2382:                 * @provisional This API might change or be removed in a future release.
2383:                 */
2384:                public static final int COUNT = 10;
2385:            }
2386:
2387:            /**
2388:             * Word Break constants: int codes 0 (OTHER) through 7 (EXTENDNUMLET), plus COUNT.
2389:             * @see UProperty#WORD_BREAK
2390:             * @draft ICU 3.4
2391:             * @provisional This API might change or be removed in a future release.
2392:             */
2393:            public static interface WordBreak {
2394:                /**
2395:                 * @draft ICU 3.4
2396:                 * @provisional This API might change or be removed in a future release.
2397:                 */
2398:                public static final int OTHER = 0;
2399:                /**
2400:                 * @draft ICU 3.4
2401:                 * @provisional This API might change or be removed in a future release.
2402:                 */
2403:                public static final int ALETTER = 1;
2404:                /**
2405:                 * @draft ICU 3.4
2406:                 * @provisional This API might change or be removed in a future release.
2407:                 */
2408:                public static final int FORMAT = 2;
2409:                /**
2410:                 * @draft ICU 3.4
2411:                 * @provisional This API might change or be removed in a future release.
2412:                 */
2413:                public static final int KATAKANA = 3;
2414:                /**
2415:                 * @draft ICU 3.4
2416:                 * @provisional This API might change or be removed in a future release.
2417:                 */
2418:                public static final int MIDLETTER = 4;
2419:                /**
2420:                 * @draft ICU 3.4
2421:                 * @provisional This API might change or be removed in a future release.
2422:                 */
2423:                public static final int MIDNUM = 5;
2424:                /**
2425:                 * @draft ICU 3.4
2426:                 * @provisional This API might change or be removed in a future release.
2427:                 */
2428:                public static final int NUMERIC = 6;
2429:                /**
2430:                 * @draft ICU 3.4
2431:                 * @provisional This API might change or be removed in a future release.
2432:                 */
2433:                public static final int EXTENDNUMLET = 7;
2434:                /** Number of Word Break codes declared above (one more than the largest, EXTENDNUMLET).
2435:                 * @draft ICU 3.4
2436:                 * @provisional This API might change or be removed in a future release.
2437:                 */
2438:                public static final int COUNT = 8;
2439:            }
2440:
2441:            /**
2442:             * Sentence Break constants: int codes 0 (OTHER) through 10 (UPPER), plus COUNT.
2443:             * @see UProperty#SENTENCE_BREAK
2444:             * @draft ICU 3.4
2445:             * @provisional This API might change or be removed in a future release.
2446:             */
2447:            public static interface SentenceBreak {
2448:                /**
2449:                 * @draft ICU 3.4
2450:                 * @provisional This API might change or be removed in a future release.
2451:                 */
2452:                public static final int OTHER = 0;
2453:                /**
2454:                 * @draft ICU 3.4
2455:                 * @provisional This API might change or be removed in a future release.
2456:                 */
2457:                public static final int ATERM = 1;
2458:                /**
2459:                 * @draft ICU 3.4
2460:                 * @provisional This API might change or be removed in a future release.
2461:                 */
2462:                public static final int CLOSE = 2;
2463:                /**
2464:                 * @draft ICU 3.4
2465:                 * @provisional This API might change or be removed in a future release.
2466:                 */
2467:                public static final int FORMAT = 3;
2468:                /**
2469:                 * @draft ICU 3.4
2470:                 * @provisional This API might change or be removed in a future release.
2471:                 */
2472:                public static final int LOWER = 4;
2473:                /**
2474:                 * @draft ICU 3.4
2475:                 * @provisional This API might change or be removed in a future release.
2476:                 */
2477:                public static final int NUMERIC = 5;
2478:                /**
2479:                 * @draft ICU 3.4
2480:                 * @provisional This API might change or be removed in a future release.
2481:                 */
2482:                public static final int OLETTER = 6;
2483:                /**
2484:                 * @draft ICU 3.4
2485:                 * @provisional This API might change or be removed in a future release.
2486:                 */
2487:                public static final int SEP = 7;
2488:                /**
2489:                 * @draft ICU 3.4
2490:                 * @provisional This API might change or be removed in a future release.
2491:                 */
2492:                public static final int SP = 8;
2493:                /**
2494:                 * @draft ICU 3.4
2495:                 * @provisional This API might change or be removed in a future release.
2496:                 */
2497:                public static final int STERM = 9;
2498:                /**
2499:                 * @draft ICU 3.4
2500:                 * @provisional This API might change or be removed in a future release.
2501:                 */
2502:                public static final int UPPER = 10;
2503:                /** Number of Sentence Break codes declared above (one more than the largest, UPPER).
2504:                 * @draft ICU 3.4
2505:                 * @provisional This API might change or be removed in a future release.
2506:                 */
2507:                public static final int COUNT = 11;
2508:            }
2509:
2510:            /**
2511:             * Line Break constants: int codes 0 (UNKNOWN) through 35 (JV), plus COUNT; code 15 has two names.
2512:             * @see UProperty#LINE_BREAK
2513:             * @stable ICU 2.4
2514:             */
2515:            public static interface LineBreak {
2516:                /**
2517:                 * @stable ICU 2.4
2518:                 */
2519:                public static final int UNKNOWN = 0;
2520:                /**
2521:                 * @stable ICU 2.4
2522:                 */
2523:                public static final int AMBIGUOUS = 1;
2524:                /**
2525:                 * @stable ICU 2.4
2526:                 */
2527:                public static final int ALPHABETIC = 2;
2528:                /**
2529:                 * @stable ICU 2.4
2530:                 */
2531:                public static final int BREAK_BOTH = 3;
2532:                /**
2533:                 * @stable ICU 2.4
2534:                 */
2535:                public static final int BREAK_AFTER = 4;
2536:                /**
2537:                 * @stable ICU 2.4
2538:                 */
2539:                public static final int BREAK_BEFORE = 5;
2540:                /**
2541:                 * @stable ICU 2.4
2542:                 */
2543:                public static final int MANDATORY_BREAK = 6;
2544:                /**
2545:                 * @stable ICU 2.4
2546:                 */
2547:                public static final int CONTINGENT_BREAK = 7;
2548:                /**
2549:                 * @stable ICU 2.4
2550:                 */
2551:                public static final int CLOSE_PUNCTUATION = 8;
2552:                /**
2553:                 * @stable ICU 2.4
2554:                 */
2555:                public static final int COMBINING_MARK = 9;
2556:                /**
2557:                 * @stable ICU 2.4
2558:                 */
2559:                public static final int CARRIAGE_RETURN = 10;
2560:                /**
2561:                 * @stable ICU 2.4
2562:                 */
2563:                public static final int EXCLAMATION = 11;
2564:                /**
2565:                 * @stable ICU 2.4
2566:                 */
2567:                public static final int GLUE = 12;
2568:                /**
2569:                 * @stable ICU 2.4
2570:                 */
2571:                public static final int HYPHEN = 13;
2572:                /**
2573:                 * @stable ICU 2.4
2574:                 */
2575:                public static final int IDEOGRAPHIC = 14;
2576:                /** Original, misspelled name; has the same value (15) as INSEPARABLE.
2577:                 * @see #INSEPARABLE
2578:                 * @stable ICU 2.4
2579:                 */
2580:                public static final int INSEPERABLE = 15;
2581:                /**
2582:                 * Renamed from the misspelled "inseperable" in Unicode 4.0.1; same value (15) as INSEPERABLE.
2583:                 * @stable ICU 3.0
2584:                 */
2585:                public static final int INSEPARABLE = 15;
2586:                /**
2587:                 * @stable ICU 2.4
2588:                 */
2589:                public static final int INFIX_NUMERIC = 16;
2590:                /**
2591:                 * @stable ICU 2.4
2592:                 */
2593:                public static final int LINE_FEED = 17;
2594:                /**
2595:                 * @stable ICU 2.4
2596:                 */
2597:                public static final int NONSTARTER = 18;
2598:                /**
2599:                 * @stable ICU 2.4
2600:                 */
2601:                public static final int NUMERIC = 19;
2602:                /**
2603:                 * @stable ICU 2.4
2604:                 */
2605:                public static final int OPEN_PUNCTUATION = 20;
2606:                /**
2607:                 * @stable ICU 2.4
2608:                 */
2609:                public static final int POSTFIX_NUMERIC = 21;
2610:                /**
2611:                 * @stable ICU 2.4
2612:                 */
2613:                public static final int PREFIX_NUMERIC = 22;
2614:                /**
2615:                 * @stable ICU 2.4
2616:                 */
2617:                public static final int QUOTATION = 23;
2618:                /**
2619:                 * @stable ICU 2.4
2620:                 */
2621:                public static final int COMPLEX_CONTEXT = 24;
2622:                /**
2623:                 * @stable ICU 2.4
2624:                 */
2625:                public static final int SURROGATE = 25;
2626:                /**
2627:                 * @stable ICU 2.4
2628:                 */
2629:                public static final int SPACE = 26;
2630:                /**
2631:                 * @stable ICU 2.4
2632:                 */
2633:                public static final int BREAK_SYMBOLS = 27;
2634:                /**
2635:                 * @stable ICU 2.4
2636:                 */
2637:                public static final int ZWSPACE = 28;
2638:
2639:                /**
2640:                 * @stable ICU 2.6
2641:                 */
2642:                public static final int NEXT_LINE = 29; /*[NL]*//* from here on: new in Unicode 4/ICU 2.6 */
2643:
2644:                /**
2645:                 * @stable ICU 2.6
2646:                 */
2647:                public static final int WORD_JOINER = 30; /*[WJ]*/
2648:
2649:                /* from here on: new in Unicode 4.1/ICU 3.4 */
2650:
2651:                /**
2652:                 * @draft ICU 3.4
2653:                 * @provisional This API might change or be removed in a future release.
2654:                 */
2655:                public static final int H2 = 31;
2656:                /**
2657:                 * @draft ICU 3.4
2658:                 * @provisional This API might change or be removed in a future release.
2659:                 */
2660:                public static final int H3 = 32;
2661:                /**
2662:                 * @draft ICU 3.4
2663:                 * @provisional This API might change or be removed in a future release.
2664:                 */
2665:                public static final int JL = 33;
2666:                /**
2667:                 * @draft ICU 3.4
2668:                 * @provisional This API might change or be removed in a future release.
2669:                 */
2670:                public static final int JT = 34;
2671:                /**
2672:                 * @draft ICU 3.4
2673:                 * @provisional This API might change or be removed in a future release.
2674:                 */
2675:                public static final int JV = 35;
2676:
2677:                /** Number of distinct Line Break codes declared above (one more than the largest, JV).
2678:                 * @stable ICU 2.4
2679:                 */
2680:                public static final int COUNT = 36;
2681:            }
2682:
2683:            /**
2684:             * Numeric Type constants: int codes 0 (NONE) through 3 (NUMERIC), plus COUNT.
2685:             * @see UProperty#NUMERIC_TYPE
2686:             * @stable ICU 2.4
2687:             */
2688:            public static interface NumericType {
2689:                /**
2690:                 * @stable ICU 2.4
2691:                 */
2692:                public static final int NONE = 0;
2693:                /**
2694:                 * @stable ICU 2.4
2695:                 */
2696:                public static final int DECIMAL = 1;
2697:                /**
2698:                 * @stable ICU 2.4
2699:                 */
2700:                public static final int DIGIT = 2;
2701:                /**
2702:                 * @stable ICU 2.4
2703:                 */
2704:                public static final int NUMERIC = 3;
2705:                /** Number of Numeric Type codes declared above (one more than the largest, NUMERIC).
2706:                 * @stable ICU 2.4
2707:                 */
2708:                public static final int COUNT = 4;
2709:            }
2710:
2711:            /**
2712:             * Hangul Syllable Type constants: int codes 0 (NOT_APPLICABLE) through 5 (LVT_SYLLABLE), plus COUNT.
2713:             * The bracketed tags in the trailing comments ([NA], [L], ...) are presumably the short value aliases.
2714:             * @see UProperty#HANGUL_SYLLABLE_TYPE
2715:             * @stable ICU 2.6
2716:             */
2717:            public static interface HangulSyllableType {
2718:                /**
2719:                 * @stable ICU 2.6
2720:                 */
2721:                public static final int NOT_APPLICABLE = 0; /*[NA]*//*See note !!*/
2722:                /**
2723:                 * @stable ICU 2.6
2724:                 */
2725:                public static final int LEADING_JAMO = 1; /*[L]*/
2726:                /**
2727:                 * @stable ICU 2.6
2728:                 */
2729:                public static final int VOWEL_JAMO = 2; /*[V]*/
2730:                /**
2731:                 * @stable ICU 2.6
2732:                 */
2733:                public static final int TRAILING_JAMO = 3; /*[T]*/
2734:                /**
2735:                 * @stable ICU 2.6
2736:                 */
2737:                public static final int LV_SYLLABLE = 4; /*[LV]*/
2738:                /**
2739:                 * @stable ICU 2.6
2740:                 */
2741:                public static final int LVT_SYLLABLE = 5; /*[LVT]*/
2742:                /** Number of Hangul Syllable Type codes declared above (one more than the largest, LVT_SYLLABLE).
2743:                 * @stable ICU 2.6
2744:                 */
2745:                public static final int COUNT = 6;
2746:            }
2747:
2748:            // public data members -----------------------------------------------
2749:
2750:            /**
2751:             * The lowest Unicode code point value; defined as UTF16.CODEPOINT_MIN_VALUE.
2752:             * @stable ICU 2.1
2753:             */
2754:            public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
2755:
2756:            /**
2757:             * The highest Unicode code point value (scalar value) according to the
2758:             * Unicode Standard; defined as UTF16.CODEPOINT_MAX_VALUE.
2759:             * This is a 21-bit value (21 bits, rounded up).<br>
2760:             * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
2761:             * @stable ICU 2.1
2762:             */
2763:            public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
2764:
2765:            /**
2766:             * The minimum value for Supplementary code points; defined as UTF16.SUPPLEMENTARY_MIN_VALUE.
2767:             * @stable ICU 2.1
2768:             */
2769:            public static final int SUPPLEMENTARY_MIN_VALUE = UTF16.SUPPLEMENTARY_MIN_VALUE;
2770:
2771:            /**
2772:             * Unicode value (U+FFFD) used when translating into Unicode encoding form and there
2773:             * is no existing character.
2774:             * @stable ICU 2.1
2775:             */
2776:            public static final int REPLACEMENT_CHAR = '\uFFFD';
2777:
2778:            /**
2779:             * Special value that is returned by getUnicodeNumericValue(int) when no
2780:             * numeric value is defined for a code point.
2781:             * @stable ICU 2.4
2782:             * @see #getUnicodeNumericValue
2783:             */
2784:            public static final double NO_NUMERIC_VALUE = -123456789;
2785:
2786:            /**
2787:             * Compatibility constant: same value as java.lang.Character.MIN_RADIX.
2788:             * @draft ICU 3.4
2789:             * @provisional This API might change or be removed in a future release.
2790:             */
2791:            public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
2792:
2793:            /**
2794:             * Compatibility constant: same value as java.lang.Character.MAX_RADIX.
2795:             * @draft ICU 3.4
2796:             * @provisional This API might change or be removed in a future release.
2797:             */
2798:            public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
2799:
2800:            // public methods ----------------------------------------------------
2801:
2802:            /**
2803:             * Retrieves the numeric value of a decimal digit code point.
2804:             * <br>This method observes the semantics of
2805:             * <code>java.lang.Character.digit()</code>.  Note that this
2806:             * will return positive values for code points for which isDigit
2807:             * returns false, just like java.lang.Character.
2808:             * <br><em>Semantic Change:</em> In release 1.3.1 and
2809:             * prior, this did not treat the European letters as having a
2810:             * digit value, and also treated numeric letters and other numbers as 
2811:             * digits.  
2812:             * This has been changed to conform to the java semantics.
2813:             * <br>A code point is a valid digit if and only if:
2814:             * <ul>
2815:             *   <li>ch is a decimal digit or one of the european letters, and
2816:             *   <li>the value of ch is less than the specified radix.
2817:             * </ul>
2818:             * @param ch the code point to query
2819:             * @param radix the radix
2820:             * @return the numeric value represented by the code point in the
2821:             * specified radix, or -1 if the code point is not a decimal digit
2822:             * or if its value is too large for the radix
2823:             * @stable ICU 2.1
2824:             */
2825:            public static int digit(int ch, int radix) {
2826:                // when ch is out of bounds getProperty == 0
2827:                int props = getProperty(ch);
2828:                int value;
2829:                if (getNumericType(props) == NumericType.DECIMAL) {
2830:                    value = UCharacterProperty.getUnsignedValue(props);
2831:                } else {
2832:                    value = getEuropeanDigit(ch);
2833:                }
2834:                return (0 <= value && value < radix) ? value : -1;
2835:            }
2836:
2837:            /**
2838:             * Retrieves the numeric value of a decimal digit code point.
2839:             * <br>This is a convenience overload of <code>digit(int, int)</code> 
2840:             * that provides a decimal radix.
2841:             * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
2842:             * treated numeric letters and other numbers as digits.  This has
2843:             * been changed to conform to the java semantics.
2844:             * @param ch the code point to query
2845:             * @return the numeric value represented by the code point,
2846:             * or -1 if the code point is not a decimal digit or if its
2847:             * value is too large for a decimal radix 
2848:             * @stable ICU 2.1
2849:             */
2850:            public static int digit(int ch) {
2851:                int props = getProperty(ch);
2852:                if (getNumericType(props) == NumericType.DECIMAL) {
2853:                    return UCharacterProperty.getUnsignedValue(props);
2854:                } else {
2855:                    return -1;
2856:                }
2857:            }
2858:
2859:            /** 
2860:             * Returns the numeric value of the code point as a nonnegative 
2861:             * integer.
2862:             * <br>If the code point does not have a numeric value, then -1 is returned. 
2863:             * <br>
2864:             * If the code point has a numeric value that cannot be represented as a 
2865:             * nonnegative integer (for example, a fractional value), then -2 is 
2866:             * returned.
2867:             * @param ch the code point to query
2868:             * @return the numeric value of the code point, or -1 if it has no numeric 
2869:             * value, or -2 if it has a numeric value that cannot be represented as a 
2870:             * nonnegative integer
2871:             * @stable ICU 2.1
2872:             */
2873:            public static int getNumericValue(int ch) {
2874:                // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit()
2875:                int props = PROPERTY_.getProperty(ch);
2876:                int numericType = getNumericType(props);
2877:
2878:                if (numericType == 0) {
2879:                    return getEuropeanDigit(ch);
2880:                }
2881:                if (numericType == UCharacterProperty.NT_FRACTION
2882:                        || numericType >= UCharacterProperty.NT_COUNT) {
2883:                    return -2;
2884:                }
2885:
2886:                int numericValue = UCharacterProperty.getUnsignedValue(props);
2887:
2888:                if (numericType < NumericType.COUNT) {
2889:                    /* normal type, the value is stored directly */
2890:                    return numericValue;
2891:                } else /* numericType==NT_LARGE */{
2892:                    /* large value with exponent */
2893:                    long numValue;
2894:                    int mant, exp;
2895:
2896:                    mant = numericValue >> LARGE_MANT_SHIFT;
2897:                    exp = numericValue & LARGE_EXP_MASK;
2898:                    if (mant == 0) {
2899:                        mant = 1;
2900:                        exp += LARGE_EXP_OFFSET_EXTRA;
2901:                    } else if (mant > 9) {
2902:                        return -2; /* reserved mantissa value */
2903:                    } else {
2904:                        exp += LARGE_EXP_OFFSET;
2905:                    }
2906:                    if (exp > 9) {
2907:                        return -2;
2908:                    }
2909:
2910:                    numValue = mant;
2911:
2912:                    /* multiply by 10^exp without math.h */
2913:                    while (exp >= 4) {
2914:                        numValue *= 10000.;
2915:                        exp -= 4;
2916:                    }
2917:                    switch (exp) {
2918:                    case 3:
2919:                        numValue *= 1000.;
2920:                        break;
2921:                    case 2:
2922:                        numValue *= 100.;
2923:                        break;
2924:                    case 1:
2925:                        numValue *= 10.;
2926:                        break;
2927:                    case 0:
2928:                    default:
2929:                        break;
2930:                    }
2931:                    if (numValue <= Integer.MAX_VALUE) {
2932:                        return (int) numValue;
2933:                    } else {
2934:                        return -2;
2935:                    }
2936:                }
2937:            }
2938:
/**
 * <p>Returns the numeric value of a Unicode code point as defined in the
 * Unicode Character Database.</p>
 * <p>The return type is "double" because some numeric values are
 * fractions, negative, or too large for int.</p>
 * <p>For characters without any numeric value in the Unicode Character
 * Database, NO_NUMERIC_VALUE is returned.</p>
 * <p><em>API Change:</em> In release 2.2 and prior, this API had a
 * return type int and returned -1 when the argument ch did not have a
 * corresponding numeric value. This was changed to synch with ICU4C.
 * </p>
 * This corresponds to the ICU4C function u_getNumericValue.
 * @param ch Code point to get the numeric value for.
 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
 * @stable ICU 2.4
 */
public static double getUnicodeNumericValue(int ch) {
    // equivalent to c version double u_getNumericValue(UChar32 c)
    final int props = PROPERTY_.getProperty(ch);
    final int type = getNumericType(props);

    if (type == 0 || type >= UCharacterProperty.NT_COUNT) {
        return NO_NUMERIC_VALUE;
    }

    final int stored = UCharacterProperty.getUnsignedValue(props);

    // normal type: the value is stored directly
    if (type < NumericType.COUNT) {
        return stored;
    }

    // fraction: numerator and denominator are packed into the stored value
    if (type == UCharacterProperty.NT_FRACTION) {
        int numerator = stored >> FRACTION_NUM_SHIFT;
        final int denominator = (stored & FRACTION_DEN_MASK)
                + FRACTION_DEN_OFFSET;
        // a stored numerator of 0 stands for -1
        if (numerator == 0) {
            numerator = -1;
        }
        return (double) numerator / (double) denominator;
    }

    // remaining case, NT_LARGE: mantissa and decimal exponent
    int mantissa = stored >> LARGE_MANT_SHIFT;
    int exponent = stored & LARGE_EXP_MASK;
    if (mantissa > 9) {
        return NO_NUMERIC_VALUE; // reserved mantissa value
    }
    if (mantissa == 0) {
        mantissa = 1;
        exponent += LARGE_EXP_OFFSET_EXTRA;
    } else {
        exponent += LARGE_EXP_OFFSET;
    }

    // scale by 10^exponent, four decimal places at a time
    double result = mantissa;
    for (; exponent >= 4; exponent -= 4) {
        result *= 10000.;
    }
    if (exponent == 3) {
        result *= 1000.;
    } else if (exponent == 2) {
        result *= 100.;
    } else if (exponent == 1) {
        result *= 10.;
    }
    return result;
}
3023:
3024:            /**
3025:             * Compatibility override of Java deprecated method.  This
3026:             * method will always remain deprecated.  Delegates to
3027:             * java.lang.Character.isSpace.
3028:             * @param ch the code point
3029:             * @return true if the code point is a space character as
3030:             * defined by java.lang.Character.isSpace.
3031:             * @deprecated ICU 3.4 (Java)
3032:             */
3033:            public static boolean isSpace(int ch) {
3034:                return ch <= 0x20
3035:                        && (ch == 0x20 || ch == 0x09 || ch == 0x0a
3036:                                || ch == 0x0c || ch == 0x0d);
3037:            }
3038:
3039:            /**
3040:             * Returns a value indicating a code point's Unicode category.
3041:             * Up-to-date Unicode implementation of java.lang.Character.getType() 
3042:             * except for the above mentioned code points that had their category 
3043:             * changed.<br>
3044:             * Return results are constants from the interface 
3045:             * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3046:             * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3047:             * those returned by java.lang.Character.getType.  UCharacterCategory values
3048:             * match the ones used in ICU4C, while java.lang.Character type
3049:             * values, though similar, skip the value 17.</p>
3050:             * @param ch code point whose type is to be determined
3051:             * @return category which is a value of UCharacterCategory
3052:             * @stable ICU 2.1
3053:             */
3054:            public static int getType(int ch) {
3055:                return getProperty(ch) & UCharacterProperty.TYPE_MASK;
3056:            }
3057:
3058:            /**
3059:             * Determines if a code point has a defined meaning in the up-to-date 
3060:             * Unicode standard.
3061:             * E.g. supplementary code points though allocated space are not defined in 
3062:             * Unicode yet.<br>
3063:             * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3064:             * @param ch code point to be determined if it is defined in the most 
3065:             *        current version of Unicode
3066:             * @return true if this code point is defined in unicode
3067:             * @stable ICU 2.1
3068:             */
3069:            public static boolean isDefined(int ch) {
3070:                return getType(ch) != 0;
3071:            }
3072:
3073:            /**
3074:             * Determines if a code point is a Java digit.
3075:             * <br>This method observes the semantics of
3076:             * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 
3077:             * digits only.
3078:             * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 
3079:             * numeric letters and other numbers as digits. 
3080:             * This has been changed to conform to the java semantics.
3081:             * @param ch code point to query
3082:             * @return true if this code point is a digit 
3083:             * @stable ICU 2.1
3084:             */
3085:            public static boolean isDigit(int ch) {
3086:                return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3087:            }
3088:
3089:            /**
3090:             * Determines if the specified code point is an ISO control character.
3091:             * A code point is considered to be an ISO control character if it is in 
3092:             * the range &#92u0000 through &#92u001F or in the range &#92u007F through 
3093:             * &#92u009F.<br>
3094:             * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3095:             * @param ch code point to determine if it is an ISO control character
3096:             * @return true if code point is a ISO control character
3097:             * @stable ICU 2.1
3098:             */
3099:            public static boolean isISOControl(int ch) {
3100:                return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_
3101:                        && ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3102:            }
3103:
3104:            /**
3105:             * Determines if the specified code point is a letter.
3106:             * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3107:             * @param ch code point to determine if it is a letter
3108:             * @return true if code point is a letter
3109:             * @stable ICU 2.1
3110:             */
3111:            public static boolean isLetter(int ch) {
3112:                // if props == 0, it will just fall through and return false
3113:                return ((1 << getType(ch)) & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3114:                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3115:                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3116:                        | (1 << UCharacterCategory.MODIFIER_LETTER) | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3117:            }
3118:
3119:            /**
3120:             * Determines if the specified code point is a letter or digit.
3121:             * Note this method, unlike java.lang.Character does not regard the ascii 
3122:             * characters 'A' - 'Z' and 'a' - 'z' as digits.
3123:             * @param ch code point to determine if it is a letter or a digit
3124:             * @return true if code point is a letter or a digit
3125:             * @stable ICU 2.1
3126:             */
3127:            public static boolean isLetterOrDigit(int ch) {
3128:                return ((1 << getType(ch)) & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3129:                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3130:                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3131:                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3132:                        | (1 << UCharacterCategory.OTHER_LETTER) | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3133:            }
3134:
/**
 * Compatibility override of Java deprecated method
 * java.lang.Character.isJavaLetter.  This method will always remain
 * deprecated.  Delegates to {@link #isJavaIdentifierStart(int)} of this
 * class.
 * @param cp the code point
 * @return true if the code point can start a java identifier.
 * @deprecated ICU 3.4 (Java)
 */
public static boolean isJavaLetter(int cp) {
    return isJavaIdentifierStart(cp);
}
3146:
/**
 * Compatibility override of Java deprecated method
 * java.lang.Character.isJavaLetterOrDigit.  This method will always
 * remain deprecated.  Delegates to {@link #isJavaIdentifierPart(int)} of
 * this class.
 * @param cp the code point
 * @return true if the code point can continue a java identifier.
 * @deprecated ICU 3.4 (Java)
 */
public static boolean isJavaLetterOrDigit(int cp) {
    return isJavaIdentifierPart(cp);
}
3158:
3159:            /**
3160:             * Compatibility override of Java method, delegates to
3161:             * java.lang.Character.isJavaIdentifierStart.
3162:             * @param cp the code point
3163:             * @return true if the code point can start a java identifier.
3164:             * @draft ICU 3.4
3165:             * @provisional This API might change or be removed in a future release.
3166:             */
3167:            public static boolean isJavaIdentifierStart(int cp) {
3168:                // note, downcast to char for jdk 1.4 compatibility
3169:                return java.lang.Character.isJavaIdentifierStart((char) cp);
3170:            }
3171:
3172:            /**
3173:             * Compatibility override of Java method, delegates to
3174:             * java.lang.Character.isJavaIdentifierPart.
3175:             * @param cp the code point
3176:             * @return true if the code point can continue a java identifier.
3177:             * @draft ICU 3.4
3178:             * @provisional This API might change or be removed in a future release.
3179:             */
3180:            public static boolean isJavaIdentifierPart(int cp) {
3181:                // note, downcast to char for jdk 1.4 compatibility
3182:                return java.lang.Character.isJavaIdentifierPart((char) cp);
3183:            }
3184:
3185:            /**
3186:             * Determines if the specified code point is a lowercase character.
3187:             * UnicodeData only contains case mappings for code points where they are 
3188:             * one-to-one mappings; it also omits information about context-sensitive 
3189:             * case mappings.<br> For more information about Unicode case mapping 
3190:             * please refer to the 
3191:             * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 
3192:             * #21</a>.<br>
3193:             * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3194:             * @param ch code point to determine if it is in lowercase
3195:             * @return true if code point is a lowercase character
3196:             * @stable ICU 2.1
3197:             */
3198:            public static boolean isLowerCase(int ch) {
3199:                // if props == 0, it will just fall through and return false
3200:                return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3201:            }
3202:
3203:            /**
3204:             * Determines if the specified code point is a white space character.
3205:             * A code point is considered to be an whitespace character if and only
3206:             * if it satisfies one of the following criteria:
3207:             * <ul>
3208:             * <li> It is a Unicode space separator (category "Zs"), but is not
3209:             *      a no-break space (&#92u00A0 or &#92u202F or &#92uFEFF).
3210:             * <li> It is a Unicode line separator (category "Zl").
3211:             * <li> It is a Unicode paragraph separator (category "Zp").
3212:             * <li> It is &#92u0009, HORIZONTAL TABULATION. 
3213:             * <li> It is &#92u000A, LINE FEED. 
3214:             * <li> It is &#92u000B, VERTICAL TABULATION. 
3215:             * <li> It is &#92u000C, FORM FEED. 
3216:             * <li> It is &#92u000D, CARRIAGE RETURN. 
3217:             * <li> It is &#92u001C, FILE SEPARATOR. 
3218:             * <li> It is &#92u001D, GROUP SEPARATOR. 
3219:             * <li> It is &#92u001E, RECORD SEPARATOR. 
3220:             * <li> It is &#92u001F, UNIT SEPARATOR.  
3221:             * </ul>
3222:             *
3223:             * This API tries to synch to the semantics of the Java API,
3224:             * java.lang.Character.isWhitespace(). 
3225:             * @param ch code point to determine if it is a white space
3226:             * @return true if the specified code point is a white space character
3227:             * @stable ICU 2.1
3228:             */
3229:            public static boolean isWhitespace(int ch) {
3230:                // exclude no-break spaces
3231:                // if props == 0, it will just fall through and return false
3232:                return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3233:                        | (1 << UCharacterCategory.LINE_SEPARATOR) | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3234:                        && (ch != NO_BREAK_SPACE_)
3235:                        && (ch != NARROW_NO_BREAK_SPACE_)
3236:                        && (ch != ZERO_WIDTH_NO_BREAK_SPACE_)
3237:                        // TAB VT LF FF CR FS GS RS US NL are all control characters
3238:                        // that are white spaces.
3239:                        || (ch >= 0x9 && ch <= 0xd)
3240:                        || (ch >= 0x1c && ch <= 0x1f);
3241:            }
3242:
3243:            /**
3244:             * Determines if the specified code point is a Unicode specified space 
3245:             * character, i.e. if code point is in the category Zs, Zl and Zp.
3246:             * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3247:             * @param ch code point to determine if it is a space
3248:             * @return true if the specified code point is a space character
3249:             * @stable ICU 2.1
3250:             */
3251:            public static boolean isSpaceChar(int ch) {
3252:                // if props == 0, it will just fall through and return false
3253:                return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3254:                        | (1 << UCharacterCategory.LINE_SEPARATOR) | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0;
3255:            }
3256:
3257:            /**
3258:             * Determines if the specified code point is a titlecase character.
3259:             * UnicodeData only contains case mappings for code points where they are 
3260:             * one-to-one mappings; it also omits information about context-sensitive 
3261:             * case mappings.<br>
3262:             * For more information about Unicode case mapping please refer to the 
3263:             * <a href=http://www.unicode.org/unicode/reports/tr21/>
3264:             * Technical report #21</a>.<br>
3265:             * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3266:             * @param ch code point to determine if it is in title case
3267:             * @return true if the specified code point is a titlecase character
3268:             * @stable ICU 2.1
3269:             */
3270:            public static boolean isTitleCase(int ch) {
3271:                // if props == 0, it will just fall through and return false
3272:                return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3273:            }
3274:
3275:            /**
3276:             * Determines if the specified code point may be any part of a Unicode 
3277:             * identifier other than the starting character.
3278:             * A code point may be part of a Unicode identifier if and only if it is 
3279:             * one of the following: 
3280:             * <ul>
3281:             * <li> Lu Uppercase letter
3282:             * <li> Ll Lowercase letter
3283:             * <li> Lt Titlecase letter
3284:             * <li> Lm Modifier letter
3285:             * <li> Lo Other letter
3286:             * <li> Nl Letter number
3287:             * <li> Pc Connecting punctuation character 
3288:             * <li> Nd decimal number
3289:             * <li> Mc Spacing combining mark 
3290:             * <li> Mn Non-spacing mark 
3291:             * <li> Cf formatting code
3292:             * </ul>
3293:             * Up-to-date Unicode implementation of 
3294:             * java.lang.Character.isUnicodeIdentifierPart().<br>
3295:             * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3296:             * @param ch code point to determine if is can be part of a Unicode 
3297:             *        identifier
3298:             * @return true if code point is any character belonging a unicode 
3299:             *         identifier suffix after the first character
3300:             * @stable ICU 2.1
3301:             */
3302:            public static boolean isUnicodeIdentifierPart(int ch) {
3303:                // if props == 0, it will just fall through and return false
3304:                // cat == format
3305:                return ((1 << getType(ch)) & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3306:                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3307:                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3308:                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3309:                        | (1 << UCharacterCategory.OTHER_LETTER)
3310:                        | (1 << UCharacterCategory.LETTER_NUMBER)
3311:                        | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3312:                        | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3313:                        | (1 << UCharacterCategory.COMBINING_SPACING_MARK) | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3314:                        || isIdentifierIgnorable(ch);
3315:            }
3316:
3317:            /**
3318:             * Determines if the specified code point is permissible as the first 
3319:             * character in a Unicode identifier.
3320:             * A code point may start a Unicode identifier if it is of type either 
3321:             * <ul> 
3322:             * <li> Lu Uppercase letter
3323:             * <li> Ll Lowercase letter
3324:             * <li> Lt Titlecase letter
3325:             * <li> Lm Modifier letter
3326:             * <li> Lo Other letter
3327:             * <li> Nl Letter number
3328:             * </ul>
3329:             * Up-to-date Unicode implementation of 
3330:             * java.lang.Character.isUnicodeIdentifierStart().<br>
3331:             * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3332:             * @param ch code point to determine if it can start a Unicode identifier
3333:             * @return true if code point is the first character belonging a unicode 
3334:             *              identifier
3335:             * @stable ICU 2.1
3336:             */
3337:            public static boolean isUnicodeIdentifierStart(int ch) {
3338:                /*int cat = getType(ch);*/
3339:                // if props == 0, it will just fall through and return false
3340:                return ((1 << getType(ch)) & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3341:                        | (1 << UCharacterCategory.LOWERCASE_LETTER)
3342:                        | (1 << UCharacterCategory.TITLECASE_LETTER)
3343:                        | (1 << UCharacterCategory.MODIFIER_LETTER)
3344:                        | (1 << UCharacterCategory.OTHER_LETTER) | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3345:            }
3346:
3347:            /**
3348:             * Determines if the specified code point should be regarded as an 
3349:             * ignorable character in a Unicode identifier.
3350:             * A character is ignorable in the Unicode standard if it is of the type 
3351:             * Cf, Formatting code.<br>
3352:             * Up-to-date Unicode implementation of 
3353:             * java.lang.Character.isIdentifierIgnorable().<br>
3354:             * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3355:             * @param ch code point to be determined if it can be ignored in a Unicode 
3356:             *        identifier.
3357:             * @return true if the code point is ignorable
3358:             * @stable ICU 2.1
3359:             */
3360:            public static boolean isIdentifierIgnorable(int ch) {
3361:                // see java.lang.Character.isIdentifierIgnorable() on range of 
3362:                // ignorable characters.
3363:                if (ch <= 0x9f) {
3364:                    return isISOControl(ch)
3365:                            && !((ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f));
3366:                }
3367:                return getType(ch) == UCharacterCategory.FORMAT;
3368:            }
3369:
3370:            /**
3371:             * Determines if the specified code point is an uppercase character.
3372:             * UnicodeData only contains case mappings for code point where they are 
3373:             * one-to-one mappings; it also omits information about context-sensitive 
3374:             * case mappings.<br> 
3375:             * For language specific case conversion behavior, use 
3376:             * toUpperCase(locale, str). <br>
3377:             * For example, the case conversion for dot-less i and dotted I in Turkish,
3378:             * or for final sigma in Greek.
3379:             * For more information about Unicode case mapping please refer to the 
3380:             * <a href=http://www.unicode.org/unicode/reports/tr21/>
3381:             * Technical report #21</a>.<br>
3382:             * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3383:             * @param ch code point to determine if it is in uppercase
3384:             * @return true if the code point is an uppercase character
3385:             * @stable ICU 2.1
3386:             */
3387:            public static boolean isUpperCase(int ch) {
3388:                // if props == 0, it will just fall through and return false
3389:                return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3390:            }
3391:
3392:            /**
3393:             * The given code point is mapped to its lowercase equivalent; if the code 
3394:             * point has no lowercase equivalent, the code point itself is returned.
3395:             * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3396:             *
3397:             * <p>This function only returns the simple, single-code point case mapping.
3398:             * Full case mappings should be used whenever possible because they produce
3399:             * better results by working on whole strings.
3400:             * They take into account the string context and the language and can map
3401:             * to a result string with a different length as appropriate.
3402:             * Full case mappings are applied by the case mapping functions
3403:             * that take String parameters rather than code points (int).
3404:             * See also the User Guide chapter on C/POSIX migration:
3405:             * http://icu.sourceforge.net/userguide/posix.html#case_mappings
3406:             *
3407:             * @param ch code point whose lowercase equivalent is to be retrieved
3408:             * @return the lowercase equivalent code point
3409:             * @stable ICU 2.1
3410:             */
3411:            public static int toLowerCase(int ch) {
3412:                return gCsp.tolower(ch);
3413:            }
3414:
3415:            /**
3416:             * Converts argument code point and returns a String object representing 
3417:             * the code point's value in UTF16 format.
3418:             * The result is a string whose length is 1 for non-supplementary code 
3419:             * points, 2 otherwise.<br>
3420:             * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this 
3421:             * function.<br>
3422:             * Up-to-date Unicode implementation of java.lang.Character.toString()
3423:             * @param ch code point
3424:             * @return string representation of the code point, null if code point is not
3425:             *         defined in unicode
3426:             * @stable ICU 2.1
3427:             */
3428:            public static String toString(int ch) {
3429:                if (ch < MIN_VALUE || ch > MAX_VALUE) {
3430:                    return null;
3431:                }
3432:
3433:                if (ch < SUPPLEMENTARY_MIN_VALUE) {
3434:                    return String.valueOf((char) ch);
3435:                }
3436:
3437:                StringBuffer result = new StringBuffer();
3438:                result.append(UTF16.getLeadSurrogate(ch));
3439:                result.append(UTF16.getTrailSurrogate(ch));
3440:                return result.toString();
3441:            }
3442:
3443:            /**
3444:             * Converts the code point argument to titlecase.
3445:             * If no titlecase is available, the uppercase is returned. If no uppercase 
3446:             * is available, the code point itself is returned.
3447:             * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
3448:             *
3449:             * <p>This function only returns the simple, single-code point case mapping.
3450:             * Full case mappings should be used whenever possible because they produce
3451:             * better results by working on whole strings.
3452:             * They take into account the string context and the language and can map
3453:             * to a result string with a different length as appropriate.
3454:             * Full case mappings are applied by the case mapping functions
3455:             * that take String parameters rather than code points (int).
3456:             * See also the User Guide chapter on C/POSIX migration:
3457:             * http://icu.sourceforge.net/userguide/posix.html#case_mappings
3458:             *
3459:             * @param ch code point  whose title case is to be retrieved
3460:             * @return titlecase code point
3461:             * @stable ICU 2.1
3462:             */
3463:            public static int toTitleCase(int ch) {
3464:                return gCsp.totitle(ch);
3465:            }
3466:
3467:            /**
3468:             * Converts the character argument to uppercase.
3469:             * If no uppercase is available, the character itself is returned.
3470:             * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
3471:             *
3472:             * <p>This function only returns the simple, single-code point case mapping.
3473:             * Full case mappings should be used whenever possible because they produce
3474:             * better results by working on whole strings.
3475:             * They take into account the string context and the language and can map
3476:             * to a result string with a different length as appropriate.
3477:             * Full case mappings are applied by the case mapping functions
3478:             * that take String parameters rather than code points (int).
3479:             * See also the User Guide chapter on C/POSIX migration:
3480:             * http://icu.sourceforge.net/userguide/posix.html#case_mappings
3481:             *
3482:             * @param ch code point whose uppercase is to be retrieved
3483:             * @return uppercase code point
3484:             * @stable ICU 2.1
3485:             */
3486:            public static int toUpperCase(int ch) {
3487:                return gCsp.toupper(ch);
3488:            }
3489:
3490:            // extra methods not in java.lang.Character --------------------------
3491:
3492:            /**
3493:             * Determines if the code point is a supplementary character.
3494:             * A code point is a supplementary character if and only if it is greater 
3495:             * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
3496:             * @param ch code point to be determined if it is in the supplementary 
3497:             *        plane
3498:             * @return true if code point is a supplementary character
3499:             * @stable ICU 2.1
3500:             */
3501:            public static boolean isSupplementary(int ch) {
3502:                return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE
3503:                        && ch <= UCharacter.MAX_VALUE;
3504:            }
3505:
3506:            /**
3507:             * Determines if the code point is in the BMP plane.
3508:             * @param ch code point to be determined if it is not a supplementary 
3509:             *        character
3510:             * @return true if code point is not a supplementary character
3511:             * @stable ICU 2.1
3512:             */
3513:            public static boolean isBMP(int ch) {
3514:                return (ch >= 0 && ch <= LAST_CHAR_MASK_);
3515:            }
3516:
3517:            /**
3518:             * Determines whether the specified code point is a printable character 
3519:             * according to the Unicode standard.
3520:             * @param ch code point to be determined if it is printable
3521:             * @return true if the code point is a printable character
3522:             * @stable ICU 2.1
3523:             */
3524:            public static boolean isPrintable(int ch) {
3525:                int cat = getType(ch);
3526:                // if props == 0, it will just fall through and return false
3527:                return (cat != UCharacterCategory.UNASSIGNED
3528:                        && cat != UCharacterCategory.CONTROL
3529:                        && cat != UCharacterCategory.FORMAT
3530:                        && cat != UCharacterCategory.PRIVATE_USE
3531:                        && cat != UCharacterCategory.SURROGATE && cat != UCharacterCategory.GENERAL_OTHER_TYPES);
3532:            }
3533:
3534:            /**
3535:             * Determines whether the specified code point is of base form.
3536:             * A code point of base form does not graphically combine with preceding 
3537:             * characters, and is neither a control nor a format character.
3538:             * @param ch code point to be determined if it is of base form
3539:             * @return true if the code point is of base form
3540:             * @stable ICU 2.1
3541:             */
3542:            public static boolean isBaseForm(int ch) {
3543:                int cat = getType(ch);
3544:                // if props == 0, it will just fall through and return false
3545:                return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER
3546:                        || cat == UCharacterCategory.OTHER_NUMBER
3547:                        || cat == UCharacterCategory.LETTER_NUMBER
3548:                        || cat == UCharacterCategory.UPPERCASE_LETTER
3549:                        || cat == UCharacterCategory.LOWERCASE_LETTER
3550:                        || cat == UCharacterCategory.TITLECASE_LETTER
3551:                        || cat == UCharacterCategory.MODIFIER_LETTER
3552:                        || cat == UCharacterCategory.OTHER_LETTER
3553:                        || cat == UCharacterCategory.NON_SPACING_MARK
3554:                        || cat == UCharacterCategory.ENCLOSING_MARK
3555:                        || cat == UCharacterCategory.COMBINING_SPACING_MARK;
3556:            }
3557:
3558:            /**
3559:             * Returns the Bidirection property of a code point.
3560:             * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 
3561:             * property.<br>
3562:             * Result returned belongs to the interface 
3563:             * <a href=UCharacterDirection.html>UCharacterDirection</a>
3564:             * @param ch the code point to be determined its direction
3565:             * @return direction constant from UCharacterDirection.
3566:             * @stable ICU 2.1
3567:             */
3568:            public static int getDirection(int ch) {
3569:                return gBdp.getClass(ch);
3570:            }
3571:
3572:            /**
3573:             * Determines whether the code point has the "mirrored" property.
3574:             * This property is set for characters that are commonly used in
3575:             * Right-To-Left contexts and need to be displayed with a "mirrored"
3576:             * glyph.
3577:             * @param ch code point whose mirror is to be determined
3578:             * @return true if the code point has the "mirrored" property
3579:             * @stable ICU 2.1
3580:             */
3581:            public static boolean isMirrored(int ch) {
3582:                return gBdp.isMirrored(ch);
3583:            }
3584:
3585:            /**
3586:             * Maps the specified code point to a "mirror-image" code point.
3587:             * For code points with the "mirrored" property, implementations sometimes 
3588:             * need a "poor man's" mapping to another code point such that the default 
3589:             * glyph may serve as the mirror-image of the default glyph of the 
3590:             * specified code point.<br> 
3591:             * This is useful for text conversion to and from codepages with visual 
3592:             * order, and for displays without glyph selection capabilities.
3593:             * @param ch code point whose mirror is to be retrieved
3594:             * @return another code point that may serve as a mirror-image substitute, 
3595:             *         or ch itself if there is no such mapping or ch does not have the 
3596:             *         "mirrored" property
3597:             * @stable ICU 2.1
3598:             */
3599:            public static int getMirror(int ch) {
3600:                return gBdp.getMirror(ch);
3601:            }
3602:
3603:            /**
3604:             * Gets the combining class of the argument codepoint
3605:             * @param ch code point whose combining is to be retrieved
3606:             * @return the combining class of the codepoint
3607:             * @stable ICU 2.1
3608:             */
3609:            public static int getCombiningClass(int ch) {
3610:                if (ch < MIN_VALUE || ch > MAX_VALUE) {
3611:                    throw new IllegalArgumentException(
3612:                            "Codepoint out of bounds");
3613:                }
3614:                return NormalizerImpl.getCombiningClass(ch);
3615:            }
3616:
3617:            /**
3618:             * A code point is illegal if and only if
3619:             * <ul>
3620:             * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3621:             * <li> A surrogate value, 0xD800 to 0xDFFF
3622:             * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3623:             * </ul>
3624:             * Note: legal does not mean that it is assigned in this version of Unicode.
3625:             * @param ch code point to determine if it is a legal code point by itself
3626:             * @return true if and only if legal. 
3627:             * @stable ICU 2.1
3628:             */
3629:            public static boolean isLegal(int ch) {
3630:                if (ch < MIN_VALUE) {
3631:                    return false;
3632:                }
3633:                if (ch < UTF16.SURROGATE_MIN_VALUE) {
3634:                    return true;
3635:                }
3636:                if (ch <= UTF16.SURROGATE_MAX_VALUE) {
3637:                    return false;
3638:                }
3639:                if (UCharacterUtility.isNonCharacter(ch)) {
3640:                    return false;
3641:                }
3642:                return (ch <= MAX_VALUE);
3643:            }
3644:
3645:            /**
3646:             * A string is legal iff all its code points are legal.
3647:             * A code point is illegal if and only if
3648:             * <ul>
3649:             * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
3650:             * <li> A surrogate value, 0xD800 to 0xDFFF
3651:             * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
3652:             * </ul>
3653:             * Note: legal does not mean that it is assigned in this version of Unicode.
3654:             * @param str containing code points to examin
3655:             * @return true if and only if legal. 
3656:             * @stable ICU 2.1
3657:             */
3658:            public static boolean isLegal(String str) {
3659:                int size = str.length();
3660:                int codepoint;
3661:                for (int i = 0; i < size; i++) {
3662:                    codepoint = UTF16.charAt(str, i);
3663:                    if (!isLegal(codepoint)) {
3664:                        return false;
3665:                    }
3666:                    if (isSupplementary(codepoint)) {
3667:                        i++;
3668:                    }
3669:                }
3670:                return true;
3671:            }
3672:
3673:            /**
3674:             * Gets the version of Unicode data used. 
3675:             * @return the unicode version number used
3676:             * @stable ICU 2.1
3677:             */
3678:            public static VersionInfo getUnicodeVersion() {
3679:                return PROPERTY_.m_unicodeVersion_;
3680:            }
3681:
3682:            /**
3683:             * Retrieve the most current Unicode name of the argument code point, or 
3684:             * null if the character is unassigned or outside the range 
3685:             * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3686:             * <br>
3687:             * Note calling any methods related to code point names, e.g. get*Name*() 
3688:             * incurs a one-time initialisation cost to construct the name tables.
3689:             * @param ch the code point for which to get the name
3690:             * @return most current Unicode name
3691:             * @stable ICU 2.1
3692:             */
3693:            public static String getName(int ch) {
3694:                if (NAME_ == null) {
3695:                    throw new MissingResourceException(
3696:                            "Could not load unames.icu", "", "");
3697:                }
3698:                return NAME_
3699:                        .getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
3700:            }
3701:
3702:            /**
3703:             * Gets the names for each of the characters in a string
3704:             * @param s string to format
3705:             * @param separator string to go between names
3706:             * @return string of names
3707:             * @internal
3708:             * @deprecated This API is ICU internal only.
3709:             */
3710:            public static String getName(String s, String separator) {
3711:                if (s.length() == 1) { // handle common case
3712:                    return getName(s.charAt(0));
3713:                }
3714:                int cp;
3715:                StringBuffer sb = new StringBuffer();
3716:                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
3717:                    cp = UTF16.charAt(s, i);
3718:                    if (i != 0)
3719:                        sb.append(separator);
3720:                    sb.append(UCharacter.getName(cp));
3721:                }
3722:                return sb.toString();
3723:            }
3724:
3725:            /**
3726:             * Retrieve the earlier version 1.0 Unicode name of the argument code 
3727:             * point, or null if the character is unassigned or outside the range 
3728:             * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
3729:             * <br>
3730:             * Note calling any methods related to code point names, e.g. get*Name*() 
3731:             * incurs a one-time initialisation cost to construct the name tables.
3732:             * @param ch the code point for which to get the name
3733:             * @return version 1.0 Unicode name
3734:             * @stable ICU 2.1
3735:             */
3736:            public static String getName1_0(int ch) {
3737:                if (NAME_ == null) {
3738:                    throw new MissingResourceException(
3739:                            "Could not load unames.icu", "", "");
3740:                }
3741:                return NAME_.getName(ch,
3742:                        UCharacterNameChoice.UNICODE_10_CHAR_NAME);
3743:            }
3744:
3745:            /**
3746:             * <p>Retrieves a name for a valid codepoint. Unlike, getName(int) and
3747:             * getName1_0(int), this method will return a name even for codepoints that
3748:             * are not assigned a name in UnicodeData.txt.
3749:             * </p>
3750:             * The names are returned in the following order.
3751:             * <ul>
3752:             * <li> Most current Unicode name if there is any
3753:             * <li> Unicode 1.0 name if there is any
3754:             * <li> Extended name in the form of 
3755:             *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-fffe>
3756:             * </ul>
3757:             * Note calling any methods related to code point names, e.g. get*Name*() 
3758:             * incurs a one-time initialisation cost to construct the name tables.
3759:             * @param ch the code point for which to get the name
3760:             * @return a name for the argument codepoint
3761:             * @stable ICU 2.6
3762:             */
3763:            public static String getExtendedName(int ch) {
3764:                if (NAME_ == null) {
3765:                    throw new MissingResourceException(
3766:                            "Could not load unames.icu", "", "");
3767:                }
3768:                return NAME_.getName(ch,
3769:                        UCharacterNameChoice.EXTENDED_CHAR_NAME);
3770:            }
3771:
3772:            /**
3773:             * Get the ISO 10646 comment for a character.
3774:             * The ISO 10646 comment is an informative field in the Unicode Character
3775:             * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
3776:             * @param ch The code point for which to get the ISO comment.
3777:             *           It must be <code>0<=c<=0x10ffff</code>.
3778:             * @return The ISO comment, or null if there is no comment for this 
3779:             *         character.
3780:             * @stable ICU 2.4
3781:             */
3782:            public static String getISOComment(int ch) {
3783:                if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE) {
3784:                    return null;
3785:                }
3786:                if (NAME_ == null) {
3787:                    throw new MissingResourceException(
3788:                            "Could not load unames.icu", "", "");
3789:                }
3790:                String result = NAME_.getGroupName(ch,
3791:                        UCharacterNameChoice.ISO_COMMENT_);
3792:                return result;
3793:            }
3794:
3795:            /**
3796:             * <p>Find a Unicode code point by its most current Unicode name and 
3797:             * return its code point value. All Unicode names are in uppercase.</p>
3798:             * Note calling any methods related to code point names, e.g. get*Name*() 
3799:             * incurs a one-time initialisation cost to construct the name tables.
3800:             * @param name most current Unicode character name whose code point is to 
3801:             *        be returned
3802:             * @return code point or -1 if name is not found
3803:             * @stable ICU 2.1
3804:             */
3805:            public static int getCharFromName(String name) {
3806:                if (NAME_ == null) {
3807:                    throw new MissingResourceException(
3808:                            "Could not load unames.icu", "", "");
3809:                }
3810:                return NAME_.getCharFromName(
3811:                        UCharacterNameChoice.UNICODE_CHAR_NAME, name);
3812:            }
3813:
3814:            /**
3815:             * <p>Find a Unicode character by its version 1.0 Unicode name and return 
3816:             * its code point value. All Unicode names are in uppercase.</p>
3817:             * Note calling any methods related to code point names, e.g. get*Name*() 
3818:             * incurs a one-time initialisation cost to construct the name tables.
3819:             * @param name Unicode 1.0 code point name whose code point is to 
3820:             *             returned
3821:             * @return code point or -1 if name is not found
3822:             * @stable ICU 2.1
3823:             */
3824:            public static int getCharFromName1_0(String name) {
3825:                if (NAME_ == null) {
3826:                    throw new MissingResourceException(
3827:                            "Could not load unames.icu", "", "");
3828:                }
3829:                return NAME_.getCharFromName(
3830:                        UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
3831:            }
3832:
3833:            /**
3834:             * <p>Find a Unicode character by either its name and return its code 
3835:             * point value. All Unicode names are in uppercase. 
3836:             * Extended names are all lowercase except for numbers and are contained
3837:             * within angle brackets.</p>
3838:             * The names are searched in the following order
3839:             * <ul>
3840:             * <li> Most current Unicode name if there is any
3841:             * <li> Unicode 1.0 name if there is any
3842:             * <li> Extended name in the form of 
3843:             *      "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE>
3844:             * </ul>
3845:             * Note calling any methods related to code point names, e.g. get*Name*() 
3846:             * incurs a one-time initialisation cost to construct the name tables.
3847:             * @param name codepoint name
3848:             * @return code point associated with the name or -1 if the name is not
3849:             *         found.
3850:             * @stable ICU 2.6
3851:             */
3852:            public static int getCharFromExtendedName(String name) {
3853:                if (NAME_ == null) {
3854:                    throw new MissingResourceException(
3855:                            "Could not load unames.icu", "", "");
3856:                }
3857:                return NAME_.getCharFromName(
3858:                        UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
3859:            }
3860:
3861:            /**
3862:             * Return the Unicode name for a given property, as given in the
3863:             * Unicode database file PropertyAliases.txt.  Most properties
3864:             * have more than one name.  The nameChoice determines which one
3865:             * is returned.
3866:             *
3867:             * In addition, this function maps the property
3868:             * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
3869:             * "General_Category_Mask".  These names are not in
3870:             * PropertyAliases.txt.
3871:             * 
3872:             * @param property UProperty selector.
3873:             *
3874:             * @param nameChoice UProperty.NameChoice selector for which name
3875:             * to get.  All properties have a long name.  Most have a short
3876:             * name, but some do not.  Unicode allows for additional names; if
3877:             * present these will be returned by UProperty.NameChoice.LONG + i,
3878:             * where i=1, 2,...
3879:             *
3880:             * @return a name, or null if Unicode explicitly defines no name
3881:             * ("n/a") for a given property/nameChoice.  If a given nameChoice
3882:             * throws an exception, then all larger values of nameChoice will
3883:             * throw an exception.  If null is returned for a given
3884:             * nameChoice, then other nameChoice values may return non-null
3885:             * results.
3886:             *
3887:             * @exception IllegalArgumentException thrown if property or
3888:             * nameChoice are invalid.
3889:             *
3890:             * @see UProperty
3891:             * @see UProperty.NameChoice
3892:             * @stable ICU 2.4
3893:             */
3894:            public static String getPropertyName(int property, int nameChoice) {
3895:                return PNAMES_.getPropertyName(property, nameChoice);
3896:            }
3897:
3898:            /**
3899:             * Return the UProperty selector for a given property name, as
3900:             * specified in the Unicode database file PropertyAliases.txt.
3901:             * Short, long, and any other variants are recognized.
3902:             *
3903:             * In addition, this function maps the synthetic names "gcm" /
3904:             * "General_Category_Mask" to the property
3905:             * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
3906:             * PropertyAliases.txt.
3907:             *
3908:             * @param propertyAlias the property name to be matched.  The name
3909:             * is compared using "loose matching" as described in
3910:             * PropertyAliases.txt.
3911:             *
3912:             * @return a UProperty enum.
3913:             *
3914:             * @exception IllegalArgumentException thrown if propertyAlias
3915:             * is not recognized.
3916:             *
3917:             * @see UProperty
3918:             * @stable ICU 2.4
3919:             */
3920:            public static int getPropertyEnum(String propertyAlias) {
3921:                return PNAMES_.getPropertyEnum(propertyAlias);
3922:            }
3923:
3924:            /**
3925:             * Return the Unicode name for a given property value, as given in
3926:             * the Unicode database file PropertyValueAliases.txt.  Most
3927:             * values have more than one name.  The nameChoice determines
3928:             * which one is returned.
3929:             *
3930:             * Note: Some of the names in PropertyValueAliases.txt can only be
3931:             * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
3932:             * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
3933:             * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
3934:             * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
3935:             *
3936:             * @param property UProperty selector constant.
3937:             * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
3938:             * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
3939:             * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
3940:             * If out of range, null is returned.
3941:             *
3942:             * @param value selector for a value for the given property.  In
3943:             * general, valid values range from 0 up to some maximum.  There
3944:             * are a few exceptions: (1.) UProperty.BLOCK values begin at the
3945:             * non-zero value BASIC_LATIN.getID().  (2.)
3946:             * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
3947:             * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
3948:             * are mask values produced by left-shifting 1 by
3949:             * UCharacter.getType().  This allows grouped categories such as
3950:             * [:L:] to be represented.  Mask values are non-contiguous.
3951:             *
3952:             * @param nameChoice UProperty.NameChoice selector for which name
3953:             * to get.  All values have a long name.  Most have a short name,
3954:             * but some do not.  Unicode allows for additional names; if
3955:             * present these will be returned by UProperty.NameChoice.LONG + i,
3956:             * where i=1, 2,...
3957:             *
3958:             * @return a name, or null if Unicode explicitly defines no name
3959:             * ("n/a") for a given property/value/nameChoice.  If a given
3960:             * nameChoice throws an exception, then all larger values of
3961:             * nameChoice will throw an exception.  If null is returned for a
3962:             * given nameChoice, then other nameChoice values may return
3963:             * non-null results.
3964:             *
3965:             * @exception IllegalArgumentException thrown if property, value,
3966:             * or nameChoice are invalid.
3967:             *
3968:             * @see UProperty
3969:             * @see UProperty.NameChoice
3970:             * @stable ICU 2.4
3971:             */
3972:            public static String getPropertyValueName(int property, int value,
3973:                    int nameChoice) {
3974:                if (property == UProperty.CANONICAL_COMBINING_CLASS
3975:                        && value >= UCharacter
3976:                                .getIntPropertyMinValue(UProperty.CANONICAL_COMBINING_CLASS)
3977:                        && value <= UCharacter
3978:                                .getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)
3979:                        && nameChoice >= 0
3980:                        && nameChoice < UProperty.NameChoice.COUNT) {
3981:                    // this is hard coded for the valid cc
3982:                    // because PropertyValueAliases.txt does not contain all of them
3983:                    try {
3984:                        return PNAMES_.getPropertyValueName(property, value,
3985:                                nameChoice);
3986:                    } catch (IllegalArgumentException e) {
3987:                        return null;
3988:                    }
3989:                }
3990:                return PNAMES_
3991:                        .getPropertyValueName(property, value, nameChoice);
3992:            }
3993:
3994:            /**
3995:             * Return the property value integer for a given value name, as
3996:             * specified in the Unicode database file PropertyValueAliases.txt.
3997:             * Short, long, and any other variants are recognized.
3998:             *
3999:             * Note: Some of the names in PropertyValueAliases.txt will only be
4000:             * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4001:             * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4002:             * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4003:             * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4004:             *
4005:             * @param property UProperty selector constant.
4006:             * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4007:             * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4008:             * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4009:             * Only these properties can be enumerated.
4010:             *
4011:             * @param valueAlias the value name to be matched.  The name is
4012:             * compared using "loose matching" as described in
4013:             * PropertyValueAliases.txt.
4014:             *
4015:             * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4016:             * values are mask values produced by left-shifting 1 by
4017:             * UCharacter.getType().  This allows grouped categories such as
4018:             * [:L:] to be represented.
4019:             *
4020:             * @see UProperty
4021:             * @throws IllegalArgumentException if property is not a valid UProperty
4022:             *         selector
4023:             * @stable ICU 2.4
4024:             */
4025:            public static int getPropertyValueEnum(int property,
4026:                    String valueAlias) {
4027:                return PNAMES_.getPropertyValueEnum(property, valueAlias);
4028:            }
4029:
4030:            /**
4031:             * Returns a code point corresponding to the two UTF16 characters.
4032:             * @param lead the lead char
4033:             * @param trail the trail char
4034:             * @return code point if surrogate characters are valid.
4035:             * @exception IllegalArgumentException thrown when argument characters do
4036:             *            not form a valid codepoint
4037:             * @stable ICU 2.1
4038:             */
4039:            public static int getCodePoint(char lead, char trail) {
4040:                if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE
4041:                        && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE
4042:                        && trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE
4043:                        && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4044:                    return UCharacterProperty.getRawSupplementary(lead, trail);
4045:                }
4046:                throw new IllegalArgumentException(
4047:                        "Illegal surrogate characters");
4048:            }
4049:
4050:            /**
4051:             * Returns the code point corresponding to the UTF16 character.
4052:             * @param char16 the UTF16 character
4053:             * @return code point if argument is a valid character.
4054:             * @exception IllegalArgumentException thrown when char16 is not a valid
4055:             *            codepoint
4056:             * @stable ICU 2.1
4057:             */
4058:            public static int getCodePoint(char char16) {
4059:                if (UCharacter.isLegal(char16)) {
4060:                    return char16;
4061:                }
4062:                throw new IllegalArgumentException("Illegal codepoint");
4063:            }
4064:
4065:            /**
4066:             * Implementation of UCaseProps.ContextIterator, iterates over a String.
4067:             * See ustrcase.c/utf16_caseContextIterator().
4068:             */
4069:            private static class StringContextIterator implements 
4070:                    UCaseProps.ContextIterator {
4071:                /**
4072:                 * Constructor.
4073:                 * @param s String to iterate over. 
4074:                 */
4075:                StringContextIterator(String s) {
4076:                    this .s = s;
4077:                    limit = s.length();
4078:                    cpStart = cpLimit = index = 0;
4079:                    dir = 0;
4080:                }
4081:
4082:                /**
4083:                 * Set the iteration limit for nextCaseMapCP() to an index within the string.
4084:                 * If the limit parameter is negative or past the string, then the
4085:                 * string length is restored as the iteration limit.
4086:                 *
4087:                 * This limit does not affect the next() function which always
4088:                 * iterates to the very end of the string.
4089:                 *
4090:                 * @param lim The iteration limit.
4091:                 */
4092:                public void setLimit(int lim) {
4093:                    if (0 <= lim && lim <= s.length()) {
4094:                        limit = lim;
4095:                    } else {
4096:                        limit = s.length();
4097:                    }
4098:                }
4099:
4100:                /**
4101:                 * Iterate forward through the string to fetch the next code point
4102:                 * to be case-mapped, and set the context indexes for it.
4103:                 * Performance optimization, to save on function calls and redundant
4104:                 * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex().
4105:                 *
4106:                 * When the iteration limit is reached (and -1 is returned),
4107:                 * getCPStart() will be at the iteration limit.
4108:                 *
4109:                 * Iteration with next() does not affect the position for nextCaseMapCP().
4110:                 *
4111:                 * @return The next code point to be case-mapped, or <0 when the iteration is done.
4112:                 */
4113:                public int nextCaseMapCP() {
4114:                    cpStart = cpLimit;
4115:                    if (cpLimit < limit) {
4116:                        int c = s.charAt(cpLimit++);
4117:                        if (UTF16.LEAD_SURROGATE_MIN_VALUE <= c
4118:                                || c <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4119:                            char c2;
4120:                            if (c <= UTF16.LEAD_SURROGATE_MAX_VALUE
4121:                                    && cpLimit < limit
4122:                                    && UTF16.TRAIL_SURROGATE_MIN_VALUE <= (c2 = s
4123:                                            .charAt(cpLimit))
4124:                                    && c2 <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
4125:                                // supplementary code point
4126:                                ++cpLimit;
4127:                                c = UCharacterProperty.getRawSupplementary(
4128:                                        (char) c, c2);
4129:                                // else unpaired surrogate code point
4130:                            }
4131:                            // else BMP code point
4132:                        }
4133:                        return c;
4134:                    } else {
4135:                        return -1;
4136:                    }
4137:                }
4138:
4139:                /**
4140:                 * Get the start of the code point that was last returned
4141:                 * by nextCaseMapCP().
4142:                 */
4143:                public int getCPStart() {
4144:                    return cpStart;
4145:                }
4146:
4147:                // implement UCaseProps.ContextIterator
4148:                public void reset(int dir) {
4149:                    if (dir > 0) {
4150:                        /* reset for forward iteration */
4151:                        this .dir = 1;
4152:                        index = cpLimit;
4153:                    } else if (dir < 0) {
4154:                        /* reset for backward iteration */
4155:                        this .dir = -1;
4156:                        index = cpStart;
4157:                    } else {
4158:                        // not a valid direction
4159:                        this .dir = 0;
4160:                        index = 0;
4161:                    }
4162:                }
4163:
4164:                public int next() {
4165:                    int c;
4166:
4167:                    if (dir > 0 && index < s.length()) {
4168:                        c = UTF16.charAt(s, index);
4169:                        index += UTF16.getCharCount(c);
4170:                        return c;
4171:                    } else if (dir < 0 && index > 0) {
4172:                        c = UTF16.charAt(s, index - 1);
4173:                        index -= UTF16.getCharCount(c);
4174:                        return c;
4175:                    }
4176:                    return -1;
4177:                }
4178:
4179:                // variables
4180:                protected String s;
4181:                protected int index, limit, cpStart, cpLimit;
4182:                protected int dir; // 0=initial state  >0=forward  <0=backward
4183:            }
4184:
4185:            /**
4186:             * Gets uppercase version of the argument string. 
4187:             * Casing is dependent on the default locale and context-sensitive.
4188:             * @param str source string to be performed on
4189:             * @return uppercase version of the argument string
4190:             * @stable ICU 2.1
4191:             */
4192:            public static String toUpperCase(String str) {
4193:                return toUpperCase(ULocale.getDefault(), str);
4194:            }
4195:
4196:            /**
4197:             * Gets lowercase version of the argument string. 
4198:             * Casing is dependent on the default locale and context-sensitive
4199:             * @param str source string to be performed on
4200:             * @return lowercase version of the argument string
4201:             * @stable ICU 2.1
4202:             */
4203:            public static String toLowerCase(String str) {
4204:                return toLowerCase(ULocale.getDefault(), str);
4205:            }
4206:
4207:            /**
4208:             * <p>Gets the titlecase version of the argument string.</p>
4209:             * <p>Position for titlecasing is determined by the argument break 
4210:             * iterator, hence the user can customized his break iterator for 
4211:             * a specialized titlecasing. In this case only the forward iteration 
4212:             * needs to be implemented.
4213:             * If the break iterator passed in is null, the default Unicode algorithm
4214:             * will be used to determine the titlecase positions.
4215:             * </p>
4216:             * <p>Only positions returned by the break iterator will be title cased,
4217:             * character in between the positions will all be in lower case.</p>
4218:             * <p>Casing is dependent on the default locale and context-sensitive</p>
4219:             * @param str source string to be performed on
4220:             * @param breakiter break iterator to determine the positions in which
4221:             *        the character should be title cased.
4222:             * @return lowercase version of the argument string
4223:             * @stable ICU 2.6
4224:             */
4225:            public static String toTitleCase(String str, BreakIterator breakiter) {
4226:                return toTitleCase(ULocale.getDefault(), str, breakiter);
4227:            }
4228:
4229:            /**
4230:             * Gets uppercase version of the argument string. 
4231:             * Casing is dependent on the argument locale and context-sensitive.
4232:             * @param locale which string is to be converted in
4233:             * @param str source string to be performed on
4234:             * @return uppercase version of the argument string
4235:             * @stable ICU 2.1
4236:             */
4237:            public static String toUpperCase(Locale locale, String str) {
4238:                return toUpperCase(ULocale.forLocale(locale), str);
4239:            }
4240:
4241:            /**
4242:             * Gets uppercase version of the argument string. 
4243:             * Casing is dependent on the argument locale and context-sensitive.
4244:             * @param locale which string is to be converted in
4245:             * @param str source string to be performed on
4246:             * @return uppercase version of the argument string
4247:             * @draft ICU 3.2
4248:             * @provisional This API might change or be removed in a future release.
4249:             */
4250:            public static String toUpperCase(ULocale locale, String str) {
4251:                StringContextIterator iter = new StringContextIterator(str);
4252:                StringBuffer result = new StringBuffer(str.length());
4253:                int[] locCache = new int[1];
4254:                int c;
4255:
4256:                if (locale == null) {
4257:                    locale = ULocale.getDefault();
4258:                }
4259:                locCache[0] = 0;
4260:
4261:                while ((c = iter.nextCaseMapCP()) >= 0) {
4262:                    c = gCsp.toFullUpper(c, iter, result, locale, locCache);
4263:
4264:                    /* decode the result */
4265:                    if (c < 0) {
4266:                        /* (not) original code point */
4267:                        c = ~c;
4268:                    } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
4269:                        /* mapping already appended to result */
4270:                        continue;
4271:                        /* } else { append single-code point mapping */
4272:                    }
4273:                    if (c <= 0xffff) {
4274:                        result.append((char) c);
4275:                    } else {
4276:                        UTF16.append(result, c);
4277:                    }
4278:                }
4279:                return result.toString();
4280:            }
4281:
4282:            /**
4283:             * Gets lowercase version of the argument string. 
4284:             * Casing is dependent on the argument locale and context-sensitive
4285:             * @param locale which string is to be converted in
4286:             * @param str source string to be performed on
4287:             * @return lowercase version of the argument string
4288:             * @stable ICU 2.1
4289:             */
4290:            public static String toLowerCase(Locale locale, String str) {
4291:                return toLowerCase(ULocale.forLocale(locale), str);
4292:            }
4293:
4294:            /**
4295:             * Gets lowercase version of the argument string. 
4296:             * Casing is dependent on the argument locale and context-sensitive
4297:             * @param locale which string is to be converted in
4298:             * @param str source string to be performed on
4299:             * @return lowercase version of the argument string
4300:             * @draft ICU 3.2
4301:             * @provisional This API might change or be removed in a future release.
4302:             */
4303:            public static String toLowerCase(ULocale locale, String str) {
4304:                StringContextIterator iter = new StringContextIterator(str);
4305:                StringBuffer result = new StringBuffer(str.length());
4306:                int[] locCache = new int[1];
4307:                int c;
4308:
4309:                if (locale == null) {
4310:                    locale = ULocale.getDefault();
4311:                }
4312:                locCache[0] = 0;
4313:
4314:                while ((c = iter.nextCaseMapCP()) >= 0) {
4315:                    c = gCsp.toFullLower(c, iter, result, locale, locCache);
4316:
4317:                    /* decode the result */
4318:                    if (c < 0) {
4319:                        /* (not) original code point */
4320:                        c = ~c;
4321:                    } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
4322:                        /* mapping already appended to result */
4323:                        continue;
4324:                        /* } else { append single-code point mapping */
4325:                    }
4326:                    if (c <= 0xffff) {
4327:                        result.append((char) c);
4328:                    } else {
4329:                        UTF16.append(result, c);
4330:                    }
4331:                }
4332:                return result.toString();
4333:            }
4334:
4335:            /**
4336:             * <p>Gets the titlecase version of the argument string.</p>
4337:             * <p>Position for titlecasing is determined by the argument break 
4338:             * iterator, hence the user can customized his break iterator for 
4339:             * a specialized titlecasing. In this case only the forward iteration 
4340:             * needs to be implemented.
4341:             * If the break iterator passed in is null, the default Unicode algorithm
4342:             * will be used to determine the titlecase positions.
4343:             * </p>
4344:             * <p>Only positions returned by the break iterator will be title cased,
4345:             * character in between the positions will all be in lower case.</p>
4346:             * <p>Casing is dependent on the argument locale and context-sensitive</p>
4347:             * @param locale which string is to be converted in
4348:             * @param str source string to be performed on
4349:             * @param breakiter break iterator to determine the positions in which
4350:             *        the character should be title cased.
4351:             * @return lowercase version of the argument string
4352:             * @stable ICU 2.6
4353:             */
4354:            public static String toTitleCase(Locale locale, String str,
4355:                    BreakIterator breakiter) {
4356:                return toTitleCase(ULocale.forLocale(locale), str, breakiter);
4357:            }
4358:
4359:            /**
4360:             * <p>Gets the titlecase version of the argument string.</p>
4361:             * <p>Position for titlecasing is determined by the argument break 
4362:             * iterator, hence the user can customized his break iterator for 
4363:             * a specialized titlecasing. In this case only the forward iteration 
4364:             * needs to be implemented.
4365:             * If the break iterator passed in is null, the default Unicode algorithm
4366:             * will be used to determine the titlecase positions.
4367:             * </p>
4368:             * <p>Only positions returned by the break iterator will be title cased,
4369:             * character in between the positions will all be in lower case.</p>
4370:             * <p>Casing is dependent on the argument locale and context-sensitive</p>
4371:             * @param locale which string is to be converted in
4372:             * @param str source string to be performed on
4373:             * @param titleIter break iterator to determine the positions in which
4374:             *        the character should be title cased.
4375:             * @return lowercase version of the argument string
4376:             * @draft ICU 3.2
4377:             * @provisional This API might change or be removed in a future release.
4378:             */
4379:            public static String toTitleCase(ULocale locale, String str,
4380:                    BreakIterator titleIter) {
4381:                StringContextIterator iter = new StringContextIterator(str);
4382:                StringBuffer result = new StringBuffer(str.length());
4383:                int[] locCache = new int[1];
4384:                int c, srcLength = str.length();
4385:
4386:                if (locale == null) {
4387:                    locale = ULocale.getDefault();
4388:                }
4389:                locCache[0] = 0;
4390:
4391:                if (titleIter == null) {
4392:                    titleIter = BreakIterator.getWordInstance(locale);
4393:                }
4394:                titleIter.setText(str);
4395:
4396:                int prev, titleStart, index;
4397:                boolean isFirstIndex;
4398:
4399:                /* set up local variables */
4400:                prev = 0;
4401:                isFirstIndex = true;
4402:
4403:                /* titlecasing loop */
4404:                while (prev < srcLength) {
4405:                    /* find next index where to titlecase */
4406:                    if (isFirstIndex) {
4407:                        isFirstIndex = false;
4408:                        index = titleIter.first();
4409:                    } else {
4410:                        index = titleIter.next();
4411:                    }
4412:                    if (index == BreakIterator.DONE || index > srcLength) {
4413:                        index = srcLength;
4414:                    }
4415:
4416:                    /*
4417:                     * Unicode 4 & 5 section 3.13 Default Case Operations:
4418:                     *
4419:                     * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4420:                     * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4421:                     * cased character F. If F exists, map F to default_title(F); then map each
4422:                     * subsequent character C to default_lower(C).
4423:                     *
4424:                     * In this implementation, segment [prev..index[ into 3 parts:
4425:                     * a) uncased characters (copy as-is) [prev..titleStart[
4426:                     * b) first case letter (titlecase)         [titleStart..titleLimit[
4427:                     * c) subsequent characters (lowercase)                 [titleLimit..index[
4428:                     */
4429:                    if (prev < index) {
4430:                        /* find and copy uncased characters [prev..titleStart[ */
4431:                        iter.setLimit(index);
4432:                        while ((c = iter.nextCaseMapCP()) >= 0
4433:                                && UCaseProps.NONE == gCsp.getType(c)) {
4434:                        }
4435:                        titleStart = iter.getCPStart();
4436:                        if (prev < titleStart) {
4437:                            // TODO: With Java 5, this would want to be result.append(str, prev, titleStart);
4438:                            result.append(str.substring(prev, titleStart));
4439:                        }
4440:
4441:                        if (titleStart < index) {
4442:                            /* titlecase c which is from titleStart */
4443:                            c = gCsp.toFullTitle(c, iter, result, locale,
4444:                                    locCache);
4445:
4446:                            /* decode the result and lowercase up to index */
4447:                            for (;;) {
4448:                                if (c < 0) {
4449:                                    /* (not) original code point */
4450:                                    c = ~c;
4451:                                    if (c <= 0xffff) {
4452:                                        result.append((char) c);
4453:                                    } else {
4454:                                        UTF16.append(result, c);
4455:                                    }
4456:                                } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
4457:                                    /* mapping already appended to result */
4458:                                } else {
4459:                                    /* append single-code point mapping */
4460:                                    if (c <= 0xffff) {
4461:                                        result.append((char) c);
4462:                                    } else {
4463:                                        UTF16.append(result, c);
4464:                                    }
4465:                                }
4466:
4467:                                if ((c = iter.nextCaseMapCP()) >= 0) {
4468:                                    c = gCsp.toFullLower(c, iter, result,
4469:                                            locale, locCache);
4470:                                } else {
4471:                                    break;
4472:                                }
4473:                            }
4474:                        }
4475:                    }
4476:
4477:                    prev = index;
4478:                }
4479:                return result.toString();
4480:            }
4481:
4482:            /**
4483:             * The given character is mapped to its case folding equivalent according 
4484:             * to UnicodeData.txt and CaseFolding.txt; if the character has no case 
4485:             * folding equivalent, the character itself is returned.
4486:             *
4487:             * <p>This function only returns the simple, single-code point case mapping.
4488:             * Full case mappings should be used whenever possible because they produce
4489:             * better results by working on whole strings.
4490:             * They can map to a result string with a different length as appropriate.
4491:             * Full case mappings are applied by the case mapping functions
4492:             * that take String parameters rather than code points (int).
4493:             * See also the User Guide chapter on C/POSIX migration:
4494:             * http://icu.sourceforge.net/userguide/posix.html#case_mappings
4495:             *
4496:             * @param ch             the character to be converted
4497:             * @param defaultmapping Indicates if all mappings defined in 
4498:             *                       CaseFolding.txt is to be used, otherwise the 
4499:             *                       mappings for dotted I  and dotless i marked with 
4500:             *                       'I' in CaseFolding.txt will be skipped.
4501:             * @return               the case folding equivalent of the character, if 
4502:             *                       any; otherwise the character itself.
4503:             * @see                  #foldCase(String, boolean)
4504:             * @stable ICU 2.1
4505:             */
4506:            public static int foldCase(int ch, boolean defaultmapping) {
4507:                return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT
4508:                        : FOLD_CASE_EXCLUDE_SPECIAL_I);
4509:            }
4510:
4511:            /**
4512:             * The given string is mapped to its case folding equivalent according to
4513:             * UnicodeData.txt and CaseFolding.txt; if any character has no case 
4514:             * folding equivalent, the character itself is returned.
4515:             * "Full", multiple-code point case folding mappings are returned here.
4516:             * For "simple" single-code point mappings use the API 
4517:             * foldCase(int ch, boolean defaultmapping).
4518:             * @param str            the String to be converted
4519:             * @param defaultmapping Indicates if all mappings defined in 
4520:             *                       CaseFolding.txt is to be used, otherwise the 
4521:             *                       mappings for dotted I and dotless i marked with 
4522:             *                       'I' in CaseFolding.txt will be skipped.
4523:             * @return               the case folding equivalent of the character, if 
4524:             *                       any; otherwise the character itself.
4525:             * @see                  #foldCase(int, boolean)
4526:             * @stable ICU 2.1
4527:             */
4528:            public static String foldCase(String str, boolean defaultmapping) {
4529:                return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT
4530:                        : FOLD_CASE_EXCLUDE_SPECIAL_I);
4531:            }
4532:
/**
 * Option value for case folding: use default mappings defined in CaseFolding.txt.
 * Passed as the options argument of the foldCase() overloads that
 * take an int; foldCase(..., true) selects this value.
 * @stable ICU 2.6
 */
public static final int FOLD_CASE_DEFAULT = 0x0000;
/**
 * Option value for case folding: exclude the mappings for dotted I
 * and dotless i marked with 'I' in CaseFolding.txt.
 * Passed as the options argument of the foldCase() overloads that
 * take an int; foldCase(..., false) selects this value.
 * @stable ICU 2.6
 */
public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4544:
4545:            /**
4546:             * Maps the given code point to its case folding equivalent according
4547:             * to UnicodeData.txt and CaseFolding.txt; if the code point has no case
4548:             * folding equivalent, the code point itself is returned.
4549:             *
4550:             * <p>This function only returns the simple, single-code point case mapping.
4551:             * Full case mappings should be used whenever possible because they produce
4552:             * better results by working on whole strings.
4553:             * They can map to a result string with a different length as appropriate.
4554:             * Full case mappings are applied by the case mapping functions
4555:             * that take String parameters rather than code points (int).
4556:             * See also the User Guide chapter on C/POSIX migration:
4557:             * http://icu.sourceforge.net/userguide/posix.html#case_mappings
4558:             *
4559:             * @param ch             the code point to be converted
4560:             * @param options        A bit set for special processing. Currently the recognised options
4561:             *                       are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4562:             * @return               the case folding equivalent of the code point, if
4563:             *                       any; otherwise the code point itself.
4564:             * @see #foldCase(String, int)
4565:             * @stable ICU 2.6
4566:             */
4567:            public static int foldCase(int ch, int options) {
4568:                return gCsp.fold(ch, options);
4569:            }
4570:
4571:            /**
4572:             * Maps the given string to its case folding equivalent according to
4573:             * UnicodeData.txt and CaseFolding.txt. Code points without a case
4574:             * folding equivalent are copied to the result unchanged.
4575:             *
4576:             * <p>"Full" case folding is applied here, so one code point may map to
4577:             * several code points and the result may differ in length from the
4578:             * input. For "simple" single-code point mappings use
4579:             * foldCase(int ch, int options) instead.
4580:             *
4581:             * @param str            the String to be converted
4582:             * @param options        A bit set for special processing. Currently the recognised
4583:             *                       options are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4584:             * @return               the case folded form of the string
4585:             * @see #foldCase(int, int)
4586:             * @stable ICU 2.6
4587:             */
4588:            public static final String foldCase(String str, int options) {
4589:                final int limit = str.length();
4590:                StringBuffer folded = new StringBuffer(limit);
4591:                int pos = 0;
4592:                while (pos < limit) {
4593:                    // read one code point and step over all of its code units
4594:                    int cp = UTF16.charAt(str, pos);
4595:                    pos += UTF16.getCharCount(cp);
4596:                    int mapped = gCsp.toFullFolding(cp, folded, options);
4597:                    if (mapped >= 0 && mapped <= UCaseProps.MAX_STRING_LENGTH) {
4598:                        // the mapping has already been appended to the result
4599:                        continue;
4600:                    }
4601:                    if (mapped < 0) {
4602:                        // negative result: bitwise complement of the original code point
4603:                        mapped = ~mapped;
4604:                    }
4605:                    if (mapped > 0xffff) {
4606:                        UTF16.append(folded, mapped);
4607:                    } else {
4608:                        folded.append((char) mapped);
4609:                    }
4610:                }
4611:                return folded.toString();
4612:            }
4613:
4614:            /**
4615:             * Returns the numeric value of a Han 'numeric' code point.
4616:             * <br>The recognised code points are the Han numerals for zero
4617:             * through ten, hundred, thousand, ten thousand and hundred million.
4618:             * This includes both the standard and the 'checkwriting' (complex)
4619:             * characters, the 'big circle' zero character, and the standard
4620:             * zero character. Ten thousand and hundred million are matched
4621:             * by a single code point each.
4622:             * @param ch code point to query
4623:             * @return the value if ch is a Han 'numeric' character, otherwise -1.
4624:             * @stable ICU 2.4
4625:             */
4626:            public static int getHanNumericValue(int ch) {
4627:                // TODO: Are these all covered by Unicode numeric value data?
4628:                if (ch == IDEOGRAPHIC_NUMBER_ZERO_ || ch == CJK_IDEOGRAPH_COMPLEX_ZERO_) {
4629:                    return 0; // Han Zero
4630:                }
4631:                if (ch == CJK_IDEOGRAPH_FIRST_ || ch == CJK_IDEOGRAPH_COMPLEX_ONE_) {
4632:                    return 1; // Han One
4633:                }
4634:                if (ch == CJK_IDEOGRAPH_SECOND_ || ch == CJK_IDEOGRAPH_COMPLEX_TWO_) {
4635:                    return 2; // Han Two
4636:                }
4637:                if (ch == CJK_IDEOGRAPH_THIRD_ || ch == CJK_IDEOGRAPH_COMPLEX_THREE_) {
4638:                    return 3; // Han Three
4639:                }
4640:                if (ch == CJK_IDEOGRAPH_FOURTH_ || ch == CJK_IDEOGRAPH_COMPLEX_FOUR_) {
4641:                    return 4; // Han Four
4642:                }
4643:                if (ch == CJK_IDEOGRAPH_FIFTH_ || ch == CJK_IDEOGRAPH_COMPLEX_FIVE_) {
4644:                    return 5; // Han Five
4645:                }
4646:                if (ch == CJK_IDEOGRAPH_SIXTH_ || ch == CJK_IDEOGRAPH_COMPLEX_SIX_) {
4647:                    return 6; // Han Six
4648:                }
4649:                if (ch == CJK_IDEOGRAPH_SEVENTH_ || ch == CJK_IDEOGRAPH_COMPLEX_SEVEN_) {
4650:                    return 7; // Han Seven
4651:                }
4652:                if (ch == CJK_IDEOGRAPH_EIGHTH_ || ch == CJK_IDEOGRAPH_COMPLEX_EIGHT_) {
4653:                    return 8; // Han Eight
4654:                }
4655:                if (ch == CJK_IDEOGRAPH_NINETH_ || ch == CJK_IDEOGRAPH_COMPLEX_NINE_) {
4656:                    return 9; // Han Nine
4657:                }
4658:                if (ch == CJK_IDEOGRAPH_TEN_ || ch == CJK_IDEOGRAPH_COMPLEX_TEN_) {
4659:                    return 10;
4660:                }
4661:                if (ch == CJK_IDEOGRAPH_HUNDRED_ || ch == CJK_IDEOGRAPH_COMPLEX_HUNDRED_) {
4662:                    return 100;
4663:                }
4664:                if (ch == CJK_IDEOGRAPH_THOUSAND_ || ch == CJK_IDEOGRAPH_COMPLEX_THOUSAND_) {
4665:                    return 1000;
4666:                }
4667:                if (ch == CJK_IDEOGRAPH_TEN_THOUSAND_) {
4668:                    return 10000;
4669:                }
4670:                if (ch == CJK_IDEOGRAPH_HUNDRED_MILLION_) {
4671:                    return 100000000;
4672:                }
4673:                return -1; // no value
4674:            }
4675:
4676:            /**
4677:             * <p>Gets an iterator for character types, iterating over codepoints.</p>
4678:             * Example of use:<br>
4679:             * <pre>
4680:             * RangeValueIterator iterator = UCharacter.getTypeIterator();
4681:             * RangeValueIterator.Element element = new RangeValueIterator.Element();
4682:             * while (iterator.next(element)) {
4683:             *     System.out.println("Codepoint \\u" +
4684:             *                        Integer.toHexString(element.start) +
4685:             *                        " to codepoint \\u" +
4686:             *                        Integer.toHexString(element.limit - 1) +
4687:             *                        " has the character type " +
4688:             *                        element.value);
4689:             * }
4690:             * </pre>
4691:             * @return a newly created iterator over code point ranges and their character type
4692:             * @stable ICU 2.6
4693:             */
4694:            public static RangeValueIterator getTypeIterator() {
4695:                return new UCharacterTypeIterator(PROPERTY_);
4696:            }
4697:
4698:            /**
4699:             * <p>Gets an iterator for character names, iterating over codepoints.</p>
4700:             * <p>This API only gets the iterator for the modern, most up-to-date
4701:             * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
4702:             * for extended names use getExtendedNameIterator().</p>
4703:             * Example of use:<br>
4704:             * <pre>
4705:             * ValueIterator iterator = UCharacter.getNameIterator();
4706:             * ValueIterator.Element element = new ValueIterator.Element();
4707:             * while (iterator.next(element)) {
4708:             *     System.out.println("Codepoint \\u" + Integer.toHexString(element.codepoint) +
4709:             *                        " has the name " + (String)element.value);
4710:             * }
4711:             * </pre>
4712:             * <p>The maximal range which the name iterator iterates is from
4713:             * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
4714:             * @return an iterator
4715:             * @throws MissingResourceException if the name data (unames.icu) could not be loaded
4716:             * @stable ICU 2.6
4717:             */
4718:            public static ValueIterator getNameIterator() {
4719:                if (NAME_ == null) {
4720:                    throw new MissingResourceException("Could not load unames.icu", "", "");
4721:                }
4722:                return new UCharacterNameIterator(NAME_,
4723:                        UCharacterNameChoice.UNICODE_CHAR_NAME);
4724:            }
4725:
4726:            /**
4727:             * <p>Gets an iterator for character names, iterating over codepoints.</p>
4728:             * <p>This API only gets the iterator for the older 1.0 Unicode names.
4729:             * For modern, most up-to-date Unicode names use getNameIterator() or
4730:             * for extended names use getExtendedNameIterator().</p>
4731:             * Example of use:<br>
4732:             * <pre>
4733:             * ValueIterator iterator = UCharacter.getName1_0Iterator();
4734:             * ValueIterator.Element element = new ValueIterator.Element();
4735:             * while (iterator.next(element)) {
4736:             *     System.out.println("Codepoint \\u" + Integer.toHexString(element.codepoint) +
4737:             *                        " has the name " + (String)element.value);
4738:             * }
4739:             * </pre>
4740:             * <p>The name iterator covers at most UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
4741:             * @return an iterator
4742:             * @throws MissingResourceException if the name data (unames.icu) could not be loaded
4743:             * @stable ICU 2.6
4744:             */
4745:            public static ValueIterator getName1_0Iterator() {
4746:                if (NAME_ == null) {
4747:                    throw new MissingResourceException("Could not load unames.icu", "", "");
4748:                }
4749:                return new UCharacterNameIterator(NAME_,
4750:                        UCharacterNameChoice.UNICODE_10_CHAR_NAME);
4751:            }
4752:
4753:            /**
4754:             * <p>Gets an iterator for character names, iterating over codepoints.</p>
4755:             * <p>This API only gets the iterator for the extended names.
4756:             * For modern, most up-to-date Unicode names use getNameIterator() or
4757:             * for older 1.0 Unicode names use getName1_0Iterator().</p>
4758:             * Example of use:<br>
4759:             * <pre>
4760:             * ValueIterator iterator = UCharacter.getExtendedNameIterator();
4761:             * ValueIterator.Element element = new ValueIterator.Element();
4762:             * while (iterator.next(element)) {
4763:             *     System.out.println("Codepoint \\u" + Integer.toHexString(element.codepoint) +
4764:             *                        " has the name " + (String)element.value);
4765:             * }
4766:             * </pre>
4767:             * <p>The maximal range which the name iterator iterates is from
4768:             * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.</p>
4769:             * @return an iterator
4770:             * @throws MissingResourceException if the name data (unames.icu) could not be loaded
4771:             * @stable ICU 2.6
4772:             */
4773:            public static ValueIterator getExtendedNameIterator() {
4774:                if (NAME_ != null) {
4775:                    return new UCharacterNameIterator(NAME_,
4776:                            UCharacterNameChoice.EXTENDED_CHAR_NAME);
4777:                }
4778:                throw new MissingResourceException("Could not load unames.icu", "", "");
4779:            }
4780:
4781:            /**
4782:             * <p>Gets the "age" of the code point.</p>
4783:             * <p>The "age" is the Unicode version in which the code point was first
4784:             * designated (as a non-character or for Private Use) or assigned a
4785:             * character. This can be useful to avoid emitting code points to
4786:             * receiving processes that do not accept newer characters.</p>
4787:             * <p>The data is from the UCD file DerivedAge.txt.</p>
4788:             * @param ch The code point.
4789:             * @return the Unicode version number
4790:             * @throws IllegalArgumentException if ch is outside MIN_VALUE..MAX_VALUE
4791:             * @stable ICU 2.6
4792:             */
4793:            public static VersionInfo getAge(int ch) {
4794:                boolean inRange = MIN_VALUE <= ch && ch <= MAX_VALUE;
4795:                if (!inRange) {
4796:                    throw new IllegalArgumentException("Codepoint out of bounds");
4797:                }
4798:                return PROPERTY_.getAge(ch);
4799:            }
4800:
4801:            /**
4802:             * <p>Checks a binary Unicode property for a code point.</p>
4803:             * <p>Unicode, especially in version 3.2, defines many more properties
4804:             * than the original set in UnicodeData.txt.</p>
4805:             * <p>This API is intended to reflect Unicode properties as defined in
4806:             * the Unicode Character Database (UCD) and Unicode Technical Reports
4807:             * (UTR).</p>
4808:             * <p>For details about the properties see
4809:             * <a href="http://www.unicode.org/">http://www.unicode.org/</a>.</p>
4810:             * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.</p>
4811:             * <p>The code point is range-checked before the property is looked up.</p>
4812:             * <p>Important: If ICU is built with UCD files from Unicode versions
4813:             * below 3.2, then properties marked with "new" are not or
4814:             * not fully available.</p>
4815:             * @param ch code point to test.
4816:             * @param property selector constant from com.ibm.icu.lang.UProperty,
4817:             *        identifies which binary property to check.
4818:             * @return true or false according to the binary Unicode property value
4819:             *         for ch. Also false if property is out of bounds or if the
4820:             *         Unicode version does not have data for the property at all, or
4821:             *         not for this code point.
4822:             * @throws IllegalArgumentException if ch is outside MIN_VALUE..MAX_VALUE
4823:             * @see com.ibm.icu.lang.UProperty
4824:             * @stable ICU 2.6
4825:             */
4826:            public static boolean hasBinaryProperty(int ch, int property) {
4827:                boolean inRange = MIN_VALUE <= ch && ch <= MAX_VALUE;
4828:                if (!inRange) {
4829:                    throw new IllegalArgumentException("Codepoint out of bounds");
4830:                }
4831:                return PROPERTY_.hasBinaryProperty(ch, property);
4832:            }
4833:
4834:            /**
4835:             * <p>Check if a code point has the Alphabetic Unicode property.</p>
4836:             * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC); different from UCharacter.isLetter(ch)!</p>
4837:             * @param ch codepoint to be tested
4838:             * @return true if hasBinaryProperty reports the Alphabetic property for ch
4839:             * @stable ICU 2.6
4840:             */
4841:            public static boolean isUAlphabetic(int ch) {
4842:                return hasBinaryProperty(ch, UProperty.ALPHABETIC);
4843:            }
4844:
4845:            /**
4846:             * <p>Check if a code point has the Lowercase Unicode property.</p>
4847:             * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE); different from UCharacter.isLowerCase(ch)!</p>
4848:             * @param ch codepoint to be tested
4849:             * @return true if hasBinaryProperty reports the Lowercase property for ch
4850:             * @stable ICU 2.6
4851:             */
4852:            public static boolean isULowercase(int ch) {
4853:                return hasBinaryProperty(ch, UProperty.LOWERCASE);
4854:            }
4855:
4856:            /**
4857:             * <p>Check if a code point has the Uppercase Unicode property.</p>
4858:             * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE); different from UCharacter.isUpperCase(ch)!</p>
4859:             * @param ch codepoint to be tested
4860:             * @return true if hasBinaryProperty reports the Uppercase property for ch
4861:             * @stable ICU 2.6
4862:             */
4863:            public static boolean isUUppercase(int ch) {
4864:                return hasBinaryProperty(ch, UProperty.UPPERCASE);
4865:            }
4866:
4867:            /**
4868:             * <p>Check if a code point has the White_Space Unicode property.</p>
4869:             * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE); this is
4870:             * different from both UCharacter.isSpace(ch) and UCharacter.isWhitespace(ch)!</p>
4871:             * @param ch codepoint to be tested
4872:             * @return true if hasBinaryProperty reports the White_Space property for ch
4873:             * @stable ICU 2.6
4874:             */
4875:            public static boolean isUWhiteSpace(int ch) {
4876:                return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
4877:            }
4878:
4879:            /**
4880:             * <p>Gets the property value for a Unicode property type of a code point.
4881:             * Also returns binary and mask property values.</p>
4882:             * <p>Unicode, especially in version 3.2, defines many more properties than
4883:             * the original set in UnicodeData.txt.</p>
4884:             * <p>The properties APIs are intended to reflect Unicode properties as
4885:             * defined in the Unicode Character Database (UCD) and Unicode Technical
4886:             * Reports (UTR). For details about the properties see
4887:             * http://www.unicode.org/.</p>
4888:             * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.</p>
4889:             * <pre>
4890:             * Sample usage:
4891:             * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
4892:             * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
4893:             * boolean b = (ideo == 1) ? true : false;
4894:             * </pre>
4895:             * @param ch code point to test.
4896:             * @param type UProperty selector constant, identifies which
4897:             *        property to check. Must be
4898:             *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
4899:             *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
4900:             *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
4901:             * @return numeric value that is directly the property value or,
4902:             *         for enumerated properties, corresponds to the numeric value of
4903:             *         the enumerated constant of the respective property value
4904:             *         enumeration type (cast to enum type if necessary).
4905:             *         Returns 0 or 1 (for false / true) for binary Unicode properties.
4906:             *         Returns a bit-mask for mask properties.
4907:             *         Returns 0 if 'type' is out of bounds or if the Unicode version
4908:             *         does not have data for the property at all, or not for this code
4909:             *         point.
4910:             * @throws IllegalArgumentException for binary properties if ch is out of range (see hasBinaryProperty)
4911:             * @see UProperty
4912:             * @see #hasBinaryProperty
4913:             * @see #getIntPropertyMinValue
4914:             * @see #getIntPropertyMaxValue
4915:             * @see #getUnicodeVersion
4916:             * @stable ICU 2.4
4917:             */
4918:            public static int getIntPropertyValue(int ch, int type) {
4919:                if (type < UProperty.BINARY_START) {
4920:                    return 0; // undefined
4921:                } else if (type < UProperty.BINARY_LIMIT) {
4922:                    return hasBinaryProperty(ch, type) ? 1 : 0;
4923:                } else if (type < UProperty.INT_START) {
4924:                    return 0; // undefined
4925:                } else if (type < UProperty.INT_LIMIT) {
4926:                    // int-valued properties: each selector is dispatched to its own data source
4927:                    switch (type) {
4928:                    case UProperty.BIDI_CLASS:
4929:                        return getDirection(ch);
4930:                    case UProperty.BLOCK:
4931:                        return UnicodeBlock.idOf(ch);
4932:                    case UProperty.CANONICAL_COMBINING_CLASS:
4933:                        return getCombiningClass(ch);
4934:                    case UProperty.DECOMPOSITION_TYPE:
4935:                        return PROPERTY_.getAdditional(ch, 2)
4936:                                & DECOMPOSITION_TYPE_MASK_;
4937:                    case UProperty.EAST_ASIAN_WIDTH:
4938:                        return (PROPERTY_.getAdditional(ch, 0) & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
4939:                    case UProperty.GENERAL_CATEGORY:
4940:                        return getType(ch);
4941:                    case UProperty.JOINING_GROUP:
4942:                        return gBdp.getJoiningGroup(ch);
4943:                    case UProperty.JOINING_TYPE:
4944:                        return gBdp.getJoiningType(ch);
4945:                    case UProperty.LINE_BREAK:
4946:                        return (int) (PROPERTY_.getAdditional(ch, 0) & LINE_BREAK_MASK_) >> LINE_BREAK_SHIFT_;
4947:                    case UProperty.NUMERIC_TYPE:
4948:                        type = getNumericType(PROPERTY_.getProperty(ch)); // 'type' is reused to hold the numeric type
4949:                        if (type > NumericType.NUMERIC) {
4950:                            /* keep internal variants of NumericType.NUMERIC from becoming visible */
4951:                            type = NumericType.NUMERIC;
4952:                        }
4953:                        return type;
4954:                    case UProperty.SCRIPT:
4955:                        return UScript.getScript(ch);
4956:                    case UProperty.HANGUL_SYLLABLE_TYPE:
4957:                        /* purely algorithmic; hardcode known characters, check for assigned new ones */
4958:                        if (ch < NormalizerImpl.JAMO_L_BASE) {
4959:                            /* NA */
4960:                        } else if (ch <= 0x11ff) {
4961:                            /* Jamo range */
4962:                            if (ch <= 0x115f) {
4963:                                /* Jamo L range, HANGUL CHOSEONG ... */
4964:                                if (ch == 0x115f
4965:                                        || ch <= 0x1159
4966:                                        || getType(ch) == UCharacterCategory.OTHER_LETTER) {
4967:                                    return HangulSyllableType.LEADING_JAMO;
4968:                                }
4969:                            } else if (ch <= 0x11a7) {
4970:                                /* Jamo V range, HANGUL JUNGSEONG ... */
4971:                                if (ch <= 0x11a2
4972:                                        || getType(ch) == UCharacterCategory.OTHER_LETTER) {
4973:                                    return HangulSyllableType.VOWEL_JAMO;
4974:                                }
4975:                            } else {
4976:                                /* Jamo T range */
4977:                                if (ch <= 0x11f9
4978:                                        || getType(ch) == UCharacterCategory.OTHER_LETTER) {
4979:                                    return HangulSyllableType.TRAILING_JAMO;
4980:                                }
4981:                            }
4982:                        } else if ((ch -= NormalizerImpl.HANGUL_BASE) < 0) { // ch is now an offset from HANGUL_BASE
4983:                            /* NA */
4984:                        } else if (ch < NormalizerImpl.HANGUL_COUNT) {
4985:                            /* Hangul syllable: an offset divisible by JAMO_T_COUNT is LV, otherwise LVT */
4986:                            return ch % NormalizerImpl.JAMO_T_COUNT == 0 ? HangulSyllableType.LV_SYLLABLE
4987:                                    : HangulSyllableType.LVT_SYLLABLE;
4988:                        }
4989:                        return 0; /* NA */
4990:
4991:                    case UProperty.NFD_QUICK_CHECK:
4992:                    case UProperty.NFKD_QUICK_CHECK:
4993:                    case UProperty.NFC_QUICK_CHECK:
4994:                    case UProperty.NFKC_QUICK_CHECK:
4995:                        return NormalizerImpl.quickCheck(ch,
4996:                                (type - UProperty.NFD_QUICK_CHECK) + 2); // 2=UNORM_NFD
4997:                    case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
4998:                        return NormalizerImpl.getFCD16(ch) >> 8;
4999:                    case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
5000:                        return NormalizerImpl.getFCD16(ch) & 0xff;
5001:                    case UProperty.GRAPHEME_CLUSTER_BREAK:
5002:                        return (int) (PROPERTY_.getAdditional(ch, 2) & GCB_MASK) >> GCB_SHIFT;
5003:                    case UProperty.SENTENCE_BREAK:
5004:                        return (int) (PROPERTY_.getAdditional(ch, 2) & SB_MASK) >> SB_SHIFT;
5005:                    case UProperty.WORD_BREAK:
5006:                        return (int) (PROPERTY_.getAdditional(ch, 2) & WB_MASK) >> WB_SHIFT;
5007:                    default:
5008:
5009:                        return 0; /* undefined */
5010:                    }
5011:                } else if (type == UProperty.GENERAL_CATEGORY_MASK) {
5012:                    return UCharacterProperty.getMask(getType(ch));
5013:                }
5014:                return 0; // undefined
5015:            }
5016:
5017:            /**
5018:             * Returns a string version of the property value.
5019:             * @param propertyEnum
5020:             * @param codepoint
5021:             * @param nameChoice
5022:             * @return value as string
5023:             * @internal
5024:             * @deprecated This API is ICU internal only.
5025:             */
5026:            public static String getStringPropertyValue(int propertyEnum,
5027:                    int codepoint, int nameChoice) {
5028:                // TODO some of these are less efficient, since a string is forced!
5029:                if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT)
5030:                        || (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5031:                    return getPropertyValueName(propertyEnum,
5032:                            getIntPropertyValue(codepoint, propertyEnum),
5033:                            nameChoice);
5034:                }
5035:                if (propertyEnum == UProperty.NUMERIC_VALUE) {
5036:                    return String.valueOf(getUnicodeNumericValue(codepoint));
5037:                }
5038:                // otherwise must be string property
5039:                switch (propertyEnum) {
5040:                case UProperty.AGE:
5041:                    return getAge(codepoint).toString();
5042:                case UProperty.ISO_COMMENT:
5043:                    return getISOComment(codepoint);
5044:                case UProperty.BIDI_MIRRORING_GLYPH:
5045:                    return UTF16.valueOf(getMirror(codepoint));
5046:                case UProperty.CASE_FOLDING:
5047:                    return foldCase(UTF16.valueOf(codepoint), true);
5048:                case UProperty.LOWERCASE_MAPPING:
5049:                    return toLowerCase(UTF16.valueOf(codepoint));
5050:                case UProperty.NAME:
5051:                    return getName(codepoint);
5052:                case UProperty.SIMPLE_CASE_FOLDING:
5053:                    return UTF16.valueOf(foldCase(codepoint, true));
5054:                case UProperty.SIMPLE_LOWERCASE_MAPPING:
5055:                    return UTF16.valueOf(toLowerCase(codepoint));
5056:                case UProperty.SIMPLE_TITLECASE_MAPPING:
5057:                    return UTF16.valueOf(toTitleCase(codepoint));
5058:                case UProperty.SIMPLE_UPPERCASE_MAPPING:
5059:                    return UTF16.valueOf(toUpperCase(codepoint));
5060:                case UProperty.TITLECASE_MAPPING:
5061:                    return toTitleCase(UTF16.valueOf(codepoint), null);
5062:                case UProperty.UNICODE_1_NAME:
5063:                    return getName1_0(codepoint);
5064:                case UProperty.UPPERCASE_MAPPING:
5065:                    return toUpperCase(UTF16.valueOf(codepoint));
5066:                }
5067:                throw new IllegalArgumentException("Illegal Property Enum");
5068:            }
5069:
5070:            /**
5071:             * Get the minimum value for an integer/binary Unicode property type.
5072:             * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5073:             * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5074:             * <p>This implementation returns 0 for every value of 'type'.</p>
5075:             * @param type UProperty selector constant identifying the property. Must be
5076:             *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5077:             *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5078:             * @return Minimum value returned by UCharacter.getIntPropertyValue(int, int)
5079:             *         for a Unicode property. 0 if the property
5080:             *         selector 'type' is out of range.
5081:             * @see UProperty
5082:             * @see #hasBinaryProperty
5083:             * @see #getUnicodeVersion
5084:             * @see #getIntPropertyMaxValue
5085:             * @see #getIntPropertyValue
5086:             * @stable ICU 2.4
5087:             */
5088:            public static int getIntPropertyMinValue(int type) {
5089:
5090:                return 0; // 'type' is not inspected: undefined selectors report 0, and all
5091:                // other properties have a minimum value of 0
5092:            }
5093:
5094:            /**
5095:             * Get the maximum value for an integer/binary Unicode property.
5096:             * Can be used together with UCharacter.getIntPropertyMinValue(int)
5097:             * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5098:             * Examples for min/max values (for Unicode 3.2):
5099:             * <ul>
5100:             * <li> UProperty.BIDI_CLASS:    0/18 (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5101:             * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5102:             * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5103:             * </ul>
5104:             * For undefined UProperty constant values, min/max values will be 0/-1.
5105:             * @param type UProperty selector constant, identifies which binary 
5106:             *        property to check. Must be 
5107:             *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or 
5108:             *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5109:             * @return Maximum value returned by u_getIntPropertyValue for a Unicode 
5110:             *         property. &lt;= 0 if the property selector 'type' is out of range.
5111:             * @see UProperty
5112:             * @see #hasBinaryProperty
5113:             * @see #getUnicodeVersion
5114:             * @see #getIntPropertyMaxValue
5115:             * @see #getIntPropertyValue
5116:             * @stable ICU 2.4
5117:             */
5118:            public static int getIntPropertyMaxValue(int type) {
5119:                if (type < UProperty.BINARY_START) {
5120:                    return -1; // undefined
5121:                } else if (type < UProperty.BINARY_LIMIT) {
5122:                    return 1; // maximum TRUE for all binary properties
5123:                } else if (type < UProperty.INT_START) {
5124:                    return -1; // undefined
5125:                } else if (type < UProperty.INT_LIMIT) {
5126:                    switch (type) {
5127:                    case UProperty.BIDI_CLASS:
5128:                    case UProperty.JOINING_GROUP:
5129:                    case UProperty.JOINING_TYPE:
5130:                        return gBdp.getMaxValue(type);
5131:                    case UProperty.BLOCK:
5132:                        return (PROPERTY_.getMaxValues(0) & BLOCK_MASK_) >> BLOCK_SHIFT_;
5133:                    case UProperty.CANONICAL_COMBINING_CLASS:
5134:                    case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
5135:                    case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
5136:                        return 0xff; // TODO do we need to be more precise, 
5137:                        // getting the actual maximum?
5138:                    case UProperty.DECOMPOSITION_TYPE:
5139:                        return PROPERTY_.getMaxValues(2)
5140:                                & DECOMPOSITION_TYPE_MASK_;
5141:                    case UProperty.EAST_ASIAN_WIDTH:
5142:                        return (PROPERTY_.getMaxValues(0) & EAST_ASIAN_MASK_) >> EAST_ASIAN_SHIFT_;
5143:                    case UProperty.GENERAL_CATEGORY:
5144:                        return UCharacterCategory.CHAR_CATEGORY_COUNT - 1;
5145:                    case UProperty.LINE_BREAK:
5146:                        return (PROPERTY_.getMaxValues(0) & LINE_BREAK_MASK_) >> LINE_BREAK_SHIFT_;
5147:                    case UProperty.NUMERIC_TYPE:
5148:                        return NumericType.COUNT - 1;
5149:                    case UProperty.SCRIPT:
5150:                        return PROPERTY_.getMaxValues(0) & SCRIPT_MASK_;
5151:                    case UProperty.HANGUL_SYLLABLE_TYPE:
5152:                        return HangulSyllableType.COUNT - 1;
5153:                    case UProperty.NFD_QUICK_CHECK:
5154:                    case UProperty.NFKD_QUICK_CHECK:
5155:                        return 1; // YES -- these are never "maybe", only "no" or "yes"
5156:                    case UProperty.NFC_QUICK_CHECK:
5157:                    case UProperty.NFKC_QUICK_CHECK:
5158:                        return 2; // MAYBE
5159:                    case UProperty.GRAPHEME_CLUSTER_BREAK:
5160:                        return (PROPERTY_.getMaxValues(2) & GCB_MASK) >> GCB_SHIFT;
5161:                    case UProperty.SENTENCE_BREAK:
5162:                        return (PROPERTY_.getMaxValues(2) & SB_MASK) >> SB_SHIFT;
5163:                    case UProperty.WORD_BREAK:
5164:                        return (PROPERTY_.getMaxValues(2) & WB_MASK) >> WB_SHIFT;
5165:                    default:
5166:                        return -1; // undefined
5167:                    }
5168:
5169:                }
5170:                return -1; // undefined
5171:            }
5172:
5173:            /**
5174:             * Provide the java.lang.Character forDigit API, for convenience.
5175:             * @stable ICU 3.0
5176:             */
5177:            public static char forDigit(int digit, int radix) {
5178:                return java.lang.Character.forDigit(digit, radix);
5179:            }
5180:
5181:            // JDK 1.5 API coverage
5182:
5183:            /**
5184:             * Cover the JDK 1.5 API, for convenience.
5185:             * @see UTF16#LEAD_SURROGATE_MIN_VALUE
5186:             * @stable ICU 3.0
5187:             */
5188:            public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;
5189:
5190:            /**
5191:             * Cover the JDK 1.5 API, for convenience.
5192:             * @see UTF16#LEAD_SURROGATE_MAX_VALUE
5193:             * @stable ICU 3.0
5194:             */
5195:            public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;
5196:
5197:            /**
5198:             * Cover the JDK 1.5 API, for convenience.
5199:             * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
5200:             * @stable ICU 3.0
5201:             */
5202:            public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;
5203:
5204:            /**
5205:             * Cover the JDK 1.5 API, for convenience.
5206:             * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
5207:             * @stable ICU 3.0
5208:             */
5209:            public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;
5210:
5211:            /**
5212:             * Cover the JDK 1.5 API, for convenience.
5213:             * @see UTF16#SURROGATE_MIN_VALUE
5214:             * @stable ICU 3.0
5215:             */
5216:            public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;
5217:
5218:            /**
5219:             * Cover the JDK 1.5 API, for convenience.
5220:             * @see UTF16#SURROGATE_MAX_VALUE
5221:             * @stable ICU 3.0
5222:             */
5223:            public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;
5224:
5225:            /**
5226:             * Cover the JDK 1.5 API, for convenience.
5227:             * @see UTF16#SUPPLEMENTARY_MIN_VALUE
5228:             * @stable ICU 3.0
5229:             */
5230:            public static final int MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;
5231:
5232:            /**
5233:             * Cover the JDK 1.5 API, for convenience.
5234:             * @see UTF16#CODEPOINT_MAX_VALUE
5235:             * @stable ICU 3.0
5236:             */
5237:            public static final int MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;
5238:
5239:            /**
5240:             * Cover the JDK 1.5 API, for convenience.
5241:             * @see UTF16#CODEPOINT_MIN_VALUE
5242:             * @stable ICU 3.0
5243:             */
5244:            public static final int MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE;
5245:
5246:            /**
5247:             * Cover the JDK 1.5 API, for convenience.
5248:             * @param cp the code point to check
5249:             * @return true if cp is a valid code point
5250:             * @stable ICU 3.0
5251:             */
5252:            public static final boolean isValidCodePoint(int cp) {
5253:                return cp >= 0 && cp <= MAX_CODE_POINT;
5254:            }
5255:
5256:            /**
5257:             * Cover the JDK 1.5 API, for convenience.
5258:             * @param cp the code point to check
5259:             * @return true if cp is a supplementary code point
5260:             * @stable ICU 3.0
5261:             */
5262:            public static final boolean isSupplementaryCodePoint(int cp) {
5263:                return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE
5264:                        && cp <= UTF16.CODEPOINT_MAX_VALUE;
5265:            }
5266:
5267:            /**
5268:             * Cover the JDK 1.5 API, for convenience.
5269:             * @param ch the char to check
5270:             * @return true if ch is a high (lead) surrogate
5271:             * @stable ICU 3.0
5272:             */
5273:            public static boolean isHighSurrogate(char ch) {
5274:                return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
5275:            }
5276:
5277:            /**
5278:             * Cover the JDK 1.5 API, for convenience.
5279:             * @param ch the char to check
5280:             * @return true if ch is a low (trail) surrogate
5281:             * @stable ICU 3.0
5282:             */
5283:            public static boolean isLowSurrogate(char ch) {
5284:                return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
5285:            }
5286:
5287:            /**
5288:             * Cover the JDK 1.5 API, for convenience.  Return true if the chars
5289:             * form a valid surrogate pair.
5290:             * @param high the high (lead) char
5291:             * @param low the low (trail) char
5292:             * @return true if high, low form a surrogate pair
5293:             * @stable ICU 3.0
5294:             */
5295:            public static final boolean isSurrogatePair(char high, char low) {
5296:                return isHighSurrogate(high) && isLowSurrogate(low);
5297:            }
5298:
5299:            /**
5300:             * Cover the JDK 1.5 API, for convenience.  Return the number of chars needed
5301:             * to represent the code point.  This does not check the
5302:             * code point for validity.
5303:             * @param cp the code point to check
5304:             * @return the number of chars needed to represent the code point
5305:             * @see UTF16#getCharCount
5306:             * @stable ICU 3.0
5307:             */
5308:            public static int charCount(int cp) {
5309:                return UTF16.getCharCount(cp);
5310:            }
5311:
5312:            /**
5313:             * Cover the JDK 1.5 API, for convenience.  Return the code point represented by
5314:             * the characters.  This does not check the surrogate pair for validity.
5315:             * @param high the high (lead) surrogate
5316:             * @param low the low (trail) surrogate
5317:             * @return the code point formed by the surrogate pair
5318:             * @stable ICU 3.0
5319:             */
5320:            public static final int toCodePoint(char high, char low) {
5321:                return UCharacterProperty.getRawSupplementary(high, low);
5322:            }
5323:
5324:            /**
5325:             * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5326:             * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5327:             * API.  This examines only the characters at index and index+1.
5328:             * @param seq the characters to check
5329:             * @param index the index of the first or only char forming the code point
5330:             * @return the code point at the index
5331:             * @stable ICU 3.0
5332:             */
5333:            //#ifndef FOUNDATION
5334:            public static final int codePointAt(CharSequence seq, int index) {
5335:                //#else
5336:                //##    public static final int codePointAt(String seq, int index) {
5337:                //#endif
5338:                char c1 = seq.charAt(index++);
5339:                if (isHighSurrogate(c1)) {
5340:                    if (index < seq.length()) {
5341:                        char c2 = seq.charAt(index);
5342:                        if (isLowSurrogate(c2)) {
5343:                            return toCodePoint(c1, c2);
5344:                        }
5345:                    }
5346:                }
5347:                return c1;
5348:            }
5349:
5350:            //#ifdef FOUNDATION
5351:            //##    public static final int codePointAt(StringBuffer seq, int index) {
5352:            //##        return codePointAt(seq.toString(), index);
5353:            //##    }
5354:            //#endif
5355:
5356:            /**
5357:             * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5358:             * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5359:             * API.  This examines only the characters at index and index+1.
5360:             * @param text the characters to check
5361:             * @param index the index of the first or only char forming the code point
5362:             * @return the code point at the index
5363:             * @stable ICU 3.0
5364:             */
5365:            public static final int codePointAt(char[] text, int index) {
5366:                char c1 = text[index++];
5367:                if (isHighSurrogate(c1)) {
5368:                    if (index < text.length) {
5369:                        char c2 = text[index];
5370:                        if (isLowSurrogate(c2)) {
5371:                            return toCodePoint(c1, c2);
5372:                        }
5373:                    }
5374:                }
5375:                return c1;
5376:            }
5377:
5378:            /**
5379:             * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
5380:             * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5381:             * API.  This examines only the characters at index and index+1.
5382:             * @param text the characters to check
5383:             * @param index the index of the first or only char forming the code point
5384:             * @param limit the limit of the valid text
5385:             * @return the code point at the index
5386:             * @stable ICU 3.0
5387:             */
5388:            public static final int codePointAt(char[] text, int index,
5389:                    int limit) {
5390:                if (index >= limit || limit > text.length) {
5391:                    throw new IndexOutOfBoundsException();
5392:                }
5393:                char c1 = text[index++];
5394:                if (isHighSurrogate(c1)) {
5395:                    if (index < limit) {
5396:                        char c2 = text[index];
5397:                        if (isLowSurrogate(c2)) {
5398:                            return toCodePoint(c1, c2);
5399:                        }
5400:                    }
5401:                }
5402:                return c1;
5403:            }
5404:
5405:            /**
5406:             * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5407:             * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5408:             * API.  This examines only the characters at index-1 and index-2.
5409:             * @param seq the characters to check
5410:             * @param index the index after the last or only char forming the code point
5411:             * @return the code point before the index
5412:             * @stable ICU 3.0
5413:             */
5414:            //#ifndef FOUNDATION
5415:            public static final int codePointBefore(CharSequence seq, int index) {
5416:                //#else
5417:                //##    public static final int codePointBefore(String seq, int index) {
5418:                //#endif
5419:                char c2 = seq.charAt(--index);
5420:                if (isLowSurrogate(c2)) {
5421:                    if (index > 0) {
5422:                        char c1 = seq.charAt(--index);
5423:                        if (isHighSurrogate(c1)) {
5424:                            return toCodePoint(c1, c2);
5425:                        }
5426:                    }
5427:                }
5428:                return c2;
5429:            }
5430:
5431:            //#ifdef FOUNDATION
5432:            //##    public static final int codePointBefore(StringBuffer seq, int index) {
5433:            //##        return codePointBefore(seq.toString(), index);
5434:            //##    }
5435:            //#endif
5436:
5437:            /**
5438:             * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5439:             * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5440:             * API.  This examines only the characters at index-1 and index-2.
5441:             * @param text the characters to check
5442:             * @param index the index after the last or only char forming the code point
5443:             * @return the code point before the index
5444:             * @stable ICU 3.0
5445:             */
5446:            public static final int codePointBefore(char[] text, int index) {
5447:                char c2 = text[--index];
5448:                if (isLowSurrogate(c2)) {
5449:                    if (index > 0) {
5450:                        char c1 = text[--index];
5451:                        if (isHighSurrogate(c1)) {
5452:                            return toCodePoint(c1, c2);
5453:                        }
5454:                    }
5455:                }
5456:                return c2;
5457:            }
5458:
5459:            /**
5460:             * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
5461:             * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
5462:             * API.  This examines only the characters at index-1 and index-2.
5463:             * @param text the characters to check
5464:             * @param index the index after the last or only char forming the code point
5465:             * @param limit the start of the valid text
5466:             * @return the code point before the index
5467:             * @stable ICU 3.0
5468:             */
5469:            public static final int codePointBefore(char[] text, int index,
5470:                    int limit) {
5471:                if (index <= limit || limit < 0) {
5472:                    throw new IndexOutOfBoundsException();
5473:                }
5474:                char c2 = text[--index];
5475:                if (isLowSurrogate(c2)) {
5476:                    if (index > limit) {
5477:                        char c1 = text[--index];
5478:                        if (isHighSurrogate(c1)) {
5479:                            return toCodePoint(c1, c2);
5480:                        }
5481:                    }
5482:                }
5483:                return c2;
5484:            }
5485:
5486:            /**
5487:             * Cover the JDK 1.5 API, for convenience.  Writes the chars representing the
5488:             * code point into the destination at the given index.
5489:             * @param cp the code point to convert
5490:             * @param dst the destination array into which to put the char(s) representing the code point
5491:             * @param dstIndex the index at which to put the first (or only) char
5492:             * @return the count of the number of chars written (1 or 2)
5493:             * @throws IllegalArgumentException if cp is not a valid code point
5494:             * @stable ICU 3.0
5495:             */
5496:            public static final int toChars(int cp, char[] dst, int dstIndex) {
5497:                if (cp >= 0) {
5498:                    if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5499:                        dst[dstIndex] = (char) cp;
5500:                        return 1;
5501:                    }
5502:                    if (cp <= MAX_CODE_POINT) {
5503:                        dst[dstIndex] = UTF16.getLeadSurrogate(cp);
5504:                        dst[dstIndex + 1] = UTF16.getTrailSurrogate(cp);
5505:                        return 2;
5506:                    }
5507:                }
5508:                throw new IllegalArgumentException();
5509:            }
5510:
5511:            /**
5512:             * Cover the JDK 1.5 API, for convenience.  Returns a char array
5513:             * representing the code point.
5514:             * @param cp the code point to convert
5515:             * @return an array containing the char(s) representing the code point
5516:             * @throws IllegalArgumentException if cp is not a valid code point
5517:             * @stable ICU 3.0
5518:             */
5519:            public static final char[] toChars(int cp) {
5520:                if (cp >= 0) {
5521:                    if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
5522:                        return new char[] { (char) cp };
5523:                    }
5524:                    if (cp <= MAX_CODE_POINT) {
5525:                        return new char[] { UTF16.getLeadSurrogate(cp),
5526:                                UTF16.getTrailSurrogate(cp) };
5527:                    }
5528:                }
5529:                throw new IllegalArgumentException();
5530:            }
5531:
5532:            /**
5533:             * Cover the JDK API, for convenience.  Return a byte representing the directionality of
5534:             * the character.
5535:             * <br/><b>Note</b>: Unlike the JDK, this returns DIRECTIONALITY_LEFT_TO_RIGHT for undefined or
5536:             * out-of-bounds characters.  <br/><b>Note</b>: The return value must be
5537:             * tested using the constants defined in {@link UCharacterEnums.ECharacterDirection}
5538:             * since the values are different from the ones defined by <code>java.lang.Character</code>.
5539:             * @param cp the code point to check
5540:             * @return the directionality of the code point
5541:             * @see #getDirection
5542:             * @stable ICU 3.0
5543:             */
5544:            public static byte getDirectionality(int cp) {
5545:                return (byte) getDirection(cp);
5546:            }
5547:
5548:            /**
5549:             * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5550:             * @param text the characters to check
5551:             * @param start the start of the range
5552:             * @param limit the limit of the range
5553:             * @return the number of code points in the range
5554:             * @stable ICU 3.0
5555:             */
5556:            //#ifndef FOUNDATION
5557:            public static int codePointCount(CharSequence text, int start,
5558:                    int limit) {
5559:                //#else
5560:                //##    public static int codePointCount(String text, int start, int limit) {
5561:                //#endif
5562:                if (start < 0 || limit < start || limit > text.length()) {
5563:                    throw new IndexOutOfBoundsException("start (" + start
5564:                            + ") or limit (" + limit
5565:                            + ") invalid or out of range 0, " + text.length());
5566:                }
5567:
5568:                int len = limit - start;
5569:                while (limit > start) {
5570:                    char ch = text.charAt(--limit);
5571:                    while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE
5572:                            && limit > start) {
5573:                        ch = text.charAt(--limit);
5574:                        if (ch >= MIN_HIGH_SURROGATE
5575:                                && ch <= MAX_HIGH_SURROGATE) {
5576:                            --len;
5577:                            break;
5578:                        }
5579:                    }
5580:                }
5581:                return len;
5582:            }
5583:
5584:            /**
5585:             * Cover the JDK API, for convenience.  Count the number of code points in the range of text.
5586:             * @param text the characters to check
5587:             * @param start the start of the range
5588:             * @param limit the limit of the range
5589:             * @return the number of code points in the range
5590:             * @stable ICU 3.0
5591:             */
5592:            public static int codePointCount(char[] text, int start, int limit) {
5593:                if (start < 0 || limit < start || limit > text.length) {
5594:                    throw new IndexOutOfBoundsException("start (" + start
5595:                            + ") or limit (" + limit
5596:                            + ") invalid or out of range 0, " + text.length);
5597:                }
5598:
5599:                int len = limit - start;
5600:                while (limit > start) {
5601:                    char ch = text[--limit];
5602:                    while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE
5603:                            && limit > start) {
5604:                        ch = text[--limit];
5605:                        if (ch >= MIN_HIGH_SURROGATE
5606:                                && ch <= MAX_HIGH_SURROGATE) {
5607:                            --len;
5608:                            break;
5609:                        }
5610:                    }
5611:                }
5612:                return len;
5613:            }
5614:
5615:            /**
5616:             * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
5617:             * @param text the characters to check
5618:             * @param index the index to adjust
5619:             * @param codePointOffset the number of code points by which to offset the index
5620:             * @return the adjusted index
5621:             * @stable ICU 3.0
5622:             */
5623:            //#ifndef FOUNDATION
5624:            public static int offsetByCodePoints(CharSequence text, int index,
5625:                    int codePointOffset) {
5626:                //#else
5627:                //##    public static int offsetByCodePoints(String text, int index, int codePointOffset) {
5628:                //#endif
5629:                if (index < 0 || index > text.length()) {
5630:                    throw new IndexOutOfBoundsException("index ( " + index
5631:                            + ") out of range 0, " + text.length());
5632:                }
5633:
5634:                if (codePointOffset < 0) {
5635:                    while (++codePointOffset <= 0) {
5636:                        char ch = text.charAt(--index);
5637:                        while (ch >= MIN_LOW_SURROGATE
5638:                                && ch <= MAX_LOW_SURROGATE && index > 0) {
5639:                            ch = text.charAt(--index);
5640:                            if (ch < MIN_HIGH_SURROGATE
5641:                                    || ch > MAX_HIGH_SURROGATE) {
5642:                                if (++codePointOffset > 0) {
5643:                                    return index + 1;
5644:                                }
5645:                            }
5646:                        }
5647:                    }
5648:                } else {
5649:                    int limit = text.length();
5650:                    while (--codePointOffset >= 0) {
5651:                        char ch = text.charAt(index++);
5652:                        while (ch >= MIN_HIGH_SURROGATE
5653:                                && ch <= MAX_HIGH_SURROGATE && index < limit) {
5654:                            ch = text.charAt(index++);
5655:                            if (ch < MIN_LOW_SURROGATE
5656:                                    || ch > MAX_LOW_SURROGATE) {
5657:                                if (--codePointOffset < 0) {
5658:                                    return index - 1;
5659:                                }
5660:                            }
5661:                        }
5662:                    }
5663:                }
5664:
5665:                return index;
5666:            }
5667:
5668:            /**
5669:             * Cover the JDK API, for convenience.  Adjust the char index by a code point offset.
5670:             * @param text the characters to check
5671:             * @param start the start of the range to check
5672:             * @param count the length of the range to check
5673:             * @param index the index to adjust
5674:             * @param codePointOffset the number of code points by which to offset the index
5675:             * @return the adjusted index
5676:             * @stable ICU 3.0
5677:             */
5678:            public static int offsetByCodePoints(char[] text, int start,
5679:                    int count, int index, int codePointOffset) {
5680:                int limit = start + count;
5681:                if (start < 0 || limit < start || limit > text.length
5682:                        || index < start || index > limit) {
5683:                    throw new IndexOutOfBoundsException("index ( " + index
5684:                            + ") out of range " + start + ", " + limit
5685:                            + " in array 0, " + text.length);
5686:                }
5687:
5688:                if (codePointOffset < 0) {
5689:                    while (++codePointOffset <= 0) {
5690:                        char ch = text[--index];
5691:                        if (index < start) {
5692:                            throw new IndexOutOfBoundsException("index ( "
5693:                                    + index + ") < start (" + start + ")");
5694:                        }
5695:                        while (ch >= MIN_LOW_SURROGATE
5696:                                && ch <= MAX_LOW_SURROGATE && index > start) {
5697:                            ch = text[--index];
5698:                            if (ch < MIN_HIGH_SURROGATE
5699:                                    || ch > MAX_HIGH_SURROGATE) {
5700:                                if (++codePointOffset > 0) {
5701:                                    return index + 1;
5702:                                }
5703:                            }
5704:                        }
5705:                    }
5706:                } else {
5707:                    while (--codePointOffset >= 0) {
5708:                        char ch = text[index++];
5709:                        if (index > limit) {
5710:                            throw new IndexOutOfBoundsException("index ( "
5711:                                    + index + ") > limit (" + limit + ")");
5712:                        }
5713:                        while (ch >= MIN_HIGH_SURROGATE
5714:                                && ch <= MAX_HIGH_SURROGATE && index < limit) {
5715:                            ch = text[index++];
5716:                            if (ch < MIN_LOW_SURROGATE
5717:                                    || ch > MAX_LOW_SURROGATE) {
5718:                                if (--codePointOffset < 0) {
5719:                                    return index - 1;
5720:                                }
5721:                            }
5722:                        }
5723:                    }
5724:                }
5725:
5726:                return index;
5727:            }
5728:
5729:            // protected data members --------------------------------------------
5730:
5731:            /**
5732:             * Database storing the sets of character name
5733:             */
5734:            static UCharacterName NAME_ = null;
5735:
5736:            /**
5737:             * Singleton object encapsulating the imported pnames.icu property aliases
5738:             */
5739:            static UPropertyAliases PNAMES_ = null;
5740:
5741:            // block to initialise name database and unicode 1.0 data 
5742:            static {
5743:                try {
5744:                    PNAMES_ = new UPropertyAliases();
5745:                    NAME_ = UCharacterName.getInstance();
5746:                } catch (IOException e) {
5747:                    // e.printStackTrace();
5748:                    throw new MissingResourceException(e.getMessage(), "", "");
5749:                    //throw new RuntimeException(e.getMessage());
5750:                    // DONOT throw an exception
5751:                    // we might be building ICU modularly wothout names.icu and
5752:                    // pnames.icu
5753:                }
5754:            }
5755:
5756:            // private variables -------------------------------------------------
5757:
/**
 * Database storing the sets of character property
 */
private static final UCharacterProperty PROPERTY_;
/**
 * For optimization: direct references to the property trie index, trie
 * data and trie initial value taken from PROPERTY_.
 */
private static final char[] PROPERTY_TRIE_INDEX_;
private static final char[] PROPERTY_TRIE_DATA_;
private static final int PROPERTY_INITIAL_VALUE_;

/** Case mapping properties: the real singleton, or a dummy if loading failed. */
private static final UCaseProps gCsp;
/** Bidi/shaping properties: the real singleton, or a dummy if loading failed. */
private static final UBiDiProps gBdp;

// block to initialise character property database
static {
    try {
        PROPERTY_ = UCharacterProperty.getInstance();
        PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_;
        PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_;
        PROPERTY_INITIAL_VALUE_ = PROPERTY_.m_trieInitialValue_;
    } catch (Exception e) {
        // MissingResourceException offers no public constructor that
        // takes a cause, so the original exception is attached with
        // initCause to keep its stack trace available for diagnosis.
        MissingResourceException failure =
                new MissingResourceException(e.getMessage(), "", "");
        failure.initCause(e);
        throw failure;
    }

    /*
     * In ICU4J 3.2, most Unicode properties were loaded from uprops.icu.
     * ICU4J 3.4 adds ucase.icu for case mapping properties and
     * ubidi.icu for bidi/shaping properties and
     * removes case/bidi/shaping properties from uprops.icu.
     *
     * Loading of uprops.icu was always done during class loading of UCharacter.class.
     * In order to maintain performance for all such properties,
     * ucase.icu and ubidi.icu are also loaded during class loading of UCharacter.class.
     * It will not fail if they are missing.
     * These data items are loaded early to avoid having to synchronize access to them,
     * for thread safety and performance.
     *
     * We try to load these data items at most once.
     * If it works, we use the resulting singleton object.
     * If it fails, then we get a dummy object, which always works unless
     * we are seriously out of memory.
     * After UCharacter.class loading, we have a never-changing pointer to either the
     * real singleton or the dummy.
     *
     * This method is used in Unicode properties APIs that
     * do not have a service object and also do not have an error code parameter.
     * Other API implementations get the singleton themselves
     * (synchronized), store it in the service object, and report errors.
     */
    UCaseProps csp;
    try {
        csp = UCaseProps.getSingleton();
    } catch (IOException e) {
        // deliberate best-effort: fall back to the dummy (see above)
        csp = UCaseProps.getDummy();
    }
    gCsp = csp;

    UBiDiProps bdp;
    try {
        bdp = UBiDiProps.getSingleton();
    } catch (IOException e) {
        // deliberate best-effort: fall back to the dummy (see above)
        bdp = UBiDiProps.getDummy();
    }
    gBdp = bdp;
}
5824:
5825:            /**
5826:             * Mask for the low 16 bits (the last char) of a value
5827:             */
5828:            private static final int LAST_CHAR_MASK_ = 0xFFFF;
5829:
5830:            /**
5831:             * Mask for the low 8 bits (the last byte) of a value
5832:             */
5833:            private static final int LAST_BYTE_MASK_ = 0xFF;
5834:
5835:            /**
5836:             * Shift distance of 16 bits
5837:             */
5838:            private static final int SHIFT_16_ = 16;
5839:
5840:            /**
5841:             * Shift distance of 24 bits
5842:             */
5843:            private static final int SHIFT_24_ = 24;
5844:
5845:            /**
5846:             * Decimal radix
5847:             */
5848:            private static final int DECIMAL_RADIX_ = 10;
5849:
5850:            /**
5851:             * Code point U+00A0 NO-BREAK SPACE
5852:             */
5853:            private static final int NO_BREAK_SPACE_ = 0xA0;
5854:
5855:            /**
5856:             * Code point U+202F NARROW NO-BREAK SPACE
5857:             */
5858:            private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
5859:
5860:            /**
5861:             * Code point U+FEFF ZERO WIDTH NO-BREAK SPACE
5862:             */
5863:            private static final int ZERO_WIDTH_NO_BREAK_SPACE_ = 0xFEFF;
5864:
5865:            /**
5866:             * Code point U+3007 IDEOGRAPHIC NUMBER ZERO
5867:             */
5868:            private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
5869:
5870:            /**
5871:             * CJK ideograph for the digit one, U+4E00
5872:             */
5873:            private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
5874:
5875:            /**
5876:             * CJK ideograph for the digit two, U+4E8C
5877:             */
5878:            private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
5879:
5880:            /**
5881:             * CJK ideograph for the digit three, U+4E09
5882:             */
5883:            private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
5884:
5885:            /**
5886:             * CJK ideograph for the digit four, U+56DB
5887:             */
5888:            private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
5889:
5890:            /**
5891:             * CJK ideograph for the digit five, U+4E94
5892:             */
5893:            private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
5894:
5895:            /**
5896:             * CJK ideograph for the digit six, U+516D
5897:             */
5898:            private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
5899:
5900:            /**
5901:             * CJK ideograph for the digit seven, U+4E03
5902:             */
5903:            private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
5904:
5905:            /**
5906:             * CJK ideograph for the digit eight, U+516B
5907:             */
5908:            private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
5909:
5910:            /**
5911:             * CJK ideograph for the digit nine, U+4E5D
5912:             */
5913:            private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
5914:
5915:            /**
5916:             * Code point U+009F APPLICATION PROGRAM COMMAND
5917:             */
5918:            private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
5919:
5920:            /**
5921:             * Code point U+001F, the unit separator control character
5922:             */
5923:            private static final int UNIT_SEPARATOR_ = 0x001F;
5924:
5925:            /**
5926:             * Code point U+007F DELETE
5927:             */
5928:            private static final int DELETE_ = 0x007F;
5929:            /**
5930:             * Upper limit of the first ISO control character range, 0x0 - 0x1F
5931:             */
5932:            private static final int ISO_CONTROL_FIRST_RANGE_MAX_ = 0x1F;
5933:            /**
5934:             * Number of bits to shift right to get the numeric type, see getNumericType(int)
5935:             */
5936:            private static final int NUMERIC_TYPE_SHIFT_ = 5;
5937:            /**
5938:             * Mask to get numeric type: three bits (0x7) placed at NUMERIC_TYPE_SHIFT_, i.e. bits 7..5
5939:             */
5940:            private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_;
5941:
5942:            /* encoding of fractional and large numbers in an 8 bit field */
5943:            private static final int MAX_SMALL_NUMBER = 0xff;
5944:
5945:            private static final int FRACTION_NUM_SHIFT = 3; /* numerator: bits 7..3 */
5946:            private static final int FRACTION_DEN_MASK = 7; /* denominator: bits 2..0 */
5947:
5948:            private static final int FRACTION_MAX_NUM = 31; /* largest value of the 5 numerator bits */
5949:            private static final int FRACTION_DEN_OFFSET = 2; /* denominator values are 2..9 */
5950:
5951:            private static final int FRACTION_MIN_DEN = FRACTION_DEN_OFFSET; /* = 2 */
5952:            private static final int FRACTION_MAX_DEN = FRACTION_MIN_DEN
5953:                    + FRACTION_DEN_MASK; /* = 9 */
5954:
5955:            private static final int LARGE_MANT_SHIFT = 4; /* mantissa: bits 7..4 */
5956:            private static final int LARGE_EXP_MASK = 0xf; /* exponent: bits 3..0 */
5957:            private static final int LARGE_EXP_OFFSET = 2; /* regular exponents 2..17 */
5958:            private static final int LARGE_EXP_OFFSET_EXTRA = 18; /* extra large exponents 18..33 */
5959:
5960:            private static final int LARGE_MIN_EXP = LARGE_EXP_OFFSET; /* = 2 */
5961:            private static final int LARGE_MAX_EXP = LARGE_MIN_EXP
5962:                    + LARGE_EXP_MASK; /* = 17 */
5963:            private static final int LARGE_MAX_EXP_EXTRA = LARGE_EXP_OFFSET_EXTRA
5964:                    + LARGE_EXP_MASK; /* = 33 */
5965:
5966:            /**
5967:             * Han digit characters: alternative ("complex") forms of the digits, and the powers of ten
5968:             */
5969:            private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; // U+96F6
5970:            private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; // U+58F9
5971:            private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; // U+8CB3
5972:            private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3; // U+53C3
5973:            private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086; // U+8086
5974:            private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d; // U+4F0D
5975:            private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678; // U+9678
5976:            private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2; // U+67D2
5977:            private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c; // U+634C
5978:            private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396; // U+7396
5979:            private static final int CJK_IDEOGRAPH_TEN_ = 0x5341; // U+5341
5980:            private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe; // U+62FE
5981:            private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e; // U+767E
5982:            private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70; // U+4F70
5983:            private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343; // U+5343
5984:            private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf; // U+4EDF
5985:            private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x842c; // U+842C
5986:            private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104; // U+5104
5987:
5988:            /**
5989:             * Code point U+200C ZERO WIDTH NON-JOINER.
5990:             * Equivalent to icu4c ZWNJ.
5991:             */
5992:            private static final int ZERO_WIDTH_NON_JOINER_ = 0x200c;
5993:            /**
5994:             * Code point U+200D ZERO WIDTH JOINER.
5995:             * Equivalent to icu4c ZWJ.
5996:             */
5997:            private static final int ZERO_WIDTH_JOINER_ = 0x200d;
5998:
5999:            /*
6000:             * Properties in vector word 2
6001:             * Bits
6002:             * 31..24   More binary properties (see UCharacterProperty)
6003:             * 23..19   reserved
6004:             * 18..14   Sentence Break
6005:             * 13..10   Word Break
6006:             *  9.. 5   Grapheme Cluster Break
6007:             *  4.. 0   Decomposition Type
6008:             */
6009:            private static final int SB_MASK = 0x0007c000; // Sentence Break, bits 18..14
6010:            private static final int SB_SHIFT = 14;
6011:
6012:            private static final int WB_MASK = 0x00003c00; // Word Break, bits 13..10
6013:            private static final int WB_SHIFT = 10;
6014:
6015:            private static final int GCB_MASK = 0x000003e0; // Grapheme Cluster Break, bits 9..5
6016:            private static final int GCB_SHIFT = 5;
6017:
6018:            /**
6019:             * Integer properties mask for decomposition type, bits 4..0 (no shift needed).
6020:             * Equivalent to icu4c UPROPS_DT_MASK.
6021:             */
6022:            private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
6023:
6024:            /*
6025:             * Properties in vector word 0
6026:             * Bits
6027:             * 31..24   DerivedAge version major/minor one nibble each (see UCharacterProperty)
6028:             * 23..18   Line Break
6029:             * 17..15   East Asian Width
6030:             * 14.. 7   UBlockCode
6031:             *  6.. 0   UScriptCode
6032:             */
6033:
6034:            /**
6035:             * Integer properties mask for East Asian cell width, bits 17..15.
6036:             * Equivalent to icu4c UPROPS_EA_MASK
6037:             */
6038:            private static final int EAST_ASIAN_MASK_ = 0x00038000;
6039:            /**
6040:             * Integer properties shift value for East Asian cell width.
6041:             * Equivalent to icu4c UPROPS_EA_SHIFT
6042:             */
6043:            private static final int EAST_ASIAN_SHIFT_ = 15;
6044:            /**
6045:             * Integer properties mask for line breaks, bits 23..18.
6046:             * Equivalent to icu4c UPROPS_LB_MASK
6047:             */
6048:            private static final int LINE_BREAK_MASK_ = 0x00FC0000;
6049:            /**
6050:             * Integer properties shift value for line breaks.
6051:             * Equivalent to icu4c UPROPS_LB_SHIFT
6052:             */
6053:            private static final int LINE_BREAK_SHIFT_ = 18;
6054:            /**
6055:             * Integer properties mask for blocks, bits 14..7.
6056:             * Equivalent to icu4c UPROPS_BLOCK_MASK
6057:             */
6058:            private static final int BLOCK_MASK_ = 0x00007f80;
6059:            /**
6060:             * Integer properties shift value for blocks.
6061:             * Equivalent to icu4c UPROPS_BLOCK_SHIFT
6062:             */
6063:            private static final int BLOCK_SHIFT_ = 7;
6064:            /**
6065:             * Integer properties mask for scripts, bits 6..0 (no shift needed).
6066:             * Equivalent to icu4c UPROPS_SCRIPT_MASK (NOTE(review): was written UPROPS_SHIFT_MASK - confirm)
6067:             */
6068:            private static final int SCRIPT_MASK_ = 0x0000007f;
6069:
6070:            // private constructor -----------------------------------------------
6071:            ///CLOVER:OFF  
6072:            /**
6073:             * Private, empty constructor: prevents instantiation of this class from outside.
6074:             */
6075:            private UCharacter() {
6076:            }
6077:
6078:            ///CLOVER:ON 
6079:            // private methods ---------------------------------------------------
6080:
6081:            /**
6082:             * Gets the digit value (10..35) of a Latin letter used as a digit: 'A'-'Z' or
6083:             * 'a'-'z', either ASCII or fullwidth (U+FF21..U+FF3A, U+FF41..U+FF5A).
6084:             * This method assumes that the other digit characters are checked by the
6085:             * calling method.
6086:             * @param ch code point to test
6087:             * @return 10 for 'A'/'a' up to 35 for 'Z'/'z', or -1 if ch is not such a letter
6088:             */
6089:            private static int getEuropeanDigit(int ch) {
6090:                if (ch >= 0x41 && ch <= 0x5a) { // ASCII 'A'..'Z'
6091:                    return ch - 0x41 + 10;
6092:                }
6093:                if (ch >= 0x61 && ch <= 0x7a) { // ASCII 'a'..'z'
6094:                    return ch - 0x61 + 10;
6095:                }
6096:                // The fullwidth upper case range ends at U+FF3A; treating U+FF31 as its end
6097:                // would reject fullwidth 'R'..'Z' (U+FF32..U+FF3A).
6098:                if (ch >= 0xff21 && ch <= 0xff3a) { // fullwidth 'A'..'Z'
6099:                    return ch - 0xff21 + 10;
6100:                }
6101:                if (ch >= 0xff41 && ch <= 0xff5a) { // fullwidth 'a'..'z'
6102:                    return ch - 0xff41 + 10;
6103:                }
6104:                return -1;
6105:            }
6106:
6107:            /**
6108:             * Gets the numeric type of the property argument
6109:             * @param props 32 bit property
6110:             * @return the numeric type
6111:             */
6112:            private static int getNumericType(int props) {
6113:                return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_;
6114:            }
6115:
6116:            /**
6117:             * Gets the property value at the index.
6118:             * This is optimized.
6119:             * Note this is alittle different from CharTrie the index m_trieData_
6120:             * is never negative.
6121:             * This is a duplicate of UCharacterProperty.getProperty. For optimization
6122:             * purposes, this method calls the trie data directly instead of through 
6123:             * UCharacterProperty.getProperty.
6124:             * @param ch code point whose property value is to be retrieved
6125:             * @return property value of code point
6126:             * @stable ICU 2.6
6127:             */
6128:            private static final int getProperty(int ch) {
6129:                if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
6130:                        || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
6131:                    // BMP codepoint 0000..D7FF or DC00..FFFF
6132:                    try { // using try for ch < 0 is faster than using an if statement
6133:                        return PROPERTY_TRIE_DATA_[(PROPERTY_TRIE_INDEX_[ch >> 5] << 2)
6134:                                + (ch & 0x1f)];
6135:                    } catch (ArrayIndexOutOfBoundsException e) {
6136:                        return PROPERTY_INITIAL_VALUE_;
6137:                    }
6138:                }
6139:                if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
6140:                    // lead surrogate D800..DBFF
6141:                    return PROPERTY_TRIE_DATA_[(PROPERTY_TRIE_INDEX_[(0x2800 >> 5)
6142:                            + (ch >> 5)] << 2)
6143:                            + (ch & 0x1f)];
6144:                }
6145:                // for optimization
6146:                if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
6147:                    // supplementary code point 10000..10FFFF
6148:                    // look at the construction of supplementary characters
6149:                    // trail forms the ends of it.
6150:                    return PROPERTY_.m_trie_.getSurrogateValue(UTF16
6151:                            .getLeadSurrogate(ch), (char) (ch & 0x3ff));
6152:                }
6153:                // return m_dataOffset_ if there is an error, in this case we return 
6154:                // the default value: m_initialValue_
6155:                // we cannot assume that m_initialValue_ is at offset 0
6156:                // this is for optimization.
6157:                return PROPERTY_INITIAL_VALUE_;
6158:            }
6159:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.