0001 /*
0002 * Copyright 2002-2006 Sun Microsystems, Inc. All Rights Reserved.
0003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004 *
0005 * This code is free software; you can redistribute it and/or modify it
0006 * under the terms of the GNU General Public License version 2 only, as
0007 * published by the Free Software Foundation. Sun designates this
0008 * particular file as subject to the "Classpath" exception as provided
0009 * by Sun in the LICENSE file that accompanied this code.
0010 *
0011 * This code is distributed in the hope that it will be useful, but WITHOUT
0012 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014 * version 2 for more details (a copy is included in the LICENSE file that
0015 * accompanied this code).
0016 *
0017 * You should have received a copy of the GNU General Public License version
0018 * 2 along with this work; if not, write to the Free Software Foundation,
0019 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020 *
0021 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022 * CA 95054 USA or visit www.sun.com if you need additional information or
0023 * have any questions.
0024 */
0025
0026 package java.lang;
0027
0028 import java.util.Map;
0029 import java.util.HashMap;
0030 import java.util.Locale;
0031
0032 /**
0033 * The <code>Character</code> class wraps a value of the primitive
0034 * type <code>char</code> in an object. An object of type
0035 * <code>Character</code> contains a single field whose type is
0036 * <code>char</code>.
0037 * <p>
0038 * In addition, this class provides several methods for determining
0039 * a character's category (lowercase letter, digit, etc.) and for converting
0040 * characters from uppercase to lowercase and vice versa.
0041 * <p>
0042 * Character information is based on the Unicode Standard, version 4.0.
0043 * <p>
0044 * The methods and data of class <code>Character</code> are defined by
0045 * the information in the <i>UnicodeData</i> file that is part of the
0046 * Unicode Character Database maintained by the Unicode
0047 * Consortium. This file specifies various properties including name
0048 * and general category for every defined Unicode code point or
0049 * character range.
0050 * <p>
0051 * The file and its description are available from the Unicode Consortium at:
0052 * <ul>
0053 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
0054 * </ul>
0055 *
0056 * <h4><a name="unicode">Unicode Character Representations</a></h4>
0057 *
0058 * <p>The <code>char</code> data type (and therefore the value that a
0059 * <code>Character</code> object encapsulates) are based on the
0060 * original Unicode specification, which defined characters as
0061 * fixed-width 16-bit entities. The Unicode standard has since been
0062 * changed to allow for characters whose representation requires more
0063 * than 16 bits. The range of legal <em>code point</em>s is now
0064 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
0065 * (Refer to the <a
0066 * href="http://www.unicode.org/reports/tr27/#notation"><i>
0067 * definition</i></a> of the U+<i>n</i> notation in the Unicode
0068 * standard.)
0069 *
0070 * <p>The set of characters from U+0000 to U+FFFF is sometimes
0071 * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
0072 * name="supplementary">Characters</a> whose code points are greater
0073 * than U+FFFF are called <em>supplementary character</em>s. The Java
0074 * 2 platform uses the UTF-16 representation in <code>char</code>
0075 * arrays and in the <code>String</code> and <code>StringBuffer</code>
0076 * classes. In this representation, supplementary characters are
0077 * represented as a pair of <code>char</code> values, the first from
0078 * the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
0079 * second from the <em>low-surrogates</em> range
0080 * (\uDC00-\uDFFF).
0081 *
0082 * <p>A <code>char</code> value, therefore, represents Basic
0083 * Multilingual Plane (BMP) code points, including the surrogate
0084 * code points, or code units of the UTF-16 encoding. An
0085 * <code>int</code> value represents all Unicode code points,
0086 * including supplementary code points. The lower (least significant)
0087 * 21 bits of <code>int</code> are used to represent Unicode code
0088 * points and the upper (most significant) 11 bits must be zero.
0089 * Unless otherwise specified, the behavior with respect to
0090 * supplementary characters and surrogate <code>char</code> values is
0091 * as follows:
0092 *
0093 * <ul>
0094 * <li>The methods that only accept a <code>char</code> value cannot support
0095 * supplementary characters. They treat <code>char</code> values from the
0096 * surrogate ranges as undefined characters. For example,
0097 * <code>Character.isLetter('\uD840')</code> returns <code>false</code>, even though
0098 * this specific value if followed by any low-surrogate value in a string
0099 * would represent a letter.
0100 *
0101 * <li>The methods that accept an <code>int</code> value support all
0102 * Unicode characters, including supplementary characters. For
0103 * example, <code>Character.isLetter(0x2F81A)</code> returns
0104 * <code>true</code> because the code point value represents a letter
0105 * (a CJK ideograph).
0106 * </ul>
0107 *
0108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
0109 * used for character values in the range between U+0000 and U+10FFFF,
0110 * and <em>Unicode code unit</em> is used for 16-bit
0111 * <code>char</code> values that are code units of the <em>UTF-16</em>
0112 * encoding. For more information on Unicode terminology, refer to the
0113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
0114 *
0115 * @author Lee Boynton
0116 * @author Guy Steele
0117 * @author Akira Tanaka
0118 * @since 1.0
0119 */
0120 public final class Character extends Object implements
0121 java.io.Serializable, Comparable<Character> {
0122 /**
0123 * The minimum radix available for conversion to and from strings.
0124 * The constant value of this field is the smallest value permitted
0125 * for the radix argument in radix-conversion methods such as the
0126 * <code>digit</code> method, the <code>forDigit</code>
0127 * method, and the <code>toString</code> method of class
0128 * <code>Integer</code>.
0129 *
0130 * @see java.lang.Character#digit(char, int)
0131 * @see java.lang.Character#forDigit(int, int)
0132 * @see java.lang.Integer#toString(int, int)
0133 * @see java.lang.Integer#valueOf(java.lang.String)
0134 */
0135 public static final int MIN_RADIX = 2;
0136
0137 /**
0138 * The maximum radix available for conversion to and from strings.
0139 * The constant value of this field is the largest value permitted
0140 * for the radix argument in radix-conversion methods such as the
0141 * <code>digit</code> method, the <code>forDigit</code>
0142 * method, and the <code>toString</code> method of class
0143 * <code>Integer</code>.
0144 *
0145 * @see java.lang.Character#digit(char, int)
0146 * @see java.lang.Character#forDigit(int, int)
0147 * @see java.lang.Integer#toString(int, int)
0148 * @see java.lang.Integer#valueOf(java.lang.String)
0149 */
0150 public static final int MAX_RADIX = 36;
0151
0152 /**
0153 * The constant value of this field is the smallest value of type
0154 * <code>char</code>, <code>'\u0000'</code>.
0155 *
0156 * @since 1.0.2
0157 */
0158 public static final char MIN_VALUE = '\u0000';
0159
0160 /**
0161 * The constant value of this field is the largest value of type
0162 * <code>char</code>, <code>'\uFFFF'</code>.
0163 *
0164 * @since 1.0.2
0165 */
0166 public static final char MAX_VALUE = '\uffff';
0167
0168 /**
0169 * The <code>Class</code> instance representing the primitive type
0170 * <code>char</code>.
0171 *
0172 * @since 1.1
0173 */
0174 public static final Class<Character> TYPE = Class
0175 .getPrimitiveClass("char");
0176
0177 /*
0178 * Normative general types
0179 */
0180
0181 /*
0182 * General character types
0183 */
0184
0185 /**
0186 * General category "Cn" in the Unicode specification.
0187 * @since 1.1
0188 */
0189 public static final byte UNASSIGNED = 0;
0190
0191 /**
0192 * General category "Lu" in the Unicode specification.
0193 * @since 1.1
0194 */
0195 public static final byte UPPERCASE_LETTER = 1;
0196
0197 /**
0198 * General category "Ll" in the Unicode specification.
0199 * @since 1.1
0200 */
0201 public static final byte LOWERCASE_LETTER = 2;
0202
0203 /**
0204 * General category "Lt" in the Unicode specification.
0205 * @since 1.1
0206 */
0207 public static final byte TITLECASE_LETTER = 3;
0208
0209 /**
0210 * General category "Lm" in the Unicode specification.
0211 * @since 1.1
0212 */
0213 public static final byte MODIFIER_LETTER = 4;
0214
0215 /**
0216 * General category "Lo" in the Unicode specification.
0217 * @since 1.1
0218 */
0219 public static final byte OTHER_LETTER = 5;
0220
0221 /**
0222 * General category "Mn" in the Unicode specification.
0223 * @since 1.1
0224 */
0225 public static final byte NON_SPACING_MARK = 6;
0226
0227 /**
0228 * General category "Me" in the Unicode specification.
0229 * @since 1.1
0230 */
0231 public static final byte ENCLOSING_MARK = 7;
0232
0233 /**
0234 * General category "Mc" in the Unicode specification.
0235 * @since 1.1
0236 */
0237 public static final byte COMBINING_SPACING_MARK = 8;
0238
0239 /**
0240 * General category "Nd" in the Unicode specification.
0241 * @since 1.1
0242 */
0243 public static final byte DECIMAL_DIGIT_NUMBER = 9;
0244
0245 /**
0246 * General category "Nl" in the Unicode specification.
0247 * @since 1.1
0248 */
0249 public static final byte LETTER_NUMBER = 10;
0250
0251 /**
0252 * General category "No" in the Unicode specification.
0253 * @since 1.1
0254 */
0255 public static final byte OTHER_NUMBER = 11;
0256
0257 /**
0258 * General category "Zs" in the Unicode specification.
0259 * @since 1.1
0260 */
0261 public static final byte SPACE_SEPARATOR = 12;
0262
0263 /**
0264 * General category "Zl" in the Unicode specification.
0265 * @since 1.1
0266 */
0267 public static final byte LINE_SEPARATOR = 13;
0268
0269 /**
0270 * General category "Zp" in the Unicode specification.
0271 * @since 1.1
0272 */
0273 public static final byte PARAGRAPH_SEPARATOR = 14;
0274
0275 /**
0276 * General category "Cc" in the Unicode specification.
0277 * @since 1.1
0278 */
0279 public static final byte CONTROL = 15;
0280
0281 /**
0282 * General category "Cf" in the Unicode specification.
0283 * @since 1.1
0284 */
0285 public static final byte FORMAT = 16;
0286
0287 /**
0288 * General category "Co" in the Unicode specification.
0289 * @since 1.1
0290 */
0291 public static final byte PRIVATE_USE = 18;
0292
0293 /**
0294 * General category "Cs" in the Unicode specification.
0295 * @since 1.1
0296 */
0297 public static final byte SURROGATE = 19;
0298
0299 /**
0300 * General category "Pd" in the Unicode specification.
0301 * @since 1.1
0302 */
0303 public static final byte DASH_PUNCTUATION = 20;
0304
0305 /**
0306 * General category "Ps" in the Unicode specification.
0307 * @since 1.1
0308 */
0309 public static final byte START_PUNCTUATION = 21;
0310
0311 /**
0312 * General category "Pe" in the Unicode specification.
0313 * @since 1.1
0314 */
0315 public static final byte END_PUNCTUATION = 22;
0316
0317 /**
0318 * General category "Pc" in the Unicode specification.
0319 * @since 1.1
0320 */
0321 public static final byte CONNECTOR_PUNCTUATION = 23;
0322
0323 /**
0324 * General category "Po" in the Unicode specification.
0325 * @since 1.1
0326 */
0327 public static final byte OTHER_PUNCTUATION = 24;
0328
0329 /**
0330 * General category "Sm" in the Unicode specification.
0331 * @since 1.1
0332 */
0333 public static final byte MATH_SYMBOL = 25;
0334
0335 /**
0336 * General category "Sc" in the Unicode specification.
0337 * @since 1.1
0338 */
0339 public static final byte CURRENCY_SYMBOL = 26;
0340
0341 /**
0342 * General category "Sk" in the Unicode specification.
0343 * @since 1.1
0344 */
0345 public static final byte MODIFIER_SYMBOL = 27;
0346
0347 /**
0348 * General category "So" in the Unicode specification.
0349 * @since 1.1
0350 */
0351 public static final byte OTHER_SYMBOL = 28;
0352
0353 /**
0354 * General category "Pi" in the Unicode specification.
0355 * @since 1.4
0356 */
0357 public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
0358
0359 /**
0360 * General category "Pf" in the Unicode specification.
0361 * @since 1.4
0362 */
0363 public static final byte FINAL_QUOTE_PUNCTUATION = 30;
0364
0365 /**
0366 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
0367 */
0368 static final int ERROR = 0xFFFFFFFF;
0369
0370 /**
0371 * Undefined bidirectional character type. Undefined <code>char</code>
0372 * values have undefined directionality in the Unicode specification.
0373 * @since 1.4
0374 */
0375 public static final byte DIRECTIONALITY_UNDEFINED = -1;
0376
0377 /**
0378 * Strong bidirectional character type "L" in the Unicode specification.
0379 * @since 1.4
0380 */
0381 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
0382
0383 /**
0384 * Strong bidirectional character type "R" in the Unicode specification.
0385 * @since 1.4
0386 */
0387 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
0388
0389 /**
0390 * Strong bidirectional character type "AL" in the Unicode specification.
0391 * @since 1.4
0392 */
0393 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
0394
0395 /**
0396 * Weak bidirectional character type "EN" in the Unicode specification.
0397 * @since 1.4
0398 */
0399 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
0400
0401 /**
0402 * Weak bidirectional character type "ES" in the Unicode specification.
0403 * @since 1.4
0404 */
0405 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
0406
0407 /**
0408 * Weak bidirectional character type "ET" in the Unicode specification.
0409 * @since 1.4
0410 */
0411 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
0412
0413 /**
0414 * Weak bidirectional character type "AN" in the Unicode specification.
0415 * @since 1.4
0416 */
0417 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
0418
0419 /**
0420 * Weak bidirectional character type "CS" in the Unicode specification.
0421 * @since 1.4
0422 */
0423 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
0424
0425 /**
0426 * Weak bidirectional character type "NSM" in the Unicode specification.
0427 * @since 1.4
0428 */
0429 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
0430
0431 /**
0432 * Weak bidirectional character type "BN" in the Unicode specification.
0433 * @since 1.4
0434 */
0435 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
0436
0437 /**
0438 * Neutral bidirectional character type "B" in the Unicode specification.
0439 * @since 1.4
0440 */
0441 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
0442
0443 /**
0444 * Neutral bidirectional character type "S" in the Unicode specification.
0445 * @since 1.4
0446 */
0447 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
0448
0449 /**
0450 * Neutral bidirectional character type "WS" in the Unicode specification.
0451 * @since 1.4
0452 */
0453 public static final byte DIRECTIONALITY_WHITESPACE = 12;
0454
0455 /**
0456 * Neutral bidirectional character type "ON" in the Unicode specification.
0457 * @since 1.4
0458 */
0459 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
0460
0461 /**
0462 * Strong bidirectional character type "LRE" in the Unicode specification.
0463 * @since 1.4
0464 */
0465 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
0466
0467 /**
0468 * Strong bidirectional character type "LRO" in the Unicode specification.
0469 * @since 1.4
0470 */
0471 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
0472
0473 /**
0474 * Strong bidirectional character type "RLE" in the Unicode specification.
0475 * @since 1.4
0476 */
0477 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
0478
0479 /**
0480 * Strong bidirectional character type "RLO" in the Unicode specification.
0481 * @since 1.4
0482 */
0483 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
0484
0485 /**
0486 * Weak bidirectional character type "PDF" in the Unicode specification.
0487 * @since 1.4
0488 */
0489 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
0490
0491 /**
0492 * The minimum value of a Unicode high-surrogate code unit in the
0493 * UTF-16 encoding. A high-surrogate is also known as a
0494 * <i>leading-surrogate</i>.
0495 *
0496 * @since 1.5
0497 */
0498 public static final char MIN_HIGH_SURROGATE = '\uD800';
0499
0500 /**
0501 * The maximum value of a Unicode high-surrogate code unit in the
0502 * UTF-16 encoding. A high-surrogate is also known as a
0503 * <i>leading-surrogate</i>.
0504 *
0505 * @since 1.5
0506 */
0507 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
0508
0509 /**
0510 * The minimum value of a Unicode low-surrogate code unit in the
0511 * UTF-16 encoding. A low-surrogate is also known as a
0512 * <i>trailing-surrogate</i>.
0513 *
0514 * @since 1.5
0515 */
0516 public static final char MIN_LOW_SURROGATE = '\uDC00';
0517
0518 /**
0519 * The maximum value of a Unicode low-surrogate code unit in the
0520 * UTF-16 encoding. A low-surrogate is also known as a
0521 * <i>trailing-surrogate</i>.
0522 *
0523 * @since 1.5
0524 */
0525 public static final char MAX_LOW_SURROGATE = '\uDFFF';
0526
0527 /**
0528 * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
0529 *
0530 * @since 1.5
0531 */
0532 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
0533
0534 /**
0535 * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
0536 *
0537 * @since 1.5
0538 */
0539 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
0540
0541 /**
0542 * The minimum value of a supplementary code point.
0543 *
0544 * @since 1.5
0545 */
0546 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
0547
0548 /**
0549 * The minimum value of a Unicode code point.
0550 *
0551 * @since 1.5
0552 */
0553 public static final int MIN_CODE_POINT = 0x000000;
0554
0555 /**
0556 * The maximum value of a Unicode code point.
0557 *
0558 * @since 1.5
0559 */
0560 public static final int MAX_CODE_POINT = 0x10ffff;
0561
0562 /**
0563 * Instances of this class represent particular subsets of the Unicode
0564 * character set. The only family of subsets defined in the
0565 * <code>Character</code> class is <code>{@link Character.UnicodeBlock
0566 * UnicodeBlock}</code>. Other portions of the Java API may define other
0567 * subsets for their own purposes.
0568 *
0569 * @since 1.2
0570 */
0571 public static class Subset {
0572
0573 private String name;
0574
0575 /**
0576 * Constructs a new <code>Subset</code> instance.
0577 *
0578 * @exception NullPointerException if name is <code>null</code>
0579 * @param name The name of this subset
0580 */
0581 protected Subset(String name) {
0582 if (name == null) {
0583 throw new NullPointerException("name");
0584 }
0585 this .name = name;
0586 }
0587
0588 /**
0589 * Compares two <code>Subset</code> objects for equality.
0590 * This method returns <code>true</code> if and only if
0591 * <code>this</code> and the argument refer to the same
0592 * object; since this method is <code>final</code>, this
0593 * guarantee holds for all subclasses.
0594 */
0595 public final boolean equals(Object obj) {
0596 return (this == obj);
0597 }
0598
0599 /**
0600 * Returns the standard hash code as defined by the
0601 * <code>{@link Object#hashCode}</code> method. This method
0602 * is <code>final</code> in order to ensure that the
0603 * <code>equals</code> and <code>hashCode</code> methods will
0604 * be consistent in all subclasses.
0605 */
0606 public final int hashCode() {
0607 return super .hashCode();
0608 }
0609
0610 /**
0611 * Returns the name of this subset.
0612 */
0613 public final String toString() {
0614 return name;
0615 }
0616 }
0617
0618 /**
0619 * A family of character subsets representing the character blocks in the
0620 * Unicode specification. Character blocks generally define characters
0621 * used for a specific script or purpose. A character is contained by
0622 * at most one Unicode block.
0623 *
0624 * @since 1.2
0625 */
0626 public static final class UnicodeBlock extends Subset {
0627
0628 private static Map map = new HashMap();
0629
0630 /**
0631 * Create a UnicodeBlock with the given identifier name.
0632 * This name must be the same as the block identifier.
0633 */
0634 private UnicodeBlock(String idName) {
0635 super (idName);
0636 map.put(idName.toUpperCase(Locale.US), this );
0637 }
0638
0639 /**
0640 * Create a UnicodeBlock with the given identifier name and
0641 * alias name.
0642 */
0643 private UnicodeBlock(String idName, String alias) {
0644 this (idName);
0645 map.put(alias.toUpperCase(Locale.US), this );
0646 }
0647
/**
 * Creates a block named {@code idName} and additionally records it in
 * the lookup map under the upper-cased ({@code Locale.US}) form of every
 * entry of {@code aliasName}, in array order. A {@code null} array means
 * the block has no aliases.
 */
private UnicodeBlock(String idName, String[] aliasName) {
    this(idName);
    if (aliasName == null) {
        return;
    }
    for (String alias : aliasName) {
        map.put(alias.toUpperCase(Locale.US), this);
    }
}
0660
0661 /**
0662 * Constant for the "Basic Latin" Unicode character block.
0663 * @since 1.2
0664 */
0665 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock(
0666 "BASIC_LATIN", new String[] { "Basic Latin",
0667 "BasicLatin" });
0668
0669 /**
0670 * Constant for the "Latin-1 Supplement" Unicode character block.
0671 * @since 1.2
0672 */
0673 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock(
0674 "LATIN_1_SUPPLEMENT", new String[] {
0675 "Latin-1 Supplement", "Latin-1Supplement" });
0676
0677 /**
0678 * Constant for the "Latin Extended-A" Unicode character block.
0679 * @since 1.2
0680 */
0681 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock(
0682 "LATIN_EXTENDED_A", new String[] { "Latin Extended-A",
0683 "LatinExtended-A" });
0684
0685 /**
0686 * Constant for the "Latin Extended-B" Unicode character block.
0687 * @since 1.2
0688 */
0689 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock(
0690 "LATIN_EXTENDED_B", new String[] { "Latin Extended-B",
0691 "LatinExtended-B" });
0692
0693 /**
0694 * Constant for the "IPA Extensions" Unicode character block.
0695 * @since 1.2
0696 */
0697 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock(
0698 "IPA_EXTENSIONS", new String[] { "IPA Extensions",
0699 "IPAExtensions" });
0700
0701 /**
0702 * Constant for the "Spacing Modifier Letters" Unicode character block.
0703 * @since 1.2
0704 */
0705 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock(
0706 "SPACING_MODIFIER_LETTERS", new String[] {
0707 "Spacing Modifier Letters",
0708 "SpacingModifierLetters" });
0709
0710 /**
0711 * Constant for the "Combining Diacritical Marks" Unicode character block.
0712 * @since 1.2
0713 */
0714 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock(
0715 "COMBINING_DIACRITICAL_MARKS", new String[] {
0716 "Combining Diacritical Marks",
0717 "CombiningDiacriticalMarks" });
0718
0719 /**
0720 * Constant for the "Greek and Coptic" Unicode character block.
0721 * <p>
0722 * This block was previously known as the "Greek" block.
0723 *
0724 * @since 1.2
0725 */
0726 public static final UnicodeBlock GREEK = new UnicodeBlock(
0727 "GREEK", new String[] { "Greek and Coptic",
0728 "GreekandCoptic" });
0729
0730 /**
0731 * Constant for the "Cyrillic" Unicode character block.
0732 * @since 1.2
0733 */
0734 public static final UnicodeBlock CYRILLIC = new UnicodeBlock(
0735 "CYRILLIC");
0736
0737 /**
0738 * Constant for the "Armenian" Unicode character block.
0739 * @since 1.2
0740 */
0741 public static final UnicodeBlock ARMENIAN = new UnicodeBlock(
0742 "ARMENIAN");
0743
0744 /**
0745 * Constant for the "Hebrew" Unicode character block.
0746 * @since 1.2
0747 */
0748 public static final UnicodeBlock HEBREW = new UnicodeBlock(
0749 "HEBREW");
0750
0751 /**
0752 * Constant for the "Arabic" Unicode character block.
0753 * @since 1.2
0754 */
0755 public static final UnicodeBlock ARABIC = new UnicodeBlock(
0756 "ARABIC");
0757
0758 /**
0759 * Constant for the "Devanagari" Unicode character block.
0760 * @since 1.2
0761 */
0762 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock(
0763 "DEVANAGARI");
0764
0765 /**
0766 * Constant for the "Bengali" Unicode character block.
0767 * @since 1.2
0768 */
0769 public static final UnicodeBlock BENGALI = new UnicodeBlock(
0770 "BENGALI");
0771
0772 /**
0773 * Constant for the "Gurmukhi" Unicode character block.
0774 * @since 1.2
0775 */
0776 public static final UnicodeBlock GURMUKHI = new UnicodeBlock(
0777 "GURMUKHI");
0778
0779 /**
0780 * Constant for the "Gujarati" Unicode character block.
0781 * @since 1.2
0782 */
0783 public static final UnicodeBlock GUJARATI = new UnicodeBlock(
0784 "GUJARATI");
0785
0786 /**
0787 * Constant for the "Oriya" Unicode character block.
0788 * @since 1.2
0789 */
0790 public static final UnicodeBlock ORIYA = new UnicodeBlock(
0791 "ORIYA");
0792
0793 /**
0794 * Constant for the "Tamil" Unicode character block.
0795 * @since 1.2
0796 */
0797 public static final UnicodeBlock TAMIL = new UnicodeBlock(
0798 "TAMIL");
0799
0800 /**
0801 * Constant for the "Telugu" Unicode character block.
0802 * @since 1.2
0803 */
0804 public static final UnicodeBlock TELUGU = new UnicodeBlock(
0805 "TELUGU");
0806
0807 /**
0808 * Constant for the "Kannada" Unicode character block.
0809 * @since 1.2
0810 */
0811 public static final UnicodeBlock KANNADA = new UnicodeBlock(
0812 "KANNADA");
0813
0814 /**
0815 * Constant for the "Malayalam" Unicode character block.
0816 * @since 1.2
0817 */
0818 public static final UnicodeBlock MALAYALAM = new UnicodeBlock(
0819 "MALAYALAM");
0820
0821 /**
0822 * Constant for the "Thai" Unicode character block.
0823 * @since 1.2
0824 */
0825 public static final UnicodeBlock THAI = new UnicodeBlock("THAI");
0826
0827 /**
0828 * Constant for the "Lao" Unicode character block.
0829 * @since 1.2
0830 */
0831 public static final UnicodeBlock LAO = new UnicodeBlock("LAO");
0832
0833 /**
0834 * Constant for the "Tibetan" Unicode character block.
0835 * @since 1.2
0836 */
0837 public static final UnicodeBlock TIBETAN = new UnicodeBlock(
0838 "TIBETAN");
0839
0840 /**
0841 * Constant for the "Georgian" Unicode character block.
0842 * @since 1.2
0843 */
0844 public static final UnicodeBlock GEORGIAN = new UnicodeBlock(
0845 "GEORGIAN");
0846
0847 /**
0848 * Constant for the "Hangul Jamo" Unicode character block.
0849 * @since 1.2
0850 */
0851 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock(
0852 "HANGUL_JAMO", new String[] { "Hangul Jamo",
0853 "HangulJamo" });
0854
0855 /**
0856 * Constant for the "Latin Extended Additional" Unicode character block.
0857 * @since 1.2
0858 */
0859 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock(
0860 "LATIN_EXTENDED_ADDITIONAL", new String[] {
0861 "Latin Extended Additional",
0862 "LatinExtendedAdditional" });
0863
0864 /**
0865 * Constant for the "Greek Extended" Unicode character block.
0866 * @since 1.2
0867 */
0868 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock(
0869 "GREEK_EXTENDED", new String[] { "Greek Extended",
0870 "GreekExtended" });
0871
0872 /**
0873 * Constant for the "General Punctuation" Unicode character block.
0874 * @since 1.2
0875 */
0876 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock(
0877 "GENERAL_PUNCTUATION", new String[] {
0878 "General Punctuation", "GeneralPunctuation" });
0879
0880 /**
0881 * Constant for the "Superscripts and Subscripts" Unicode character block.
0882 * @since 1.2
0883 */
0884 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock(
0885 "SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {
0886 "Superscripts and Subscripts",
0887 "SuperscriptsandSubscripts" });
0888
0889 /**
0890 * Constant for the "Currency Symbols" Unicode character block.
0891 * @since 1.2
0892 */
0893 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock(
0894 "CURRENCY_SYMBOLS", new String[] { "Currency Symbols",
0895 "CurrencySymbols" });
0896
0897 /**
0898 * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
0899 * <p>
0900 * This block was previously known as "Combining Marks for Symbols".
0901 * @since 1.2
0902 */
0903 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock(
0904 "COMBINING_MARKS_FOR_SYMBOLS", new String[] {
0905 "Combining Diacritical Marks for Symbols",
0906 "CombiningDiacriticalMarksforSymbols",
0907 "Combining Marks for Symbols",
0908 "CombiningMarksforSymbols" });
0909
0910 /**
0911 * Constant for the "Letterlike Symbols" Unicode character block.
0912 * @since 1.2
0913 */
0914 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock(
0915 "LETTERLIKE_SYMBOLS", new String[] {
0916 "Letterlike Symbols", "LetterlikeSymbols" });
0917
0918 /**
0919 * Constant for the "Number Forms" Unicode character block.
0920 * @since 1.2
0921 */
0922 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock(
0923 "NUMBER_FORMS", new String[] { "Number Forms",
0924 "NumberForms" });
0925
0926 /**
0927 * Constant for the "Arrows" Unicode character block.
0928 * @since 1.2
0929 */
0930 public static final UnicodeBlock ARROWS = new UnicodeBlock(
0931 "ARROWS");
0932
0933 /**
0934 * Constant for the "Mathematical Operators" Unicode character block.
0935 * @since 1.2
0936 */
0937 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock(
0938 "MATHEMATICAL_OPERATORS", new String[] {
0939 "Mathematical Operators",
0940 "MathematicalOperators" });
0941
0942 /**
0943 * Constant for the "Miscellaneous Technical" Unicode character block.
0944 * @since 1.2
0945 */
0946 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock(
0947 "MISCELLANEOUS_TECHNICAL", new String[] {
0948 "Miscellaneous Technical",
0949 "MiscellaneousTechnical" });
0950
0951 /**
0952 * Constant for the "Control Pictures" Unicode character block.
0953 * @since 1.2
0954 */
0955 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock(
0956 "CONTROL_PICTURES", new String[] { "Control Pictures",
0957 "ControlPictures" });
0958
0959 /**
0960 * Constant for the "Optical Character Recognition" Unicode character block.
0961 * @since 1.2
0962 */
0963 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock(
0964 "OPTICAL_CHARACTER_RECOGNITION", new String[] {
0965 "Optical Character Recognition",
0966 "OpticalCharacterRecognition" });
0967
0968 /**
0969 * Constant for the "Enclosed Alphanumerics" Unicode character block.
0970 * @since 1.2
0971 */
0972 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock(
0973 "ENCLOSED_ALPHANUMERICS", new String[] {
0974 "Enclosed Alphanumerics",
0975 "EnclosedAlphanumerics" });
0976
0977 /**
0978 * Constant for the "Box Drawing" Unicode character block.
0979 * @since 1.2
0980 */
0981 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock(
0982 "BOX_DRAWING", new String[] { "Box Drawing",
0983 "BoxDrawing" });
0984
0985 /**
0986 * Constant for the "Block Elements" Unicode character block.
0987 * @since 1.2
0988 */
0989 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock(
0990 "BLOCK_ELEMENTS", new String[] { "Block Elements",
0991 "BlockElements" });
0992
0993 /**
0994 * Constant for the "Geometric Shapes" Unicode character block.
0995 * @since 1.2
0996 */
0997 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock(
0998 "GEOMETRIC_SHAPES", new String[] { "Geometric Shapes",
0999 "GeometricShapes" });
1000
1001 /**
1002 * Constant for the "Miscellaneous Symbols" Unicode character block.
1003 * @since 1.2
1004 */
1005 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock(
1006 "MISCELLANEOUS_SYMBOLS",
1007 new String[] { "Miscellaneous Symbols",
1008 "MiscellaneousSymbols" });
1009
1010 /**
1011 * Constant for the "Dingbats" Unicode character block.
1012 * @since 1.2
1013 */
1014 public static final UnicodeBlock DINGBATS = new UnicodeBlock(
1015 "DINGBATS");
1016
1017 /**
1018 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1019 * @since 1.2
1020 */
1021 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock(
1022 "CJK_SYMBOLS_AND_PUNCTUATION", new String[] {
1023 "CJK Symbols and Punctuation",
1024 "CJKSymbolsandPunctuation" });
1025
1026 /**
1027 * Constant for the "Hiragana" Unicode character block.
1028 * @since 1.2
1029 */
1030 public static final UnicodeBlock HIRAGANA = new UnicodeBlock(
1031 "HIRAGANA");
1032
1033 /**
1034 * Constant for the "Katakana" Unicode character block.
1035 * @since 1.2
1036 */
1037 public static final UnicodeBlock KATAKANA = new UnicodeBlock(
1038 "KATAKANA");
1039
1040 /**
1041 * Constant for the "Bopomofo" Unicode character block.
1042 * @since 1.2
1043 */
1044 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock(
1045 "BOPOMOFO");
1046
1047 /**
1048 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1049 * @since 1.2
1050 */
1051 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock(
1052 "HANGUL_COMPATIBILITY_JAMO", new String[] {
1053 "Hangul Compatibility Jamo",
1054 "HangulCompatibilityJamo" });
1055
1056 /**
1057 * Constant for the "Kanbun" Unicode character block.
1058 * @since 1.2
1059 */
1060 public static final UnicodeBlock KANBUN = new UnicodeBlock(
1061 "KANBUN");
1062
1063 /**
1064 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1065 * @since 1.2
1066 */
1067 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock(
1068 "ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {
1069 "Enclosed CJK Letters and Months",
1070 "EnclosedCJKLettersandMonths" });
1071
1072 /**
1073 * Constant for the "CJK Compatibility" Unicode character block.
1074 * @since 1.2
1075 */
1076 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock(
1077 "CJK_COMPATIBILITY", new String[] {
1078 "CJK Compatibility", "CJKCompatibility" });
1079
1080 /**
1081 * Constant for the "CJK Unified Ideographs" Unicode character block.
1082 * @since 1.2
1083 */
1084 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock(
1085 "CJK_UNIFIED_IDEOGRAPHS", new String[] {
1086 "CJK Unified Ideographs",
1087 "CJKUnifiedIdeographs" });
1088
1089 /**
1090 * Constant for the "Hangul Syllables" Unicode character block.
1091 * @since 1.2
1092 */
1093 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock(
1094 "HANGUL_SYLLABLES", new String[] { "Hangul Syllables",
1095 "HangulSyllables" });
1096
1097 /**
1098 * Constant for the "Private Use Area" Unicode character block.
1099 * @since 1.2
1100 */
1101 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock(
1102 "PRIVATE_USE_AREA", new String[] { "Private Use Area",
1103 "PrivateUseArea" });
1104
1105 /**
1106 * Constant for the "CJK Compatibility Ideographs" Unicode character block.
1107 * @since 1.2
1108 */
1109 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock(
1110 "CJK_COMPATIBILITY_IDEOGRAPHS", new String[] {
1111 "CJK Compatibility Ideographs",
1112 "CJKCompatibilityIdeographs" });
1113
1114 /**
1115 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1116 * @since 1.2
1117 */
1118 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock(
1119 "ALPHABETIC_PRESENTATION_FORMS", new String[] {
1120 "Alphabetic Presentation Forms",
1121 "AlphabeticPresentationForms" });
1122
1123 /**
1124 * Constant for the "Arabic Presentation Forms-A" Unicode character block.
1125 * @since 1.2
1126 */
1127 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock(
1128 "ARABIC_PRESENTATION_FORMS_A", new String[] {
1129 "Arabic Presentation Forms-A",
1130 "ArabicPresentationForms-A" });
1131
1132 /**
1133 * Constant for the "Combining Half Marks" Unicode character block.
1134 * @since 1.2
1135 */
1136 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock(
1137 "COMBINING_HALF_MARKS", new String[] {
1138 "Combining Half Marks", "CombiningHalfMarks" });
1139
1140 /**
1141 * Constant for the "CJK Compatibility Forms" Unicode character block.
1142 * @since 1.2
1143 */
1144 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock(
1145 "CJK_COMPATIBILITY_FORMS", new String[] {
1146 "CJK Compatibility Forms",
1147 "CJKCompatibilityForms" });
1148
1149 /**
1150 * Constant for the "Small Form Variants" Unicode character block.
1151 * @since 1.2
1152 */
1153 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock(
1154 "SMALL_FORM_VARIANTS", new String[] {
1155 "Small Form Variants", "SmallFormVariants" });
1156
1157 /**
1158 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1159 * @since 1.2
1160 */
1161 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock(
1162 "ARABIC_PRESENTATION_FORMS_B", new String[] {
1163 "Arabic Presentation Forms-B",
1164 "ArabicPresentationForms-B" });
1165
1166 /**
1167 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
1168 * @since 1.2
1169 */
1170 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock(
1171 "HALFWIDTH_AND_FULLWIDTH_FORMS", new String[] {
1172 "Halfwidth and Fullwidth Forms",
1173 "HalfwidthandFullwidthForms" });
1174
1175 /**
1176 * Constant for the "Specials" Unicode character block.
1177 * @since 1.2
1178 */
1179 public static final UnicodeBlock SPECIALS = new UnicodeBlock(
1180 "SPECIALS");
1181
1182 /**
1183 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1184 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1185 * {@link #LOW_SURROGATES}. These new constants match
1186 * the block definitions of the Unicode Standard.
1187 * The {@link #of(char)} and {@link #of(int)} methods
1188 * return the new constants, not SURROGATES_AREA.
1189 */
1190 @Deprecated
1191 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock(
1192 "SURROGATES_AREA");
1193
1194 /**
1195 * Constant for the "Syriac" Unicode character block.
1196 * @since 1.4
1197 */
1198 public static final UnicodeBlock SYRIAC = new UnicodeBlock(
1199 "SYRIAC");
1200
1201 /**
1202 * Constant for the "Thaana" Unicode character block.
1203 * @since 1.4
1204 */
1205 public static final UnicodeBlock THAANA = new UnicodeBlock(
1206 "THAANA");
1207
1208 /**
1209 * Constant for the "Sinhala" Unicode character block.
1210 * @since 1.4
1211 */
1212 public static final UnicodeBlock SINHALA = new UnicodeBlock(
1213 "SINHALA");
1214
1215 /**
1216 * Constant for the "Myanmar" Unicode character block.
1217 * @since 1.4
1218 */
1219 public static final UnicodeBlock MYANMAR = new UnicodeBlock(
1220 "MYANMAR");
1221
1222 /**
1223 * Constant for the "Ethiopic" Unicode character block.
1224 * @since 1.4
1225 */
1226 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock(
1227 "ETHIOPIC");
1228
1229 /**
1230 * Constant for the "Cherokee" Unicode character block.
1231 * @since 1.4
1232 */
1233 public static final UnicodeBlock CHEROKEE = new UnicodeBlock(
1234 "CHEROKEE");
1235
1236 /**
1237 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1238 * @since 1.4
1239 */
1240 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock(
1241 "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", new String[] {
1242 "Unified Canadian Aboriginal Syllabics",
1243 "UnifiedCanadianAboriginalSyllabics" });
1244
1245 /**
1246 * Constant for the "Ogham" Unicode character block.
1247 * @since 1.4
1248 */
1249 public static final UnicodeBlock OGHAM = new UnicodeBlock(
1250 "OGHAM");
1251
1252 /**
1253 * Constant for the "Runic" Unicode character block.
1254 * @since 1.4
1255 */
1256 public static final UnicodeBlock RUNIC = new UnicodeBlock(
1257 "RUNIC");
1258
1259 /**
1260 * Constant for the "Khmer" Unicode character block.
1261 * @since 1.4
1262 */
1263 public static final UnicodeBlock KHMER = new UnicodeBlock(
1264 "KHMER");
1265
1266 /**
1267 * Constant for the "Mongolian" Unicode character block.
1268 * @since 1.4
1269 */
1270 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock(
1271 "MONGOLIAN");
1272
1273 /**
1274 * Constant for the "Braille Patterns" Unicode character block.
1275 * @since 1.4
1276 */
1277 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock(
1278 "BRAILLE_PATTERNS", new String[] { "Braille Patterns",
1279 "BraillePatterns" });
1280
1281 /**
1282 * Constant for the "CJK Radicals Supplement" Unicode character block.
1283 * @since 1.4
1284 */
1285 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock(
1286 "CJK_RADICALS_SUPPLEMENT", new String[] {
1287 "CJK Radicals Supplement",
1288 "CJKRadicalsSupplement" });
1289
1290 /**
1291 * Constant for the "Kangxi Radicals" Unicode character block.
1292 * @since 1.4
1293 */
1294 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock(
1295 "KANGXI_RADICALS", new String[] { "Kangxi Radicals",
1296 "KangxiRadicals" });
1297
1298 /**
1299 * Constant for the "Ideographic Description Characters" Unicode character block.
1300 * @since 1.4
1301 */
1302 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock(
1303 "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {
1304 "Ideographic Description Characters",
1305 "IdeographicDescriptionCharacters" });
1306
1307 /**
1308 * Constant for the "Bopomofo Extended" Unicode character block.
1309 * @since 1.4
1310 */
1311 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock(
1312 "BOPOMOFO_EXTENDED", new String[] {
1313 "Bopomofo Extended", "BopomofoExtended" });
1314
1315 /**
1316 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1317 * @since 1.4
1318 */
1319 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock(
1320 "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {
1321 "CJK Unified Ideographs Extension A",
1322 "CJKUnifiedIdeographsExtensionA" });
1323
1324 /**
1325 * Constant for the "Yi Syllables" Unicode character block.
1326 * @since 1.4
1327 */
1328 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock(
1329 "YI_SYLLABLES", new String[] { "Yi Syllables",
1330 "YiSyllables" });
1331
1332 /**
1333 * Constant for the "Yi Radicals" Unicode character block.
1334 * @since 1.4
1335 */
1336 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock(
1337 "YI_RADICALS", new String[] { "Yi Radicals",
1338 "YiRadicals" });
1339
1340 /**
1341 * Constant for the "Cyrillic Supplementary" Unicode character block.
1342 * @since 1.5
1343 */
1344 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock(
1345 "CYRILLIC_SUPPLEMENTARY", new String[] {
1346 "Cyrillic Supplementary",
1347 "CyrillicSupplementary" });
1348
1349 /**
1350 * Constant for the "Tagalog" Unicode character block.
1351 * @since 1.5
1352 */
1353 public static final UnicodeBlock TAGALOG = new UnicodeBlock(
1354 "TAGALOG");
1355
1356 /**
1357 * Constant for the "Hanunoo" Unicode character block.
1358 * @since 1.5
1359 */
1360 public static final UnicodeBlock HANUNOO = new UnicodeBlock(
1361 "HANUNOO");
1362
1363 /**
1364 * Constant for the "Buhid" Unicode character block.
1365 * @since 1.5
1366 */
1367 public static final UnicodeBlock BUHID = new UnicodeBlock(
1368 "BUHID");
1369
1370 /**
1371 * Constant for the "Tagbanwa" Unicode character block.
1372 * @since 1.5
1373 */
1374 public static final UnicodeBlock TAGBANWA = new UnicodeBlock(
1375 "TAGBANWA");
1376
1377 /**
1378 * Constant for the "Limbu" Unicode character block.
1379 * @since 1.5
1380 */
1381 public static final UnicodeBlock LIMBU = new UnicodeBlock(
1382 "LIMBU");
1383
1384 /**
1385 * Constant for the "Tai Le" Unicode character block.
1386 * @since 1.5
1387 */
1388 public static final UnicodeBlock TAI_LE = new UnicodeBlock(
1389 "TAI_LE", new String[] { "Tai Le", "TaiLe" });
1390
1391 /**
1392 * Constant for the "Khmer Symbols" Unicode character block.
1393 * @since 1.5
1394 */
1395 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock(
1396 "KHMER_SYMBOLS", new String[] { "Khmer Symbols",
1397 "KhmerSymbols" });
1398
1399 /**
1400 * Constant for the "Phonetic Extensions" Unicode character block.
1401 * @since 1.5
1402 */
1403 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock(
1404 "PHONETIC_EXTENSIONS", new String[] {
1405 "Phonetic Extensions", "PhoneticExtensions" });
1406
1407 /**
1408 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1409 * @since 1.5
1410 */
1411 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock(
1412 "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", new String[] {
1413 "Miscellaneous Mathematical Symbols-A",
1414 "MiscellaneousMathematicalSymbols-A" });
1415
1416 /**
1417 * Constant for the "Supplemental Arrows-A" Unicode character block.
1418 * @since 1.5
1419 */
1420 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock(
1421 "SUPPLEMENTAL_ARROWS_A",
1422 new String[] { "Supplemental Arrows-A",
1423 "SupplementalArrows-A" });
1424
1425 /**
1426 * Constant for the "Supplemental Arrows-B" Unicode character block.
1427 * @since 1.5
1428 */
1429 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock(
1430 "SUPPLEMENTAL_ARROWS_B",
1431 new String[] { "Supplemental Arrows-B",
1432 "SupplementalArrows-B" });
1433
1434 /**
1435 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
1436 * @since 1.5
1437 */
1438 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock(
1439 "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", new String[] {
1440 "Miscellaneous Mathematical Symbols-B",
1441 "MiscellaneousMathematicalSymbols-B" });
1442
1443 /**
1444 * Constant for the "Supplemental Mathematical Operators" Unicode character block.
1445 * @since 1.5
1446 */
1447 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock(
1448 "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", new String[] {
1449 "Supplemental Mathematical Operators",
1450 "SupplementalMathematicalOperators" });
1451
1452 /**
1453 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
1454 * @since 1.5
1455 */
1456 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock(
1457 "MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {
1458 "Miscellaneous Symbols and Arrows",
1459 "MiscellaneousSymbolsandArrows" });
1460
1461 /**
1462 * Constant for the "Katakana Phonetic Extensions" Unicode character block.
1463 * @since 1.5
1464 */
1465 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock(
1466 "KATAKANA_PHONETIC_EXTENSIONS", new String[] {
1467 "Katakana Phonetic Extensions",
1468 "KatakanaPhoneticExtensions" });
1469
1470 /**
1471 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1472 * @since 1.5
1473 */
1474 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock(
1475 "YIJING_HEXAGRAM_SYMBOLS", new String[] {
1476 "Yijing Hexagram Symbols",
1477 "YijingHexagramSymbols" });
1478
1479 /**
1480 * Constant for the "Variation Selectors" Unicode character block.
1481 * @since 1.5
1482 */
1483 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock(
1484 "VARIATION_SELECTORS", new String[] {
1485 "Variation Selectors", "VariationSelectors" });
1486
1487 /**
1488 * Constant for the "Linear B Syllabary" Unicode character block.
1489 * @since 1.5
1490 */
1491 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock(
1492 "LINEAR_B_SYLLABARY", new String[] {
1493 "Linear B Syllabary", "LinearBSyllabary" });
1494
1495 /**
1496 * Constant for the "Linear B Ideograms" Unicode character block.
1497 * @since 1.5
1498 */
1499 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock(
1500 "LINEAR_B_IDEOGRAMS", new String[] {
1501 "Linear B Ideograms", "LinearBIdeograms" });
1502
1503 /**
1504 * Constant for the "Aegean Numbers" Unicode character block.
1505 * @since 1.5
1506 */
1507 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock(
1508 "AEGEAN_NUMBERS", new String[] { "Aegean Numbers",
1509 "AegeanNumbers" });
1510
1511 /**
1512 * Constant for the "Old Italic" Unicode character block.
1513 * @since 1.5
1514 */
1515 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock(
1516 "OLD_ITALIC",
1517 new String[] { "Old Italic", "OldItalic" });
1518
1519 /**
1520 * Constant for the "Gothic" Unicode character block.
1521 * @since 1.5
1522 */
1523 public static final UnicodeBlock GOTHIC = new UnicodeBlock(
1524 "GOTHIC");
1525
1526 /**
1527 * Constant for the "Ugaritic" Unicode character block.
1528 * @since 1.5
1529 */
1530 public static final UnicodeBlock UGARITIC = new UnicodeBlock(
1531 "UGARITIC");
1532
1533 /**
1534 * Constant for the "Deseret" Unicode character block.
1535 * @since 1.5
1536 */
1537 public static final UnicodeBlock DESERET = new UnicodeBlock(
1538 "DESERET");
1539
1540 /**
1541 * Constant for the "Shavian" Unicode character block.
1542 * @since 1.5
1543 */
1544 public static final UnicodeBlock SHAVIAN = new UnicodeBlock(
1545 "SHAVIAN");
1546
1547 /**
1548 * Constant for the "Osmanya" Unicode character block.
1549 * @since 1.5
1550 */
1551 public static final UnicodeBlock OSMANYA = new UnicodeBlock(
1552 "OSMANYA");
1553
1554 /**
1555 * Constant for the "Cypriot Syllabary" Unicode character block.
1556 * @since 1.5
1557 */
1558 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock(
1559 "CYPRIOT_SYLLABARY", new String[] {
1560 "Cypriot Syllabary", "CypriotSyllabary" });
1561
1562 /**
1563 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1564 * @since 1.5
1565 */
1566 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock(
1567 "BYZANTINE_MUSICAL_SYMBOLS", new String[] {
1568 "Byzantine Musical Symbols",
1569 "ByzantineMusicalSymbols" });
1570
1571 /**
1572 * Constant for the "Musical Symbols" Unicode character block.
1573 * @since 1.5
1574 */
1575 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock(
1576 "MUSICAL_SYMBOLS", new String[] { "Musical Symbols",
1577 "MusicalSymbols" });
1578
1579 /**
1580 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1581 * @since 1.5
1582 */
1583 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock(
1584 "TAI_XUAN_JING_SYMBOLS", new String[] {
1585 "Tai Xuan Jing Symbols", "TaiXuanJingSymbols" });
1586
1587 /**
1588 * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block.
1589 * @since 1.5
1590 */
1591 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock(
1592 "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", new String[] {
1593 "Mathematical Alphanumeric Symbols",
1594 "MathematicalAlphanumericSymbols" });
1595
1596 /**
1597 * Constant for the "CJK Unified Ideographs Extension B" Unicode character block.
1598 * @since 1.5
1599 */
1600 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock(
1601 "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", new String[] {
1602 "CJK Unified Ideographs Extension B",
1603 "CJKUnifiedIdeographsExtensionB" });
1604
1605 /**
1606 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1607 * @since 1.5
1608 */
1609 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock(
1610 "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1611 new String[] {
1612 "CJK Compatibility Ideographs Supplement",
1613 "CJKCompatibilityIdeographsSupplement" });
1614
1615 /**
1616 * Constant for the "Tags" Unicode character block.
1617 * @since 1.5
1618 */
1619 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1620
1621 /**
1622 * Constant for the "Variation Selectors Supplement" Unicode character block.
1623 * @since 1.5
1624 */
1625 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock(
1626 "VARIATION_SELECTORS_SUPPLEMENT", new String[] {
1627 "Variation Selectors Supplement",
1628 "VariationSelectorsSupplement" });
1629
1630 /**
1631 * Constant for the "Supplementary Private Use Area-A" Unicode character block.
1632 * @since 1.5
1633 */
1634 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock(
1635 "SUPPLEMENTARY_PRIVATE_USE_AREA_A", new String[] {
1636 "Supplementary Private Use Area-A",
1637 "SupplementaryPrivateUseArea-A" });
1638
1639 /**
1640 * Constant for the "Supplementary Private Use Area-B" Unicode character block.
1641 * @since 1.5
1642 */
1643 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock(
1644 "SUPPLEMENTARY_PRIVATE_USE_AREA_B", new String[] {
1645 "Supplementary Private Use Area-B",
1646 "SupplementaryPrivateUseArea-B" });
1647
1648 /**
1649 * Constant for the "High Surrogates" Unicode character block.
1650 * This block represents codepoint values in the high surrogate
1651 * range: 0xD800 through 0xDB7F
1652 *
1653 * @since 1.5
1654 */
1655 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock(
1656 "HIGH_SURROGATES", new String[] { "High Surrogates",
1657 "HighSurrogates" });
1658
1659 /**
1660 * Constant for the "High Private Use Surrogates" Unicode character block.
1661 * This block represents codepoint values in the high surrogate
1662 * range: 0xDB80 through 0xDBFF
1663 *
1664 * @since 1.5
1665 */
1666 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock(
1667 "HIGH_PRIVATE_USE_SURROGATES", new String[] {
1668 "High Private Use Surrogates",
1669 "HighPrivateUseSurrogates" });
1670
1671 /**
1672 * Constant for the "Low Surrogates" Unicode character block.
1673 * This block represents codepoint values in the high surrogate
1674 * range: 0xDC00 through 0xDFFF
1675 *
1676 * @since 1.5
1677 */
1678 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock(
1679 "LOW_SURROGATES", new String[] { "Low Surrogates",
1680 "LowSurrogates" });
1681
1682 private static final int blockStarts[] = { 0x0000, // Basic Latin
1683 0x0080, // Latin-1 Supplement
1684 0x0100, // Latin Extended-A
1685 0x0180, // Latin Extended-B
1686 0x0250, // IPA Extensions
1687 0x02B0, // Spacing Modifier Letters
1688 0x0300, // Combining Diacritical Marks
1689 0x0370, // Greek and Coptic
1690 0x0400, // Cyrillic
1691 0x0500, // Cyrillic Supplementary
1692 0x0530, // Armenian
1693 0x0590, // Hebrew
1694 0x0600, // Arabic
1695 0x0700, // Syriac
1696 0x0750, // unassigned
1697 0x0780, // Thaana
1698 0x07C0, // unassigned
1699 0x0900, // Devanagari
1700 0x0980, // Bengali
1701 0x0A00, // Gurmukhi
1702 0x0A80, // Gujarati
1703 0x0B00, // Oriya
1704 0x0B80, // Tamil
1705 0x0C00, // Telugu
1706 0x0C80, // Kannada
1707 0x0D00, // Malayalam
1708 0x0D80, // Sinhala
1709 0x0E00, // Thai
1710 0x0E80, // Lao
1711 0x0F00, // Tibetan
1712 0x1000, // Myanmar
1713 0x10A0, // Georgian
1714 0x1100, // Hangul Jamo
1715 0x1200, // Ethiopic
1716 0x1380, // unassigned
1717 0x13A0, // Cherokee
1718 0x1400, // Unified Canadian Aboriginal Syllabics
1719 0x1680, // Ogham
1720 0x16A0, // Runic
1721 0x1700, // Tagalog
1722 0x1720, // Hanunoo
1723 0x1740, // Buhid
1724 0x1760, // Tagbanwa
1725 0x1780, // Khmer
1726 0x1800, // Mongolian
1727 0x18B0, // unassigned
1728 0x1900, // Limbu
1729 0x1950, // Tai Le
1730 0x1980, // unassigned
1731 0x19E0, // Khmer Symbols
1732 0x1A00, // unassigned
1733 0x1D00, // Phonetic Extensions
1734 0x1D80, // unassigned
1735 0x1E00, // Latin Extended Additional
1736 0x1F00, // Greek Extended
1737 0x2000, // General Punctuation
1738 0x2070, // Superscripts and Subscripts
1739 0x20A0, // Currency Symbols
1740 0x20D0, // Combining Diacritical Marks for Symbols
1741 0x2100, // Letterlike Symbols
1742 0x2150, // Number Forms
1743 0x2190, // Arrows
1744 0x2200, // Mathematical Operators
1745 0x2300, // Miscellaneous Technical
1746 0x2400, // Control Pictures
1747 0x2440, // Optical Character Recognition
1748 0x2460, // Enclosed Alphanumerics
1749 0x2500, // Box Drawing
1750 0x2580, // Block Elements
1751 0x25A0, // Geometric Shapes
1752 0x2600, // Miscellaneous Symbols
1753 0x2700, // Dingbats
1754 0x27C0, // Miscellaneous Mathematical Symbols-A
1755 0x27F0, // Supplemental Arrows-A
1756 0x2800, // Braille Patterns
1757 0x2900, // Supplemental Arrows-B
1758 0x2980, // Miscellaneous Mathematical Symbols-B
1759 0x2A00, // Supplemental Mathematical Operators
1760 0x2B00, // Miscellaneous Symbols and Arrows
1761 0x2C00, // unassigned
1762 0x2E80, // CJK Radicals Supplement
1763 0x2F00, // Kangxi Radicals
1764 0x2FE0, // unassigned
1765 0x2FF0, // Ideographic Description Characters
1766 0x3000, // CJK Symbols and Punctuation
1767 0x3040, // Hiragana
1768 0x30A0, // Katakana
1769 0x3100, // Bopomofo
1770 0x3130, // Hangul Compatibility Jamo
1771 0x3190, // Kanbun
1772 0x31A0, // Bopomofo Extended
1773 0x31C0, // unassigned
1774 0x31F0, // Katakana Phonetic Extensions
1775 0x3200, // Enclosed CJK Letters and Months
1776 0x3300, // CJK Compatibility
1777 0x3400, // CJK Unified Ideographs Extension A
1778 0x4DC0, // Yijing Hexagram Symbols
1779 0x4E00, // CJK Unified Ideographs
1780 0xA000, // Yi Syllables
1781 0xA490, // Yi Radicals
1782 0xA4D0, // unassigned
1783 0xAC00, // Hangul Syllables
1784 0xD7B0, // unassigned
1785 0xD800, // High Surrogates
1786 0xDB80, // High Private Use Surrogates
1787 0xDC00, // Low Surrogates
1788 0xE000, // Private Use
1789 0xF900, // CJK Compatibility Ideographs
1790 0xFB00, // Alphabetic Presentation Forms
1791 0xFB50, // Arabic Presentation Forms-A
1792 0xFE00, // Variation Selectors
1793 0xFE10, // unassigned
1794 0xFE20, // Combining Half Marks
1795 0xFE30, // CJK Compatibility Forms
1796 0xFE50, // Small Form Variants
1797 0xFE70, // Arabic Presentation Forms-B
1798 0xFF00, // Halfwidth and Fullwidth Forms
1799 0xFFF0, // Specials
1800 0x10000, // Linear B Syllabary
1801 0x10080, // Linear B Ideograms
1802 0x10100, // Aegean Numbers
1803 0x10140, // unassigned
1804 0x10300, // Old Italic
1805 0x10330, // Gothic
1806 0x10350, // unassigned
1807 0x10380, // Ugaritic
1808 0x103A0, // unassigned
1809 0x10400, // Deseret
1810 0x10450, // Shavian
1811 0x10480, // Osmanya
1812 0x104B0, // unassigned
1813 0x10800, // Cypriot Syllabary
1814 0x10840, // unassigned
1815 0x1D000, // Byzantine Musical Symbols
1816 0x1D100, // Musical Symbols
1817 0x1D200, // unassigned
1818 0x1D300, // Tai Xuan Jing Symbols
1819 0x1D360, // unassigned
1820 0x1D400, // Mathematical Alphanumeric Symbols
1821 0x1D800, // unassigned
1822 0x20000, // CJK Unified Ideographs Extension B
1823 0x2A6E0, // unassigned
1824 0x2F800, // CJK Compatibility Ideographs Supplement
1825 0x2FA20, // unassigned
1826 0xE0000, // Tags
1827 0xE0080, // unassigned
1828 0xE0100, // Variation Selectors Supplement
1829 0xE01F0, // unassigned
1830 0xF0000, // Supplementary Private Use Area-A
1831 0x100000, // Supplementary Private Use Area-B
1832 };
1833
1834 private static final UnicodeBlock[] blocks = { BASIC_LATIN,
1835 LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, LATIN_EXTENDED_B,
1836 IPA_EXTENSIONS, SPACING_MODIFIER_LETTERS,
1837 COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC,
1838 CYRILLIC_SUPPLEMENTARY, ARMENIAN, HEBREW, ARABIC,
1839 SYRIAC, null, THAANA, null, DEVANAGARI, BENGALI,
1840 GURMUKHI, GUJARATI, ORIYA, TAMIL, TELUGU, KANNADA,
1841 MALAYALAM, SINHALA, THAI, LAO, TIBETAN, MYANMAR,
1842 GEORGIAN, HANGUL_JAMO, ETHIOPIC, null, CHEROKEE,
1843 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC,
1844 TAGALOG, HANUNOO, BUHID, TAGBANWA, KHMER, MONGOLIAN,
1845 null, LIMBU, TAI_LE, null, KHMER_SYMBOLS, null,
1846 PHONETIC_EXTENSIONS, null, LATIN_EXTENDED_ADDITIONAL,
1847 GREEK_EXTENDED, GENERAL_PUNCTUATION,
1848 SUPERSCRIPTS_AND_SUBSCRIPTS, CURRENCY_SYMBOLS,
1849 COMBINING_MARKS_FOR_SYMBOLS, LETTERLIKE_SYMBOLS,
1850 NUMBER_FORMS, ARROWS, MATHEMATICAL_OPERATORS,
1851 MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES,
1852 OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS,
1853 BOX_DRAWING, BLOCK_ELEMENTS, GEOMETRIC_SHAPES,
1854 MISCELLANEOUS_SYMBOLS, DINGBATS,
1855 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1856 SUPPLEMENTAL_ARROWS_A, BRAILLE_PATTERNS,
1857 SUPPLEMENTAL_ARROWS_B,
1858 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1859 SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1860 MISCELLANEOUS_SYMBOLS_AND_ARROWS, null,
1861 CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, null,
1862 IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1863 CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA,
1864 BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, KANBUN,
1865 BOPOMOFO_EXTENDED, null, KATAKANA_PHONETIC_EXTENSIONS,
1866 ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY,
1867 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1868 YIJING_HEXAGRAM_SYMBOLS, CJK_UNIFIED_IDEOGRAPHS,
1869 YI_SYLLABLES, YI_RADICALS, null, HANGUL_SYLLABLES,
1870 null, HIGH_SURROGATES, HIGH_PRIVATE_USE_SURROGATES,
1871 LOW_SURROGATES, PRIVATE_USE_AREA,
1872 CJK_COMPATIBILITY_IDEOGRAPHS,
1873 ALPHABETIC_PRESENTATION_FORMS,
1874 ARABIC_PRESENTATION_FORMS_A, VARIATION_SELECTORS, null,
1875 COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS,
1876 SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B,
1877 HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS,
1878 LINEAR_B_SYLLABARY, LINEAR_B_IDEOGRAMS, AEGEAN_NUMBERS,
1879 null, OLD_ITALIC, GOTHIC, null, UGARITIC, null,
1880 DESERET, SHAVIAN, OSMANYA, null, CYPRIOT_SYLLABARY,
1881 null, BYZANTINE_MUSICAL_SYMBOLS, MUSICAL_SYMBOLS, null,
1882 TAI_XUAN_JING_SYMBOLS, null,
1883 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, null,
1884 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, null,
1885 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, null, TAGS,
1886 null, VARIATION_SELECTORS_SUPPLEMENT, null,
1887 SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1888 SUPPLEMENTARY_PRIVATE_USE_AREA_B };
1889
1890 /**
1891 * Returns the object representing the Unicode block containing the
1892 * given character, or <code>null</code> if the character is not a
1893 * member of a defined block.
1894 *
1895 * <p><b>Note:</b> This method cannot handle <a
1896 * href="Character.html#supplementary"> supplementary
1897 * characters</a>. To support all Unicode characters,
1898 * including supplementary characters, use the {@link
1899 * #of(int)} method.
1900 *
1901 * @param c The character in question
1902 * @return The <code>UnicodeBlock</code> instance representing the
1903 * Unicode block of which this character is a member, or
1904 * <code>null</code> if the character is not a member of any
1905 * Unicode block
1906 */
1907 public static UnicodeBlock of(char c) {
1908 return of((int) c);
1909 }
1910
1911 /**
1912 * Returns the object representing the Unicode block
1913 * containing the given character (Unicode code point), or
1914 * <code>null</code> if the character is not a member of a
1915 * defined block.
1916 *
1917 * @param codePoint the character (Unicode code point) in question.
1918 * @return The <code>UnicodeBlock</code> instance representing the
1919 * Unicode block of which this character is a member, or
1920 * <code>null</code> if the character is not a member of any
1921 * Unicode block
1922 * @exception IllegalArgumentException if the specified
1923 * <code>codePoint</code> is an invalid Unicode code point.
1924 * @see Character#isValidCodePoint(int)
1925 * @since 1.5
1926 */
1927 public static UnicodeBlock of(int codePoint) {
1928 if (!isValidCodePoint(codePoint)) {
1929 throw new IllegalArgumentException();
1930 }
1931
1932 int top, bottom, current;
1933 bottom = 0;
1934 top = blockStarts.length;
1935 current = top / 2;
1936
1937 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
1938 while (top - bottom > 1) {
1939 if (codePoint >= blockStarts[current]) {
1940 bottom = current;
1941 } else {
1942 top = current;
1943 }
1944 current = (top + bottom) / 2;
1945 }
1946 return blocks[current];
1947 }
1948
1949 /**
1950 * Returns the UnicodeBlock with the given name. Block
1951 * names are determined by The Unicode Standard. The file
1952 * Blocks-&lt;version&gt;.txt defines blocks for a particular
1953 * version of the standard. The {@link Character} class specifies
1954 * the version of the standard that it supports.
1955 * <p>
1956 * This method accepts block names in the following forms:
1957 * <ol>
1958 * <li> Canonical block names as defined by the Unicode Standard.
1959 * For example, the standard defines a "Basic Latin" block. Therefore, this
1960 * method accepts "Basic Latin" as a valid block name. The documentation of
1961 * each UnicodeBlock provides the canonical name.
1962 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
1963 * is a valid block name for the "Basic Latin" block.
1964 * <li>The text representation of each constant UnicodeBlock identifier.
1965 * For example, this method will return the {@link #BASIC_LATIN} block if
1966 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
1967 * hyphens in the canonical name with underscores.
1968 * </ol>
1969 * Finally, character case is ignored for all of the valid block name forms.
1970 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
1971 * The en_US locale's case mapping rules are used to provide case-insensitive
1972 * string comparisons for block name validation.
1973 * <p>
1974 * If the Unicode Standard changes block names, both the previous and
1975 * current names will be accepted.
1976 *
1977 * @param blockName A <code>UnicodeBlock</code> name.
1978 * @return The <code>UnicodeBlock</code> instance identified
1979 * by <code>blockName</code>
1980 * @throws IllegalArgumentException if <code>blockName</code> is an
1981 * invalid name
1982 * @throws NullPointerException if <code>blockName</code> is null
1983 * @since 1.5
1984 */
1985 public static final UnicodeBlock forName(String blockName) {
1986 UnicodeBlock block = (UnicodeBlock) map.get(blockName
1987 .toUpperCase(Locale.US));
1988 if (block == null) {
1989 throw new IllegalArgumentException();
1990 }
1991 return block;
1992 }
1993 }
1994
1995 /**
1996 * The value of the <code>Character</code>.
1997 *
1998 * @serial
1999 */
2000 private final char value; // the wrapped primitive; final, so it is assigned only by the constructor
2001
2002 /** Pinned to the serialVersionUID from JDK 1.0.2 for interoperability. */
2003 private static final long serialVersionUID = 3786198910865385080L;
2004
2005 /**
2006 * Constructs a newly allocated <code>Character</code> object that
2007 * represents the specified <code>char</code> value.
2008 *
2009 * @param value the value to be represented by the
2010 * <code>Character</code> object.
2011 */
public Character(char value) {
    // Sole assignment of the final field holding the wrapped primitive.
    this.value = value;
}
2015
2016 private static class CharacterCache {
2017 private CharacterCache() {
2018 }
2019
2020 static final Character cache[] = new Character[127 + 1];
2021
2022 static {
2023 for (int i = 0; i < cache.length; i++)
2024 cache[i] = new Character((char) i);
2025 }
2026 }
2027
2028 /**
2029 * Returns a <tt>Character</tt> instance representing the specified
2030 * <tt>char</tt> value.
2031 * If a new <tt>Character</tt> instance is not required, this method
2032 * should generally be used in preference to the constructor
2033 * {@link #Character(char)}, as this method is likely to yield
2034 * significantly better space and time performance by caching
2035 * frequently requested values.
2036 *
2037 * @param c a char value.
2038 * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2039 * @since 1.5
2040 */
2041 public static Character valueOf(char c) {
2042 if (c <= 127) { // must cache
2043 return CharacterCache.cache[(int) c];
2044 }
2045 return new Character(c);
2046 }
2047
2048 /**
2049 * Returns the value of this <code>Character</code> object.
2050 * @return the primitive <code>char</code> value represented by
2051 * this object.
2052 */
2053 public char charValue() {
2054 return value;
2055 }
2056
2057 /**
2058 * Returns a hash code for this <code>Character</code>.
2059 * @return a hash code value for this object.
2060 */
2061 public int hashCode() {
2062 return (int) value;
2063 }
2064
2065 /**
2066 * Compares this object against the specified object.
2067 * The result is <code>true</code> if and only if the argument is not
2068 * <code>null</code> and is a <code>Character</code> object that
2069 * represents the same <code>char</code> value as this object.
2070 *
2071 * @param obj the object to compare with.
2072 * @return <code>true</code> if the objects are the same;
2073 * <code>false</code> otherwise.
2074 */
2075 public boolean equals(Object obj) {
2076 if (obj instanceof Character) {
2077 return value == ((Character) obj).charValue();
2078 }
2079 return false;
2080 }
2081
2082 /**
2083 * Returns a <code>String</code> object representing this
2084 * <code>Character</code>'s value. The result is a string of
2085 * length 1 whose sole component is the primitive
2086 * <code>char</code> value represented by this
2087 * <code>Character</code> object.
2088 *
2089 * @return a string representation of this object.
2090 */
2091 public String toString() {
2092 char buf[] = { value };
2093 return String.valueOf(buf);
2094 }
2095
2096 /**
2097 * Returns a <code>String</code> object representing the
2098 * specified <code>char</code>. The result is a string of length
2099 * 1 consisting solely of the specified <code>char</code>.
2100 *
2101 * @param c the <code>char</code> to be converted
2102 * @return the string representation of the specified <code>char</code>
2103 * @since 1.4
2104 */
2105 public static String toString(char c) {
2106 return String.valueOf(c);
2107 }
2108
2109 /**
2110 * Determines whether the specified code point is a valid Unicode
2111 * code point value in the range of <code>0x0000</code> to
2112 * <code>0x10FFFF</code> inclusive. This method is equivalent to
2113 * the expression:
2114 *
2115 * <blockquote><pre>
2116 * codePoint &gt;= 0x0000 &amp;&amp; codePoint &lt;= 0x10FFFF
2117 * </pre></blockquote>
2118 *
2119 * @param codePoint the Unicode code point to be tested
2120 * @return <code>true</code> if the specified code point value
2121 * is a valid code point value;
2122 * <code>false</code> otherwise.
2123 * @since 1.5
2124 */
2125 public static boolean isValidCodePoint(int codePoint) {
2126 return codePoint >= MIN_CODE_POINT
2127 && codePoint <= MAX_CODE_POINT;
2128 }
2129
2130 /**
2131 * Determines whether the specified character (Unicode code point)
2132 * is in the supplementary character range. The method call is
2133 * equivalent to the expression:
2134 * <blockquote><pre>
2135 * codePoint &gt;= 0x10000 &amp;&amp; codePoint &lt;= 0x10FFFF
2136 * </pre></blockquote>
2137 *
2138 * @param codePoint the character (Unicode code point) to be tested
2139 * @return <code>true</code> if the specified character is in the Unicode
2140 * supplementary character range; <code>false</code> otherwise.
2141 * @since 1.5
2142 */
2143 public static boolean isSupplementaryCodePoint(int codePoint) {
2144 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2145 && codePoint <= MAX_CODE_POINT;
2146 }
2147
2148 /**
2149 * Determines if the given <code>char</code> value is a
2150 * high-surrogate code unit (also known as <i>leading-surrogate
2151 * code unit</i>). Such values do not represent characters by
2152 * themselves, but are used in the representation of <a
2153 * href="#supplementary">supplementary characters</a> in the
2154 * UTF-16 encoding.
2155 *
2156 * <p>This method returns <code>true</code> if and only if
2157 * <blockquote><pre>ch &gt;= '&#92;uD800' &amp;&amp; ch &lt;= '&#92;uDBFF'
2158 * </pre></blockquote>
2159 * is <code>true</code>.
2160 *
2161 * @param ch the <code>char</code> value to be tested.
2162 * @return <code>true</code> if the <code>char</code> value
2163 * is between '&#92;uD800' and '&#92;uDBFF' inclusive;
2164 * <code>false</code> otherwise.
2165 * @see java.lang.Character#isLowSurrogate(char)
2166 * @see Character.UnicodeBlock#of(int)
2167 * @since 1.5
2168 */
2169 public static boolean isHighSurrogate(char ch) {
2170 return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2171 }
2172
2173 /**
2174 * Determines if the given <code>char</code> value is a
2175 * low-surrogate code unit (also known as <i>trailing-surrogate code
2176 * unit</i>). Such values do not represent characters by themselves,
2177 * but are used in the representation of <a
2178 * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2179 *
2180 * <p> This method returns <code>true</code> if and only if
2181 * <blockquote><pre>ch &gt;= '&#92;uDC00' &amp;&amp; ch &lt;= '&#92;uDFFF'
2182 * </pre></blockquote> is <code>true</code>.
2183 *
2184 * @param ch the <code>char</code> value to be tested.
2185 * @return <code>true</code> if the <code>char</code> value
2186 * is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
2187 * <code>false</code> otherwise.
2188 * @see java.lang.Character#isHighSurrogate(char)
2189 * @since 1.5
2190 */
2191 public static boolean isLowSurrogate(char ch) {
2192 return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2193 }
2194
2195 /**
2196 * Determines whether the specified pair of <code>char</code>
2197 * values is a valid surrogate pair. This method is equivalent to
2198 * the expression:
2199 * <blockquote><pre>
2200 * isHighSurrogate(high) &amp;&amp; isLowSurrogate(low)
2201 * </pre></blockquote>
2202 *
2203 * @param high the high-surrogate code value to be tested
2204 * @param low the low-surrogate code value to be tested
2205 * @return <code>true</code> if the specified high and
2206 * low-surrogate code values represent a valid surrogate pair;
2207 * <code>false</code> otherwise.
2208 * @since 1.5
2209 */
2210 public static boolean isSurrogatePair(char high, char low) {
2211 return isHighSurrogate(high) && isLowSurrogate(low);
2212 }
2213
2214 /**
2215 * Determines the number of <code>char</code> values needed to
2216 * represent the specified character (Unicode code point). If the
2217 * specified character is equal to or greater than 0x10000, then
2218 * the method returns 2. Otherwise, the method returns 1.
2219 *
2220 * <p>This method doesn't validate the specified character to be a
2221 * valid Unicode code point. The caller must validate the
2222 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2223 * if necessary.
2224 *
2225 * @param codePoint the character (Unicode code point) to be tested.
2226 * @return 2 if the character is a valid supplementary character; 1 otherwise.
2227 * @see #isSupplementaryCodePoint(int)
2228 * @since 1.5
2229 */
2230 public static int charCount(int codePoint) {
2231 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
2232 }
2233
2234 /**
2235 * Converts the specified surrogate pair to its supplementary code
2236 * point value. This method does not validate the specified
2237 * surrogate pair. The caller must validate it using {@link
2238 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2239 *
2240 * @param high the high-surrogate code unit
2241 * @param low the low-surrogate code unit
2242 * @return the supplementary code point composed from the
2243 * specified surrogate pair.
2244 * @since 1.5
2245 */
2246 public static int toCodePoint(char high, char low) {
2247 return ((high - MIN_HIGH_SURROGATE) << 10)
2248 + (low - MIN_LOW_SURROGATE)
2249 + MIN_SUPPLEMENTARY_CODE_POINT;
2250 }
2251
2252 /**
2253 * Returns the code point at the given index of the
2254 * <code>CharSequence</code>. If the <code>char</code> value at
2255 * the given index in the <code>CharSequence</code> is in the
2256 * high-surrogate range, the following index is less than the
2257 * length of the <code>CharSequence</code>, and the
2258 * <code>char</code> value at the following index is in the
2259 * low-surrogate range, then the supplementary code point
2260 * corresponding to this surrogate pair is returned. Otherwise,
2261 * the <code>char</code> value at the given index is returned.
2262 *
2263 * @param seq a sequence of <code>char</code> values (Unicode code
2264 * units)
2265 * @param index the index to the <code>char</code> values (Unicode
2266 * code units) in <code>seq</code> to be converted
2267 * @return the Unicode code point at the given index
2268 * @exception NullPointerException if <code>seq</code> is null.
2269 * @exception IndexOutOfBoundsException if the value
2270 * <code>index</code> is negative or not less than
2271 * {@link CharSequence#length() seq.length()}.
2272 * @since 1.5
2273 */
2274 public static int codePointAt(CharSequence seq, int index) {
2275 char c1 = seq.charAt(index++);
2276 if (isHighSurrogate(c1)) {
2277 if (index < seq.length()) {
2278 char c2 = seq.charAt(index);
2279 if (isLowSurrogate(c2)) {
2280 return toCodePoint(c1, c2);
2281 }
2282 }
2283 }
2284 return c1;
2285 }
2286
2287 /**
2288 * Returns the code point at the given index of the
2289 * <code>char</code> array. If the <code>char</code> value at
2290 * the given index in the <code>char</code> array is in the
2291 * high-surrogate range, the following index is less than the
2292 * length of the <code>char</code> array, and the
2293 * <code>char</code> value at the following index is in the
2294 * low-surrogate range, then the supplementary code point
2295 * corresponding to this surrogate pair is returned. Otherwise,
2296 * the <code>char</code> value at the given index is returned.
2297 *
2298 * @param a the <code>char</code> array
2299 * @param index the index to the <code>char</code> values (Unicode
2300 * code units) in the <code>char</code> array to be converted
2301 * @return the Unicode code point at the given index
2302 * @exception NullPointerException if <code>a</code> is null.
2303 * @exception IndexOutOfBoundsException if the value
2304 * <code>index</code> is negative or not less than
2305 * the length of the <code>char</code> array.
2306 * @since 1.5
2307 */
2308 public static int codePointAt(char[] a, int index) {
2309 return codePointAtImpl(a, index, a.length);
2310 }
2311
2312 /**
2313 * Returns the code point at the given index of the
2314 * <code>char</code> array, where only array elements with
2315 * <code>index</code> less than <code>limit</code> can be used. If
2316 * the <code>char</code> value at the given index in the
2317 * <code>char</code> array is in the high-surrogate range, the
2318 * following index is less than the <code>limit</code>, and the
2319 * <code>char</code> value at the following index is in the
2320 * low-surrogate range, then the supplementary code point
2321 * corresponding to this surrogate pair is returned. Otherwise,
2322 * the <code>char</code> value at the given index is returned.
2323 *
2324 * @param a the <code>char</code> array
2325 * @param index the index to the <code>char</code> values (Unicode
2326 * code units) in the <code>char</code> array to be converted
2327 * @param limit the index after the last array element that can be used in the
2328 * <code>char</code> array
2329 * @return the Unicode code point at the given index
2330 * @exception NullPointerException if <code>a</code> is null.
2331 * @exception IndexOutOfBoundsException if the <code>index</code>
2332 * argument is negative or not less than the <code>limit</code>
2333 * argument, or if the <code>limit</code> argument is negative or
2334 * greater than the length of the <code>char</code> array.
2335 * @since 1.5
2336 */
2337 public static int codePointAt(char[] a, int index, int limit) {
2338 if (index >= limit || limit < 0 || limit > a.length) {
2339 throw new IndexOutOfBoundsException();
2340 }
2341 return codePointAtImpl(a, index, limit);
2342 }
2343
2344 static int codePointAtImpl(char[] a, int index, int limit) {
2345 char c1 = a[index++];
2346 if (isHighSurrogate(c1)) {
2347 if (index < limit) {
2348 char c2 = a[index];
2349 if (isLowSurrogate(c2)) {
2350 return toCodePoint(c1, c2);
2351 }
2352 }
2353 }
2354 return c1;
2355 }
2356
2357 /**
2358 * Returns the code point preceding the given index of the
2359 * <code>CharSequence</code>. If the <code>char</code> value at
2360 * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2361 * the low-surrogate range, <code>(index - 2)</code> is not
2362 * negative, and the <code>char</code> value at <code>(index -
2363 * 2)</code> in the <code>CharSequence</code> is in the
2364 * high-surrogate range, then the supplementary code point
2365 * corresponding to this surrogate pair is returned. Otherwise,
2366 * the <code>char</code> value at <code>(index - 1)</code> is
2367 * returned.
2368 *
2369 * @param seq the <code>CharSequence</code> instance
2370 * @param index the index following the code point that should be returned
2371 * @return the Unicode code point value before the given index.
2372 * @exception NullPointerException if <code>seq</code> is null.
2373 * @exception IndexOutOfBoundsException if the <code>index</code>
2374 * argument is less than 1 or greater than {@link
2375 * CharSequence#length() seq.length()}.
2376 * @since 1.5
2377 */
2378 public static int codePointBefore(CharSequence seq, int index) {
2379 char c2 = seq.charAt(--index);
2380 if (isLowSurrogate(c2)) {
2381 if (index > 0) {
2382 char c1 = seq.charAt(--index);
2383 if (isHighSurrogate(c1)) {
2384 return toCodePoint(c1, c2);
2385 }
2386 }
2387 }
2388 return c2;
2389 }
2390
2391 /**
2392 * Returns the code point preceding the given index of the
2393 * <code>char</code> array. If the <code>char</code> value at
2394 * <code>(index - 1)</code> in the <code>char</code> array is in
2395 * the low-surrogate range, <code>(index - 2)</code> is not
2396 * negative, and the <code>char</code> value at <code>(index -
2397 * 2)</code> in the <code>char</code> array is in the
2398 * high-surrogate range, then the supplementary code point
2399 * corresponding to this surrogate pair is returned. Otherwise,
2400 * the <code>char</code> value at <code>(index - 1)</code> is
2401 * returned.
2402 *
2403 * @param a the <code>char</code> array
2404 * @param index the index following the code point that should be returned
2405 * @return the Unicode code point value before the given index.
2406 * @exception NullPointerException if <code>a</code> is null.
2407 * @exception IndexOutOfBoundsException if the <code>index</code>
2408 * argument is less than 1 or greater than the length of the
2409 * <code>char</code> array
2410 * @since 1.5
2411 */
2412 public static int codePointBefore(char[] a, int index) {
2413 return codePointBeforeImpl(a, index, 0);
2414 }
2415
2416 /**
2417 * Returns the code point preceding the given index of the
2418 * <code>char</code> array, where only array elements with
2419 * <code>index</code> greater than or equal to <code>start</code>
2420 * can be used. If the <code>char</code> value at <code>(index -
2421 * 1)</code> in the <code>char</code> array is in the
2422 * low-surrogate range, <code>(index - 2)</code> is not less than
2423 * <code>start</code>, and the <code>char</code> value at
2424 * <code>(index - 2)</code> in the <code>char</code> array is in
2425 * the high-surrogate range, then the supplementary code point
2426 * corresponding to this surrogate pair is returned. Otherwise,
2427 * the <code>char</code> value at <code>(index - 1)</code> is
2428 * returned.
2429 *
2430 * @param a the <code>char</code> array
2431 * @param index the index following the code point that should be returned
2432 * @param start the index of the first array element in the
2433 * <code>char</code> array
2434 * @return the Unicode code point value before the given index.
2435 * @exception NullPointerException if <code>a</code> is null.
2436 * @exception IndexOutOfBoundsException if the <code>index</code>
2437 * argument is not greater than the <code>start</code> argument or
2438 * is greater than the length of the <code>char</code> array, or
2439 * if the <code>start</code> argument is negative or not less than
2440 * the length of the <code>char</code> array.
2441 * @since 1.5
2442 */
2443 public static int codePointBefore(char[] a, int index, int start) {
2444 if (index <= start || start < 0 || start >= a.length) {
2445 throw new IndexOutOfBoundsException();
2446 }
2447 return codePointBeforeImpl(a, index, start);
2448 }
2449
2450 static int codePointBeforeImpl(char[] a, int index, int start) {
2451 char c2 = a[--index];
2452 if (isLowSurrogate(c2)) {
2453 if (index > start) {
2454 char c1 = a[--index];
2455 if (isHighSurrogate(c1)) {
2456 return toCodePoint(c1, c2);
2457 }
2458 }
2459 }
2460 return c2;
2461 }
2462
2463 /**
2464 * Converts the specified character (Unicode code point) to its
2465 * UTF-16 representation. If the specified code point is a BMP
2466 * (Basic Multilingual Plane or Plane 0) value, the same value is
2467 * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
2468 * specified code point is a supplementary character, its
2469 * surrogate values are stored in <code>dst[dstIndex]</code>
2470 * (high-surrogate) and <code>dst[dstIndex+1]</code>
2471 * (low-surrogate), and 2 is returned.
2472 *
2473 * @param codePoint the character (Unicode code point) to be converted.
2474 * @param dst an array of <code>char</code> in which the
2475 * <code>codePoint</code>'s UTF-16 value is stored.
2476 * @param dstIndex the start index into the <code>dst</code>
2477 * array where the converted value is stored.
2478 * @return 1 if the code point is a BMP code point, 2 if the
2479 * code point is a supplementary code point.
2480 * @exception IllegalArgumentException if the specified
2481 * <code>codePoint</code> is not a valid Unicode code point.
2482 * @exception NullPointerException if the specified <code>dst</code> is null.
2483 * @exception IndexOutOfBoundsException if <code>dstIndex</code>
2484 * is negative or not less than <code>dst.length</code>, or if
2485 * <code>dst</code> at <code>dstIndex</code> doesn't have enough
2486 * array element(s) to store the resulting <code>char</code>
2487 * value(s). (If <code>dstIndex</code> is equal to
2488 * <code>dst.length-1</code> and the specified
2489 * <code>codePoint</code> is a supplementary character, the
2490 * high-surrogate value is not stored in
2491 * <code>dst[dstIndex]</code>.)
2492 * @since 1.5
2493 */
2494 public static int toChars(int codePoint, char[] dst, int dstIndex) {
2495 if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2496 throw new IllegalArgumentException();
2497 }
2498 if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2499 dst[dstIndex] = (char) codePoint;
2500 return 1;
2501 }
2502 toSurrogates(codePoint, dst, dstIndex);
2503 return 2;
2504 }
2505
2506 /**
2507 * Converts the specified character (Unicode code point) to its
2508 * UTF-16 representation stored in a <code>char</code> array. If
2509 * the specified code point is a BMP (Basic Multilingual Plane or
2510 * Plane 0) value, the resulting <code>char</code> array has
2511 * the same value as <code>codePoint</code>. If the specified code
2512 * point is a supplementary code point, the resulting
2513 * <code>char</code> array has the corresponding surrogate pair.
2514 *
2515 * @param codePoint a Unicode code point
2516 * @return a <code>char</code> array having
2517 * <code>codePoint</code>'s UTF-16 representation.
2518 * @exception IllegalArgumentException if the specified
2519 * <code>codePoint</code> is not a valid Unicode code point.
2520 * @since 1.5
2521 */
2522 public static char[] toChars(int codePoint) {
2523 if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
2524 throw new IllegalArgumentException();
2525 }
2526 if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
2527 return new char[] { (char) codePoint };
2528 }
2529 char[] result = new char[2];
2530 toSurrogates(codePoint, result, 0);
2531 return result;
2532 }
2533
2534 static void toSurrogates(int codePoint, char[] dst, int index) {
2535 int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
2536 dst[index + 1] = (char) ((offset & 0x3ff) + MIN_LOW_SURROGATE);
2537 dst[index] = (char) ((offset >>> 10) + MIN_HIGH_SURROGATE);
2538 }
2539
2540 /**
2541 * Returns the number of Unicode code points in the text range of
2542 * the specified char sequence. The text range begins at the
2543 * specified <code>beginIndex</code> and extends to the
2544 * <code>char</code> at index <code>endIndex - 1</code>. Thus the
2545 * length (in <code>char</code>s) of the text range is
2546 * <code>endIndex-beginIndex</code>. Unpaired surrogates within
2547 * the text range count as one code point each.
2548 *
2549 * @param seq the char sequence
2550 * @param beginIndex the index to the first <code>char</code> of
2551 * the text range.
2552 * @param endIndex the index after the last <code>char</code> of
2553 * the text range.
2554 * @return the number of Unicode code points in the specified text
2555 * range
2556 * @exception NullPointerException if <code>seq</code> is null.
2557 * @exception IndexOutOfBoundsException if the
2558 * <code>beginIndex</code> is negative, or <code>endIndex</code>
2559 * is larger than the length of the given sequence, or
2560 * <code>beginIndex</code> is larger than <code>endIndex</code>.
2561 * @since 1.5
2562 */
2563 public static int codePointCount(CharSequence seq, int beginIndex,
2564 int endIndex) {
2565 int length = seq.length();
2566 if (beginIndex < 0 || endIndex > length
2567 || beginIndex > endIndex) {
2568 throw new IndexOutOfBoundsException();
2569 }
2570 int n = 0;
2571 for (int i = beginIndex; i < endIndex;) {
2572 n++;
2573 if (isHighSurrogate(seq.charAt(i++))) {
2574 if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
2575 i++;
2576 }
2577 }
2578 }
2579 return n;
2580 }
2581
2582 /**
2583 * Returns the number of Unicode code points in a subarray of the
2584 * <code>char</code> array argument. The <code>offset</code>
2585 * argument is the index of the first <code>char</code> of the
2586 * subarray and the <code>count</code> argument specifies the
2587 * length of the subarray in <code>char</code>s. Unpaired
2588 * surrogates within the subarray count as one code point each.
2589 *
2590 * @param a the <code>char</code> array
2591 * @param offset the index of the first <code>char</code> in the
2592 * given <code>char</code> array
2593 * @param count the length of the subarray in <code>char</code>s
2594 * @return the number of Unicode code points in the specified subarray
2595 * @exception NullPointerException if <code>a</code> is null.
2596 * @exception IndexOutOfBoundsException if <code>offset</code> or
2597 * <code>count</code> is negative, or if <code>offset +
2598 * count</code> is larger than the length of the given array.
2599 * @since 1.5
2600 */
2601 public static int codePointCount(char[] a, int offset, int count) {
2602 if (count > a.length - offset || offset < 0 || count < 0) {
2603 throw new IndexOutOfBoundsException();
2604 }
2605 return codePointCountImpl(a, offset, count);
2606 }
2607
2608 static int codePointCountImpl(char[] a, int offset, int count) {
2609 int endIndex = offset + count;
2610 int n = 0;
2611 for (int i = offset; i < endIndex;) {
2612 n++;
2613 if (isHighSurrogate(a[i++])) {
2614 if (i < endIndex && isLowSurrogate(a[i])) {
2615 i++;
2616 }
2617 }
2618 }
2619 return n;
2620 }
2621
2622 /**
2623 * Returns the index within the given char sequence that is offset
2624 * from the given <code>index</code> by <code>codePointOffset</code>
2625 * code points. Unpaired surrogates within the text range given by
2626 * <code>index</code> and <code>codePointOffset</code> count as
2627 * one code point each.
2628 *
2629 * @param seq the char sequence
2630 * @param index the index to be offset
2631 * @param codePointOffset the offset in code points
2632 * @return the index within the char sequence
2633 * @exception NullPointerException if <code>seq</code> is null.
2634 * @exception IndexOutOfBoundsException if <code>index</code>
2635 * is negative or larger than the length of the char sequence,
2636 * or if <code>codePointOffset</code> is positive and the
2637 * subsequence starting with <code>index</code> has fewer than
2638 * <code>codePointOffset</code> code points, or if
2639 * <code>codePointOffset</code> is negative and the subsequence
2640 * before <code>index</code> has fewer than the absolute value
2641 * of <code>codePointOffset</code> code points.
2642 * @since 1.5
2643 */
2644 public static int offsetByCodePoints(CharSequence seq, int index,
2645 int codePointOffset) {
2646 int length = seq.length();
2647 if (index < 0 || index > length) {
2648 throw new IndexOutOfBoundsException();
2649 }
2650
2651 int x = index;
2652 if (codePointOffset >= 0) {
2653 int i;
2654 for (i = 0; x < length && i < codePointOffset; i++) {
2655 if (isHighSurrogate(seq.charAt(x++))) {
2656 if (x < length && isLowSurrogate(seq.charAt(x))) {
2657 x++;
2658 }
2659 }
2660 }
2661 if (i < codePointOffset) {
2662 throw new IndexOutOfBoundsException();
2663 }
2664 } else {
2665 int i;
2666 for (i = codePointOffset; x > 0 && i < 0; i++) {
2667 if (isLowSurrogate(seq.charAt(--x))) {
2668 if (x > 0 && isHighSurrogate(seq.charAt(x - 1))) {
2669 x--;
2670 }
2671 }
2672 }
2673 if (i < 0) {
2674 throw new IndexOutOfBoundsException();
2675 }
2676 }
2677 return x;
2678 }
2679
2680 /**
2681 * Returns the index within the given <code>char</code> subarray
2682 * that is offset from the given <code>index</code> by
2683 * <code>codePointOffset</code> code points. The
2684 * <code>start</code> and <code>count</code> arguments specify a
2685 * subarray of the <code>char</code> array. Unpaired surrogates
2686 * within the text range given by <code>index</code> and
2687 * <code>codePointOffset</code> count as one code point each.
2688 *
2689 * @param a the <code>char</code> array
2690 * @param start the index of the first <code>char</code> of the
2691 * subarray
2692 * @param count the length of the subarray in <code>char</code>s
2693 * @param index the index to be offset
2694 * @param codePointOffset the offset in code points
2695 * @return the index within the subarray
2696 * @exception NullPointerException if <code>a</code> is null.
2697 * @exception IndexOutOfBoundsException
2698 * if <code>start</code> or <code>count</code> is negative,
2699 * or if <code>start + count</code> is larger than the length of
2700 * the given array,
2701 * or if <code>index</code> is less than <code>start</code> or
2702 * larger than <code>start + count</code>,
2703 * or if <code>codePointOffset</code> is positive and the text range
2704 * starting with <code>index</code> and ending with <code>start
2705 * + count - 1</code> has fewer than <code>codePointOffset</code> code
2706 * points,
2707 * or if <code>codePointOffset</code> is negative and the text range
2708 * starting with <code>start</code> and ending with <code>index
2709 * - 1</code> has fewer than the absolute value of
2710 * <code>codePointOffset</code> code points.
2711 * @since 1.5
2712 */
2713 public static int offsetByCodePoints(char[] a, int start,
2714 int count, int index, int codePointOffset) {
2715 if (count > a.length - start || start < 0 || count < 0
2716 || index < start || index > start + count) {
2717 throw new IndexOutOfBoundsException();
2718 }
2719 return offsetByCodePointsImpl(a, start, count, index,
2720 codePointOffset);
2721 }
2722
2723 static int offsetByCodePointsImpl(char[] a, int start, int count,
2724 int index, int codePointOffset) {
2725 int x = index;
2726 if (codePointOffset >= 0) {
2727 int limit = start + count;
2728 int i;
2729 for (i = 0; x < limit && i < codePointOffset; i++) {
2730 if (isHighSurrogate(a[x++])) {
2731 if (x < limit && isLowSurrogate(a[x])) {
2732 x++;
2733 }
2734 }
2735 }
2736 if (i < codePointOffset) {
2737 throw new IndexOutOfBoundsException();
2738 }
2739 } else {
2740 int i;
2741 for (i = codePointOffset; x > start && i < 0; i++) {
2742 if (isLowSurrogate(a[--x])) {
2743 if (x > start && isHighSurrogate(a[x - 1])) {
2744 x--;
2745 }
2746 }
2747 }
2748 if (i < 0) {
2749 throw new IndexOutOfBoundsException();
2750 }
2751 }
2752 return x;
2753 }
2754
2755 /**
2756 * Determines if the specified character is a lowercase character.
2757 * <p>
2758 * A character is lowercase if its general category type, provided
2759 * by <code>Character.getType(ch)</code>, is
2760 * <code>LOWERCASE_LETTER</code>.
2761 * <p>
2762 * The following are examples of lowercase characters:
2763 * <p><blockquote><pre>
2764 * a b c d e f g h i j k l m n o p q r s t u v w x y z
2765 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
2766 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
2767 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
2768 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
2769 * </pre></blockquote>
2770 * <p> Many other Unicode characters are lowercase too.
2771 *
2772 * <p><b>Note:</b> This method cannot handle <a
2773 * href="#supplementary"> supplementary characters</a>. To support
2774 * all Unicode characters, including supplementary characters, use
2775 * the {@link #isLowerCase(int)} method.
2776 *
2777 * @param ch the character to be tested.
2778 * @return <code>true</code> if the character is lowercase;
2779 * <code>false</code> otherwise.
2780 * @see java.lang.Character#isUpperCase(char)
2781 * @see java.lang.Character#isTitleCase(char)
2782 * @see java.lang.Character#toLowerCase(char)
2783 * @see java.lang.Character#getType(char)
2784 */
2785 public static boolean isLowerCase(char ch) {
2786 return isLowerCase((int) ch);
2787 }
2788
2789 /**
2790 * Determines if the specified character (Unicode code point) is a
2791 * lowercase character.
2792 * <p>
2793 * A character is lowercase if its general category type, provided
2794 * by {@link Character#getType getType(codePoint)}, is
2795 * <code>LOWERCASE_LETTER</code>.
2796 * <p>
2797 * The following are examples of lowercase characters:
2798 * <p><blockquote><pre>
2799 * a b c d e f g h i j k l m n o p q r s t u v w x y z
2800 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
2801 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
2802 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
2803 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
2804 * </pre></blockquote>
2805 * <p> Many other Unicode characters are lowercase too.
2806 *
2807 * @param codePoint the character (Unicode code point) to be tested.
2808 * @return <code>true</code> if the character is lowercase;
2809 * <code>false</code> otherwise.
2810 * @see java.lang.Character#isUpperCase(int)
2811 * @see java.lang.Character#isTitleCase(int)
2812 * @see java.lang.Character#toLowerCase(int)
2813 * @see java.lang.Character#getType(int)
2814 * @since 1.5
2815 */
2816 public static boolean isLowerCase(int codePoint) {
2817 return getType(codePoint) == Character.LOWERCASE_LETTER;
2818 }
2819
2820 /**
2821 * Determines if the specified character is an uppercase character.
2822 * <p>
2823 * A character is uppercase if its general category type, provided by
2824 * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
2825 * <p>
2826 * The following are examples of uppercase characters:
2827 * <p><blockquote><pre>
2828 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2829 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
2830 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
2831 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
2832 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
2833 * </pre></blockquote>
2834 * <p> Many other Unicode characters are uppercase too.
2835 *
2836 * <p><b>Note:</b> This method cannot handle <a
2837 * href="#supplementary"> supplementary characters</a>. To support
2838 * all Unicode characters, including supplementary characters, use
2839 * the {@link #isUpperCase(int)} method.
2840 *
2841 * @param ch the character to be tested.
2842 * @return <code>true</code> if the character is uppercase;
2843 * <code>false</code> otherwise.
2844 * @see java.lang.Character#isLowerCase(char)
2845 * @see java.lang.Character#isTitleCase(char)
2846 * @see java.lang.Character#toUpperCase(char)
2847 * @see java.lang.Character#getType(char)
2848 * @since 1.0
2849 */
2850 public static boolean isUpperCase(char ch) {
2851 return isUpperCase((int) ch);
2852 }
2853
2854 /**
2855 * Determines if the specified character (Unicode code point) is an uppercase character.
2856 * <p>
2857 * A character is uppercase if its general category type, provided by
2858 * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
2859 * <p>
2860 * The following are examples of uppercase characters:
2861 * <p><blockquote><pre>
2862 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
2863 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
2864 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
2865 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
2866 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
2867 * </pre></blockquote>
2868 * <p> Many other Unicode characters are uppercase too.
2869 *
2870 * @param codePoint the character (Unicode code point) to be tested.
2871 * @return <code>true</code> if the character is uppercase;
2872 * <code>false</code> otherwise.
2873 * @see java.lang.Character#isLowerCase(int)
2874 * @see java.lang.Character#isTitleCase(int)
2875 * @see java.lang.Character#toUpperCase(int)
2876 * @see java.lang.Character#getType(int)
2877 * @since 1.5
2878 */
2879 public static boolean isUpperCase(int codePoint) {
2880 return getType(codePoint) == Character.UPPERCASE_LETTER;
2881 }
2882
2883 /**
2884 * Determines if the specified character is a titlecase character.
2885 * <p>
2886 * A character is a titlecase character if its general
2887 * category type, provided by <code>Character.getType(ch)</code>,
2888 * is <code>TITLECASE_LETTER</code>.
2889 * <p>
2890 * Some characters look like pairs of Latin letters. For example, there
2891 * is an uppercase letter that looks like "LJ" and has a corresponding
2892 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
2893 * is the appropriate form to use when rendering a word in lowercase
2894 * with initial capitals, as for a book title.
2895 * <p>
2896 * These are some of the Unicode characters for which this method returns
2897 * <code>true</code>:
2898 * <ul>
2899 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
2900 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
2901 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
2902 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
2903 * </ul>
2904 * <p> Many other Unicode characters are titlecase too.
2905 *
2906 * <p><b>Note:</b> This method cannot handle <a
2907 * href="#supplementary"> supplementary characters</a>. To support
2908 * all Unicode characters, including supplementary characters, use
2909 * the {@link #isTitleCase(int)} method.
2910 *
2911 * @param ch the character to be tested.
2912 * @return <code>true</code> if the character is titlecase;
2913 * <code>false</code> otherwise.
2914 * @see java.lang.Character#isLowerCase(char)
2915 * @see java.lang.Character#isUpperCase(char)
2916 * @see java.lang.Character#toTitleCase(char)
2917 * @see java.lang.Character#getType(char)
2918 * @since 1.0.2
2919 */
2920 public static boolean isTitleCase(char ch) {
2921 return isTitleCase((int) ch);
2922 }
2923
2924 /**
2925 * Determines if the specified character (Unicode code point) is a titlecase character.
2926 * <p>
2927 * A character is a titlecase character if its general
2928 * category type, provided by {@link Character#getType(int) getType(codePoint)},
2929 * is <code>TITLECASE_LETTER</code>.
2930 * <p>
2931 * Some characters look like pairs of Latin letters. For example, there
2932 * is an uppercase letter that looks like "LJ" and has a corresponding
2933 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
2934 * is the appropriate form to use when rendering a word in lowercase
2935 * with initial capitals, as for a book title.
2936 * <p>
2937 * These are some of the Unicode characters for which this method returns
2938 * <code>true</code>:
2939 * <ul>
2940 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
2941 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
2942 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
2943 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
2944 * </ul>
2945 * <p> Many other Unicode characters are titlecase too.
2946 *
2947 * @param codePoint the character (Unicode code point) to be tested.
2948 * @return <code>true</code> if the character is titlecase;
2949 * <code>false</code> otherwise.
2950 * @see java.lang.Character#isLowerCase(int)
2951 * @see java.lang.Character#isUpperCase(int)
2952 * @see java.lang.Character#toTitleCase(int)
2953 * @see java.lang.Character#getType(int)
2954 * @since 1.5
2955 */
2956 public static boolean isTitleCase(int codePoint) {
2957 return getType(codePoint) == Character.TITLECASE_LETTER;
2958 }
2959
2960 /**
2961 * Determines if the specified character is a digit.
2962 * <p>
2963 * A character is a digit if its general category type, provided
2964 * by <code>Character.getType(ch)</code>, is
2965 * <code>DECIMAL_DIGIT_NUMBER</code>.
2966 * <p>
2967 * Some Unicode character ranges that contain digits:
2968 * <ul>
2969 * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
2970 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
2971 * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
2972 * Arabic-Indic digits
2973 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
2974 * Extended Arabic-Indic digits
2975 * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
2976 * Devanagari digits
2977 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
2978 * Fullwidth digits
2979 * </ul>
2980 *
2981 * Many other character ranges contain digits as well.
2982 *
2983 * <p><b>Note:</b> This method cannot handle <a
2984 * href="#supplementary"> supplementary characters</a>. To support
2985 * all Unicode characters, including supplementary characters, use
2986 * the {@link #isDigit(int)} method.
2987 *
2988 * @param ch the character to be tested.
2989 * @return <code>true</code> if the character is a digit;
2990 * <code>false</code> otherwise.
2991 * @see java.lang.Character#digit(char, int)
2992 * @see java.lang.Character#forDigit(int, int)
2993 * @see java.lang.Character#getType(char)
2994 */
2995 public static boolean isDigit(char ch) {
2996 return isDigit((int) ch);
2997 }
2998
2999 /**
3000 * Determines if the specified character (Unicode code point) is a digit.
3001 * <p>
3002 * A character is a digit if its general category type, provided
3003 * by {@link Character#getType(int) getType(codePoint)}, is
3004 * <code>DECIMAL_DIGIT_NUMBER</code>.
3005 * <p>
3006 * Some Unicode character ranges that contain digits:
3007 * <ul>
3008 * <li><code>'\u0030'</code> through <code>'\u0039'</code>,
3009 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3010 * <li><code>'\u0660'</code> through <code>'\u0669'</code>,
3011 * Arabic-Indic digits
3012 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>,
3013 * Extended Arabic-Indic digits
3014 * <li><code>'\u0966'</code> through <code>'\u096F'</code>,
3015 * Devanagari digits
3016 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>,
3017 * Fullwidth digits
3018 * </ul>
3019 *
3020 * Many other character ranges contain digits as well.
3021 *
3022 * @param codePoint the character (Unicode code point) to be tested.
3023 * @return <code>true</code> if the character is a digit;
3024 * <code>false</code> otherwise.
3025 * @see java.lang.Character#forDigit(int, int)
3026 * @see java.lang.Character#getType(int)
3027 * @since 1.5
3028 */
3029 public static boolean isDigit(int codePoint) {
3030 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
3031 }
3032
3033 /**
3034 * Determines if a character is defined in Unicode.
3035 * <p>
3036 * A character is defined if at least one of the following is true:
3037 * <ul>
3038 * <li>It has an entry in the UnicodeData file.
3039 * <li>It has a value in a range defined by the UnicodeData file.
3040 * </ul>
3041 *
3042 * <p><b>Note:</b> This method cannot handle <a
3043 * href="#supplementary"> supplementary characters</a>. To support
3044 * all Unicode characters, including supplementary characters, use
3045 * the {@link #isDefined(int)} method.
3046 *
3047 * @param ch the character to be tested
3048 * @return <code>true</code> if the character has a defined meaning
3049 * in Unicode; <code>false</code> otherwise.
3050 * @see java.lang.Character#isDigit(char)
3051 * @see java.lang.Character#isLetter(char)
3052 * @see java.lang.Character#isLetterOrDigit(char)
3053 * @see java.lang.Character#isLowerCase(char)
3054 * @see java.lang.Character#isTitleCase(char)
3055 * @see java.lang.Character#isUpperCase(char)
3056 * @since 1.0.2
3057 */
3058 public static boolean isDefined(char ch) {
3059 return isDefined((int) ch);
3060 }
3061
3062 /**
3063 * Determines if a character (Unicode code point) is defined in Unicode.
3064 * <p>
3065 * A character is defined if at least one of the following is true:
3066 * <ul>
3067 * <li>It has an entry in the UnicodeData file.
3068 * <li>It has a value in a range defined by the UnicodeData file.
3069 * </ul>
3070 *
3071 * @param codePoint the character (Unicode code point) to be tested.
3072 * @return <code>true</code> if the character has a defined meaning
3073 * in Unicode; <code>false</code> otherwise.
3074 * @see java.lang.Character#isDigit(int)
3075 * @see java.lang.Character#isLetter(int)
3076 * @see java.lang.Character#isLetterOrDigit(int)
3077 * @see java.lang.Character#isLowerCase(int)
3078 * @see java.lang.Character#isTitleCase(int)
3079 * @see java.lang.Character#isUpperCase(int)
3080 * @since 1.5
3081 */
3082 public static boolean isDefined(int codePoint) {
3083 return getType(codePoint) != Character.UNASSIGNED;
3084 }
3085
3086 /**
3087 * Determines if the specified character is a letter.
3088 * <p>
3089 * A character is considered to be a letter if its general
3090 * category type, provided by <code>Character.getType(ch)</code>,
3091 * is any of the following:
3092 * <ul>
3093 * <li> <code>UPPERCASE_LETTER</code>
3094 * <li> <code>LOWERCASE_LETTER</code>
3095 * <li> <code>TITLECASE_LETTER</code>
3096 * <li> <code>MODIFIER_LETTER</code>
3097 * <li> <code>OTHER_LETTER</code>
3098 * </ul>
3099 *
3100 * Not all letters have case. Many characters are
3101 * letters but are neither uppercase nor lowercase nor titlecase.
3102 *
3103 * <p><b>Note:</b> This method cannot handle <a
3104 * href="#supplementary"> supplementary characters</a>. To support
3105 * all Unicode characters, including supplementary characters, use
3106 * the {@link #isLetter(int)} method.
3107 *
3108 * @param ch the character to be tested.
3109 * @return <code>true</code> if the character is a letter;
3110 * <code>false</code> otherwise.
3111 * @see java.lang.Character#isDigit(char)
3112 * @see java.lang.Character#isJavaIdentifierStart(char)
3113 * @see java.lang.Character#isJavaLetter(char)
3114 * @see java.lang.Character#isJavaLetterOrDigit(char)
3115 * @see java.lang.Character#isLetterOrDigit(char)
3116 * @see java.lang.Character#isLowerCase(char)
3117 * @see java.lang.Character#isTitleCase(char)
3118 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3119 * @see java.lang.Character#isUpperCase(char)
3120 */
3121 public static boolean isLetter(char ch) {
3122 return isLetter((int) ch);
3123 }
3124
3125 /**
3126 * Determines if the specified character (Unicode code point) is a letter.
3127 * <p>
3128 * A character is considered to be a letter if its general
3129 * category type, provided by {@link Character#getType(int) getType(codePoint)},
3130 * is any of the following:
3131 * <ul>
3132 * <li> <code>UPPERCASE_LETTER</code>
3133 * <li> <code>LOWERCASE_LETTER</code>
3134 * <li> <code>TITLECASE_LETTER</code>
3135 * <li> <code>MODIFIER_LETTER</code>
3136 * <li> <code>OTHER_LETTER</code>
3137 * </ul>
3138 *
3139 * Not all letters have case. Many characters are
3140 * letters but are neither uppercase nor lowercase nor titlecase.
3141 *
3142 * @param codePoint the character (Unicode code point) to be tested.
3143 * @return <code>true</code> if the character is a letter;
3144 * <code>false</code> otherwise.
3145 * @see java.lang.Character#isDigit(int)
3146 * @see java.lang.Character#isJavaIdentifierStart(int)
3147 * @see java.lang.Character#isLetterOrDigit(int)
3148 * @see java.lang.Character#isLowerCase(int)
3149 * @see java.lang.Character#isTitleCase(int)
3150 * @see java.lang.Character#isUnicodeIdentifierStart(int)
3151 * @see java.lang.Character#isUpperCase(int)
3152 * @since 1.5
3153 */
3154 public static boolean isLetter(int codePoint) {
3155 return ((((1 << Character.UPPERCASE_LETTER)
3156 | (1 << Character.LOWERCASE_LETTER)
3157 | (1 << Character.TITLECASE_LETTER)
3158 | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) != 0;
3159 }
3160
3161 /**
3162 * Determines if the specified character is a letter or digit.
3163 * <p>
3164 * A character is considered to be a letter or digit if either
3165 * <code>Character.isLetter(char ch)</code> or
3166 * <code>Character.isDigit(char ch)</code> returns
3167 * <code>true</code> for the character.
3168 *
3169 * <p><b>Note:</b> This method cannot handle <a
3170 * href="#supplementary"> supplementary characters</a>. To support
3171 * all Unicode characters, including supplementary characters, use
3172 * the {@link #isLetterOrDigit(int)} method.
3173 *
3174 * @param ch the character to be tested.
3175 * @return <code>true</code> if the character is a letter or digit;
3176 * <code>false</code> otherwise.
3177 * @see java.lang.Character#isDigit(char)
3178 * @see java.lang.Character#isJavaIdentifierPart(char)
3179 * @see java.lang.Character#isJavaLetter(char)
3180 * @see java.lang.Character#isJavaLetterOrDigit(char)
3181 * @see java.lang.Character#isLetter(char)
3182 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3183 * @since 1.0.2
3184 */
3185 public static boolean isLetterOrDigit(char ch) {
3186 return isLetterOrDigit((int) ch);
3187 }
3188
3189 /**
3190 * Determines if the specified character (Unicode code point) is a letter or digit.
3191 * <p>
3192 * A character is considered to be a letter or digit if either
3193 * {@link #isLetter(int) isLetter(codePoint)} or
3194 * {@link #isDigit(int) isDigit(codePoint)} returns
3195 * <code>true</code> for the character.
3196 *
3197 * @param codePoint the character (Unicode code point) to be tested.
3198 * @return <code>true</code> if the character is a letter or digit;
3199 * <code>false</code> otherwise.
3200 * @see java.lang.Character#isDigit(int)
3201 * @see java.lang.Character#isJavaIdentifierPart(int)
3202 * @see java.lang.Character#isLetter(int)
3203 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3204 * @since 1.5
3205 */
3206 public static boolean isLetterOrDigit(int codePoint) {
3207 return ((((1 << Character.UPPERCASE_LETTER)
3208 | (1 << Character.LOWERCASE_LETTER)
3209 | (1 << Character.TITLECASE_LETTER)
3210 | (1 << Character.MODIFIER_LETTER)
3211 | (1 << Character.OTHER_LETTER) | (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) != 0;
3212 }
3213
3214 /**
3215 * Determines if the specified character is permissible as the first
3216 * character in a Java identifier.
3217 * <p>
3218 * A character may start a Java identifier if and only if
3219 * one of the following is true:
3220 * <ul>
3221 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3222 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3223 * <li> ch is a currency symbol (such as "$")
3224 * <li> ch is a connecting punctuation character (such as "_").
3225 * </ul>
3226 *
3227 * @param ch the character to be tested.
3228 * @return <code>true</code> if the character may start a Java
3229 * identifier; <code>false</code> otherwise.
3230 * @see java.lang.Character#isJavaLetterOrDigit(char)
3231 * @see java.lang.Character#isJavaIdentifierStart(char)
3232 * @see java.lang.Character#isJavaIdentifierPart(char)
3233 * @see java.lang.Character#isLetter(char)
3234 * @see java.lang.Character#isLetterOrDigit(char)
3235 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3236 * @since 1.0.2
3237 * @deprecated Replaced by isJavaIdentifierStart(char).
3238 */
3239 @Deprecated
3240 public static boolean isJavaLetter(char ch) {
3241 return isJavaIdentifierStart(ch);
3242 }
3243
3244 /**
3245 * Determines if the specified character may be part of a Java
3246 * identifier as other than the first character.
3247 * <p>
3248 * A character may be part of a Java identifier if and only if any
3249 * of the following are true:
3250 * <ul>
3251 * <li> it is a letter
3252 * <li> it is a currency symbol (such as <code>'$'</code>)
3253 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3254 * <li> it is a digit
3255 * <li> it is a numeric letter (such as a Roman numeral character)
3256 * <li> it is a combining mark
3257 * <li> it is a non-spacing mark
3258 * <li> <code>isIdentifierIgnorable</code> returns
3259 * <code>true</code> for the character.
3260 * </ul>
3261 *
3262 * @param ch the character to be tested.
3263 * @return <code>true</code> if the character may be part of a
3264 * Java identifier; <code>false</code> otherwise.
3265 * @see java.lang.Character#isJavaLetter(char)
3266 * @see java.lang.Character#isJavaIdentifierStart(char)
3267 * @see java.lang.Character#isJavaIdentifierPart(char)
3268 * @see java.lang.Character#isLetter(char)
3269 * @see java.lang.Character#isLetterOrDigit(char)
3270 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3271 * @see java.lang.Character#isIdentifierIgnorable(char)
3272 * @since 1.0.2
3273 * @deprecated Replaced by isJavaIdentifierPart(char).
3274 */
3275 @Deprecated
3276 public static boolean isJavaLetterOrDigit(char ch) {
3277 return isJavaIdentifierPart(ch);
3278 }
3279
3280 /**
3281 * Determines if the specified character is
3282 * permissible as the first character in a Java identifier.
3283 * <p>
3284 * A character may start a Java identifier if and only if
3285 * one of the following conditions is true:
3286 * <ul>
3287 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3288 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3289 * <li> ch is a currency symbol (such as "$")
3290 * <li> ch is a connecting punctuation character (such as "_").
3291 * </ul>
3292 *
3293 * <p><b>Note:</b> This method cannot handle <a
3294 * href="#supplementary"> supplementary characters</a>. To support
3295 * all Unicode characters, including supplementary characters, use
3296 * the {@link #isJavaIdentifierStart(int)} method.
3297 *
3298 * @param ch the character to be tested.
3299 * @return <code>true</code> if the character may start a Java identifier;
3300 * <code>false</code> otherwise.
3301 * @see java.lang.Character#isJavaIdentifierPart(char)
3302 * @see java.lang.Character#isLetter(char)
3303 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3304 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3305 * @since 1.1
3306 */
3307 public static boolean isJavaIdentifierStart(char ch) {
3308 return isJavaIdentifierStart((int) ch);
3309 }
3310
3311 /**
3312 * Determines if the character (Unicode code point) is
3313 * permissible as the first character in a Java identifier.
3314 * <p>
3315 * A character may start a Java identifier if and only if
3316 * one of the following conditions is true:
3317 * <ul>
3318 * <li> {@link #isLetter(int) isLetter(codePoint)}
3319 * returns <code>true</code>
3320 * <li> {@link #getType(int) getType(codePoint)}
3321 * returns <code>LETTER_NUMBER</code>
3322 * <li> the referenced character is a currency symbol (such as "$")
3323 * <li> the referenced character is a connecting punctuation character
3324 * (such as "_").
3325 * </ul>
3326 *
3327 * @param codePoint the character (Unicode code point) to be tested.
3328 * @return <code>true</code> if the character may start a Java identifier;
3329 * <code>false</code> otherwise.
3330 * @see java.lang.Character#isJavaIdentifierPart(int)
3331 * @see java.lang.Character#isLetter(int)
3332 * @see java.lang.Character#isUnicodeIdentifierStart(int)
3333 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3334 * @since 1.5
3335 */
3336 public static boolean isJavaIdentifierStart(int codePoint) {
3337 return CharacterData.of(codePoint).isJavaIdentifierStart(
3338 codePoint);
3339 }
3340
3341 /**
3342 * Determines if the specified character may be part of a Java
3343 * identifier as other than the first character.
3344 * <p>
3345 * A character may be part of a Java identifier if any of the following
3346 * are true:
3347 * <ul>
3348 * <li> it is a letter
3349 * <li> it is a currency symbol (such as <code>'$'</code>)
3350 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3351 * <li> it is a digit
3352 * <li> it is a numeric letter (such as a Roman numeral character)
3353 * <li> it is a combining mark
3354 * <li> it is a non-spacing mark
3355 * <li> <code>isIdentifierIgnorable</code> returns
3356 * <code>true</code> for the character
3357 * </ul>
3358 *
3359 * <p><b>Note:</b> This method cannot handle <a
3360 * href="#supplementary"> supplementary characters</a>. To support
3361 * all Unicode characters, including supplementary characters, use
3362 * the {@link #isJavaIdentifierPart(int)} method.
3363 *
3364 * @param ch the character to be tested.
3365 * @return <code>true</code> if the character may be part of a
3366 * Java identifier; <code>false</code> otherwise.
3367 * @see java.lang.Character#isIdentifierIgnorable(char)
3368 * @see java.lang.Character#isJavaIdentifierStart(char)
3369 * @see java.lang.Character#isLetterOrDigit(char)
3370 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3371 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3372 * @since 1.1
3373 */
3374 public static boolean isJavaIdentifierPart(char ch) {
3375 return isJavaIdentifierPart((int) ch);
3376 }
3377
3378 /**
3379 * Determines if the character (Unicode code point) may be part of a Java
3380 * identifier as other than the first character.
3381 * <p>
3382 * A character may be part of a Java identifier if any of the following
3383 * are true:
3384 * <ul>
3385 * <li> it is a letter
3386 * <li> it is a currency symbol (such as <code>'$'</code>)
3387 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3388 * <li> it is a digit
3389 * <li> it is a numeric letter (such as a Roman numeral character)
3390 * <li> it is a combining mark
3391 * <li> it is a non-spacing mark
3392 * <li> {@link #isIdentifierIgnorable(int)
3393 * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3394 * the character
3395 * </ul>
3396 *
3397 * @param codePoint the character (Unicode code point) to be tested.
3398 * @return <code>true</code> if the character may be part of a
3399 * Java identifier; <code>false</code> otherwise.
3400 * @see java.lang.Character#isIdentifierIgnorable(int)
3401 * @see java.lang.Character#isJavaIdentifierStart(int)
3402 * @see java.lang.Character#isLetterOrDigit(int)
3403 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3404 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3405 * @since 1.5
3406 */
3407 public static boolean isJavaIdentifierPart(int codePoint) {
3408 return CharacterData.of(codePoint).isJavaIdentifierPart(
3409 codePoint);
3410 }
3411
3412 /**
3413 * Determines if the specified character is permissible as the
3414 * first character in a Unicode identifier.
3415 * <p>
3416 * A character may start a Unicode identifier if and only if
3417 * one of the following conditions is true:
3418 * <ul>
3419 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3420 * <li> {@link #getType(char) getType(ch)} returns
3421 * <code>LETTER_NUMBER</code>.
3422 * </ul>
3423 *
3424 * <p><b>Note:</b> This method cannot handle <a
3425 * href="#supplementary"> supplementary characters</a>. To support
3426 * all Unicode characters, including supplementary characters, use
3427 * the {@link #isUnicodeIdentifierStart(int)} method.
3428 *
3429 * @param ch the character to be tested.
3430 * @return <code>true</code> if the character may start a Unicode
3431 * identifier; <code>false</code> otherwise.
3432 * @see java.lang.Character#isJavaIdentifierStart(char)
3433 * @see java.lang.Character#isLetter(char)
3434 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3435 * @since 1.1
3436 */
3437 public static boolean isUnicodeIdentifierStart(char ch) {
3438 return isUnicodeIdentifierStart((int) ch);
3439 }
3440
3441 /**
3442 * Determines if the specified character (Unicode code point) is permissible as the
3443 * first character in a Unicode identifier.
3444 * <p>
3445 * A character may start a Unicode identifier if and only if
3446 * one of the following conditions is true:
3447 * <ul>
3448 * <li> {@link #isLetter(int) isLetter(codePoint)}
3449 * returns <code>true</code>
3450 * <li> {@link #getType(int) getType(codePoint)}
3451 * returns <code>LETTER_NUMBER</code>.
3452 * </ul>
3453 * @param codePoint the character (Unicode code point) to be tested.
3454 * @return <code>true</code> if the character may start a Unicode
3455 * identifier; <code>false</code> otherwise.
3456 * @see java.lang.Character#isJavaIdentifierStart(int)
3457 * @see java.lang.Character#isLetter(int)
3458 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3459 * @since 1.5
3460 */
3461 public static boolean isUnicodeIdentifierStart(int codePoint) {
3462 return CharacterData.of(codePoint).isUnicodeIdentifierStart(
3463 codePoint);
3464 }
3465
3466 /**
3467 * Determines if the specified character may be part of a Unicode
3468 * identifier as other than the first character.
3469 * <p>
3470 * A character may be part of a Unicode identifier if and only if
3471 * one of the following statements is true:
3472 * <ul>
3473 * <li> it is a letter
3474 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3475 * <li> it is a digit
3476 * <li> it is a numeric letter (such as a Roman numeral character)
3477 * <li> it is a combining mark
3478 * <li> it is a non-spacing mark
3479 * <li> <code>isIdentifierIgnorable</code> returns
3480 * <code>true</code> for this character.
3481 * </ul>
3482 *
3483 * <p><b>Note:</b> This method cannot handle <a
3484 * href="#supplementary"> supplementary characters</a>. To support
3485 * all Unicode characters, including supplementary characters, use
3486 * the {@link #isUnicodeIdentifierPart(int)} method.
3487 *
3488 * @param ch the character to be tested.
3489 * @return <code>true</code> if the character may be part of a
3490 * Unicode identifier; <code>false</code> otherwise.
3491 * @see java.lang.Character#isIdentifierIgnorable(char)
3492 * @see java.lang.Character#isJavaIdentifierPart(char)
3493 * @see java.lang.Character#isLetterOrDigit(char)
3494 * @see java.lang.Character#isUnicodeIdentifierStart(char)
3495 * @since 1.1
3496 */
3497 public static boolean isUnicodeIdentifierPart(char ch) {
3498 return isUnicodeIdentifierPart((int) ch);
3499 }
3500
3501 /**
3502 * Determines if the specified character (Unicode code point) may be part of a Unicode
3503 * identifier as other than the first character.
3504 * <p>
3505 * A character may be part of a Unicode identifier if and only if
3506 * one of the following statements is true:
3507 * <ul>
3508 * <li> it is a letter
3509 * <li> it is a connecting punctuation character (such as <code>'_'</code>)
3510 * <li> it is a digit
3511 * <li> it is a numeric letter (such as a Roman numeral character)
3512 * <li> it is a combining mark
3513 * <li> it is a non-spacing mark
3514 * <li> <code>isIdentifierIgnorable</code> returns
3515 * <code>true</code> for this character.
3516 * </ul>
3517 * @param codePoint the character (Unicode code point) to be tested.
3518 * @return <code>true</code> if the character may be part of a
3519 * Unicode identifier; <code>false</code> otherwise.
3520 * @see java.lang.Character#isIdentifierIgnorable(int)
3521 * @see java.lang.Character#isJavaIdentifierPart(int)
3522 * @see java.lang.Character#isLetterOrDigit(int)
3523 * @see java.lang.Character#isUnicodeIdentifierStart(int)
3524 * @since 1.5
3525 */
3526 public static boolean isUnicodeIdentifierPart(int codePoint) {
3527 return CharacterData.of(codePoint).isUnicodeIdentifierPart(
3528 codePoint);
3529 }
3530
3531 /**
3532 * Determines if the specified character should be regarded as
3533 * an ignorable character in a Java identifier or a Unicode identifier.
3534 * <p>
3535 * The following Unicode characters are ignorable in a Java identifier
3536 * or a Unicode identifier:
3537 * <ul>
3538 * <li>ISO control characters that are not whitespace
3539 * <ul>
3540 * <li><code>'\u0000'</code> through <code>'\u0008'</code>
3541 * <li><code>'\u000E'</code> through <code>'\u001B'</code>
3542 * <li><code>'\u007F'</code> through <code>'\u009F'</code>
3543 * </ul>
3544 *
3545 * <li>all characters that have the <code>FORMAT</code> general
3546 * category value
3547 * </ul>
3548 *
3549 * <p><b>Note:</b> This method cannot handle <a
3550 * href="#supplementary"> supplementary characters</a>. To support
3551 * all Unicode characters, including supplementary characters, use
3552 * the {@link #isIdentifierIgnorable(int)} method.
3553 *
3554 * @param ch the character to be tested.
3555 * @return <code>true</code> if the character is an ignorable control
3556 * character that may be part of a Java or Unicode identifier;
3557 * <code>false</code> otherwise.
3558 * @see java.lang.Character#isJavaIdentifierPart(char)
3559 * @see java.lang.Character#isUnicodeIdentifierPart(char)
3560 * @since 1.1
3561 */
3562 public static boolean isIdentifierIgnorable(char ch) {
3563 return isIdentifierIgnorable((int) ch);
3564 }
3565
3566 /**
3567 * Determines if the specified character (Unicode code point) should be regarded as
3568 * an ignorable character in a Java identifier or a Unicode identifier.
3569 * <p>
3570 * The following Unicode characters are ignorable in a Java identifier
3571 * or a Unicode identifier:
3572 * <ul>
3573 * <li>ISO control characters that are not whitespace
3574 * <ul>
3575 * <li><code>'\u0000'</code> through <code>'\u0008'</code>
3576 * <li><code>'\u000E'</code> through <code>'\u001B'</code>
3577 * <li><code>'\u007F'</code> through <code>'\u009F'</code>
3578 * </ul>
3579 *
3580 * <li>all characters that have the <code>FORMAT</code> general
3581 * category value
3582 * </ul>
3583 *
3584 * @param codePoint the character (Unicode code point) to be tested.
3585 * @return <code>true</code> if the character is an ignorable control
3586 * character that may be part of a Java or Unicode identifier;
3587 * <code>false</code> otherwise.
3588 * @see java.lang.Character#isJavaIdentifierPart(int)
3589 * @see java.lang.Character#isUnicodeIdentifierPart(int)
3590 * @since 1.5
3591 */
3592 public static boolean isIdentifierIgnorable(int codePoint) {
3593 return CharacterData.of(codePoint).isIdentifierIgnorable(
3594 codePoint);
3595 }
3596
3597 /**
3598 * Converts the character argument to lowercase using case
3599 * mapping information from the UnicodeData file.
3600 * <p>
3601 * Note that
3602 * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
3603 * does not always return <code>true</code> for some ranges of
3604 * characters, particularly those that are symbols or ideographs.
3605 *
3606 * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
3607 * characters to lowercase. <code>String</code> case mapping methods
3608 * have several benefits over <code>Character</code> case mapping methods.
3609 * <code>String</code> case mapping methods can perform locale-sensitive
3610 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3611 * the <code>Character</code> case mapping methods cannot.
3612 *
3613 * <p><b>Note:</b> This method cannot handle <a
3614 * href="#supplementary"> supplementary characters</a>. To support
3615 * all Unicode characters, including supplementary characters, use
3616 * the {@link #toLowerCase(int)} method.
3617 *
3618 * @param ch the character to be converted.
3619 * @return the lowercase equivalent of the character, if any;
3620 * otherwise, the character itself.
3621 * @see java.lang.Character#isLowerCase(char)
3622 * @see java.lang.String#toLowerCase()
3623 */
3624 public static char toLowerCase(char ch) {
3625 return (char) toLowerCase((int) ch);
3626 }
3627
3628 /**
3629 * Converts the character (Unicode code point) argument to
3630 * lowercase using case mapping information from the UnicodeData
3631 * file.
3632 *
3633 * <p> Note that
3634 * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
3635 * does not always return <code>true</code> for some ranges of
3636 * characters, particularly those that are symbols or ideographs.
3637 *
3638 * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
3639 * characters to lowercase. <code>String</code> case mapping methods
3640 * have several benefits over <code>Character</code> case mapping methods.
3641 * <code>String</code> case mapping methods can perform locale-sensitive
3642 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3643 * the <code>Character</code> case mapping methods cannot.
3644 *
3645 * @param codePoint the character (Unicode code point) to be converted.
3646 * @return the lowercase equivalent of the character (Unicode code
3647 * point), if any; otherwise, the character itself.
3648 * @see java.lang.Character#isLowerCase(int)
3649 * @see java.lang.String#toLowerCase()
3650 *
3651 * @since 1.5
3652 */
3653 public static int toLowerCase(int codePoint) {
3654 return CharacterData.of(codePoint).toLowerCase(codePoint);
3655 }
3656
3657 /**
3658 * Converts the character argument to uppercase using case mapping
3659 * information from the UnicodeData file.
3660 * <p>
3661 * Note that
3662 * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
3663 * does not always return <code>true</code> for some ranges of
3664 * characters, particularly those that are symbols or ideographs.
3665 *
3666 * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
3667 * characters to uppercase. <code>String</code> case mapping methods
3668 * have several benefits over <code>Character</code> case mapping methods.
3669 * <code>String</code> case mapping methods can perform locale-sensitive
3670 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3671 * the <code>Character</code> case mapping methods cannot.
3672 *
3673 * <p><b>Note:</b> This method cannot handle <a
3674 * href="#supplementary"> supplementary characters</a>. To support
3675 * all Unicode characters, including supplementary characters, use
3676 * the {@link #toUpperCase(int)} method.
3677 *
3678 * @param ch the character to be converted.
3679 * @return the uppercase equivalent of the character, if any;
3680 * otherwise, the character itself.
3681 * @see java.lang.Character#isUpperCase(char)
3682 * @see java.lang.String#toUpperCase()
3683 */
3684 public static char toUpperCase(char ch) {
3685 return (char) toUpperCase((int) ch);
3686 }
3687
3688 /**
3689 * Converts the character (Unicode code point) argument to
3690 * uppercase using case mapping information from the UnicodeData
3691 * file.
3692 *
3693 * <p>Note that
3694 * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
3695 * does not always return <code>true</code> for some ranges of
3696 * characters, particularly those that are symbols or ideographs.
3697 *
3698 * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
3699 * characters to uppercase. <code>String</code> case mapping methods
3700 * have several benefits over <code>Character</code> case mapping methods.
3701 * <code>String</code> case mapping methods can perform locale-sensitive
3702 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
3703 * the <code>Character</code> case mapping methods cannot.
3704 *
3705 * @param codePoint the character (Unicode code point) to be converted.
3706 * @return the uppercase equivalent of the character, if any;
3707 * otherwise, the character itself.
3708 * @see java.lang.Character#isUpperCase(int)
3709 * @see java.lang.String#toUpperCase()
3710 *
3711 * @since 1.5
3712 */
3713 public static int toUpperCase(int codePoint) {
3714 return CharacterData.of(codePoint).toUpperCase(codePoint);
3715 }
3716
3717 /**
3718 * Converts the character argument to titlecase using case mapping
3719 * information from the UnicodeData file. If a character has no
3720 * explicit titlecase mapping and is not itself a titlecase char
3721 * according to UnicodeData, then the uppercase mapping is
3722 * returned as an equivalent titlecase mapping. If the
3723 * <code>char</code> argument is already a titlecase
3724 * <code>char</code>, the same <code>char</code> value will be
3725 * returned.
3726 * <p>
3727 * Note that
3728 * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
3729 * does not always return <code>true</code> for some ranges of
3730 * characters.
3731 *
3732 * <p><b>Note:</b> This method cannot handle <a
3733 * href="#supplementary"> supplementary characters</a>. To support
3734 * all Unicode characters, including supplementary characters, use
3735 * the {@link #toTitleCase(int)} method.
3736 *
3737 * @param ch the character to be converted.
3738 * @return the titlecase equivalent of the character, if any;
3739 * otherwise, the character itself.
3740 * @see java.lang.Character#isTitleCase(char)
3741 * @see java.lang.Character#toLowerCase(char)
3742 * @see java.lang.Character#toUpperCase(char)
3743 * @since 1.0.2
3744 */
3745 public static char toTitleCase(char ch) {
3746 return (char) toTitleCase((int) ch);
3747 }
3748
3749 /**
3750 * Converts the character (Unicode code point) argument to titlecase using case mapping
3751 * information from the UnicodeData file. If a character has no
3752 * explicit titlecase mapping and is not itself a titlecase char
3753 * according to UnicodeData, then the uppercase mapping is
3754 * returned as an equivalent titlecase mapping. If the
3755 * character argument is already a titlecase
3756 * character, the same character value will be
3757 * returned.
3758 *
3759 * <p>Note that
3760 * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
3761 * does not always return <code>true</code> for some ranges of
3762 * characters.
3763 *
3764 * @param codePoint the character (Unicode code point) to be converted.
3765 * @return the titlecase equivalent of the character, if any;
3766 * otherwise, the character itself.
3767 * @see java.lang.Character#isTitleCase(int)
3768 * @see java.lang.Character#toLowerCase(int)
3769 * @see java.lang.Character#toUpperCase(int)
3770 * @since 1.5
3771 */
3772 public static int toTitleCase(int codePoint) {
3773 return CharacterData.of(codePoint).toTitleCase(codePoint);
3774 }
3775
3776 /**
3777 * Returns the numeric value of the character <code>ch</code> in the
3778 * specified radix.
3779 * <p>
3780 * If the radix is not in the range <code>MIN_RADIX</code> <=
3781 * <code>radix</code> <= <code>MAX_RADIX</code> or if the
3782 * value of <code>ch</code> is not a valid digit in the specified
3783 * radix, <code>-1</code> is returned. A character is a valid digit
3784 * if at least one of the following is true:
3785 * <ul>
3786 * <li>The method <code>isDigit</code> is <code>true</code> of the character
3787 * and the Unicode decimal digit value of the character (or its
3788 * single-character decomposition) is less than the specified radix.
3789 * In this case the decimal digit value is returned.
3790 * <li>The character is one of the uppercase Latin letters
3791 * <code>'A'</code> through <code>'Z'</code> and its code is less than
3792 * <code>radix + 'A' - 10</code>.
3793 * In this case, <code>ch - 'A' + 10</code>
3794 * is returned.
3795 * <li>The character is one of the lowercase Latin letters
3796 * <code>'a'</code> through <code>'z'</code> and its code is less than
3797 * <code>radix + 'a' - 10</code>.
3798 * In this case, <code>ch - 'a' + 10</code>
3799 * is returned.
3800 * </ul>
3801 *
3802 * <p><b>Note:</b> This method cannot handle <a
3803 * href="#supplementary"> supplementary characters</a>. To support
3804 * all Unicode characters, including supplementary characters, use
3805 * the {@link #digit(int, int)} method.
3806 *
3807 * @param ch the character to be converted.
3808 * @param radix the radix.
3809 * @return the numeric value represented by the character in the
3810 * specified radix.
3811 * @see java.lang.Character#forDigit(int, int)
3812 * @see java.lang.Character#isDigit(char)
3813 */
3814 public static int digit(char ch, int radix) {
3815 return digit((int) ch, radix);
3816 }
3817
3818 /**
3819 * Returns the numeric value of the specified character (Unicode
3820 * code point) in the specified radix.
3821 *
3822 * <p>If the radix is not in the range <code>MIN_RADIX</code> <=
3823 * <code>radix</code> <= <code>MAX_RADIX</code> or if the
3824 * character is not a valid digit in the specified
3825 * radix, <code>-1</code> is returned. A character is a valid digit
3826 * if at least one of the following is true:
3827 * <ul>
3828 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
3829 * and the Unicode decimal digit value of the character (or its
3830 * single-character decomposition) is less than the specified radix.
3831 * In this case the decimal digit value is returned.
3832 * <li>The character is one of the uppercase Latin letters
3833 * <code>'A'</code> through <code>'Z'</code> and its code is less than
3834 * <code>radix + 'A' - 10</code>.
3835 * In this case, <code>ch - 'A' + 10</code>
3836 * is returned.
3837 * <li>The character is one of the lowercase Latin letters
3838 * <code>'a'</code> through <code>'z'</code> and its code is less than
3839 * <code>radix + 'a' - 10</code>.
3840 * In this case, <code>ch - 'a' + 10</code>
3841 * is returned.
3842 * </ul>
3843 *
3844 * @param codePoint the character (Unicode code point) to be converted.
3845 * @param radix the radix.
3846 * @return the numeric value represented by the character in the
3847 * specified radix.
3848 * @see java.lang.Character#forDigit(int, int)
3849 * @see java.lang.Character#isDigit(int)
3850 * @since 1.5
3851 */
3852 public static int digit(int codePoint, int radix) {
3853 return CharacterData.of(codePoint).digit(codePoint, radix);
3854 }
3855
3856 /**
3857 * Returns the <code>int</code> value that the specified Unicode
3858 * character represents. For example, the character
3859 * <code>'\u216C'</code> (the roman numeral fifty) will return
3860 * an int with a value of 50.
3861 * <p>
3862 * The letters A-Z in their uppercase (<code>'\u0041'</code> through
3863 * <code>'\u005A'</code>), lowercase
3864 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
3865 * full width variant (<code>'\uFF21'</code> through
3866 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
3867 * <code>'\uFF5A'</code>) forms have numeric values from 10
3868 * through 35. This is independent of the Unicode specification,
3869 * which does not assign numeric values to these <code>char</code>
3870 * values.
3871 * <p>
3872 * If the character does not have a numeric value, then -1 is returned.
3873 * If the character has a numeric value that cannot be represented as a
3874 * nonnegative integer (for example, a fractional value), then -2
3875 * is returned.
3876 *
3877 * <p><b>Note:</b> This method cannot handle <a
3878 * href="#supplementary"> supplementary characters</a>. To support
3879 * all Unicode characters, including supplementary characters, use
3880 * the {@link #getNumericValue(int)} method.
3881 *
3882 * @param ch the character to be converted.
3883 * @return the numeric value of the character, as a nonnegative <code>int</code>
3884 * value; -2 if the character has a numeric value that is not a
3885 * nonnegative integer; -1 if the character has no numeric value.
3886 * @see java.lang.Character#forDigit(int, int)
3887 * @see java.lang.Character#isDigit(char)
3888 * @since 1.1
3889 */
3890 public static int getNumericValue(char ch) {
3891 return getNumericValue((int) ch);
3892 }
3893
3894 /**
3895 * Returns the <code>int</code> value that the specified
3896 * character (Unicode code point) represents. For example, the character
3897 * <code>'\u216C'</code> (the Roman numeral fifty) will return
3898 * an <code>int</code> with a value of 50.
3899 * <p>
3900 * The letters A-Z in their uppercase (<code>'\u0041'</code> through
3901 * <code>'\u005A'</code>), lowercase
3902 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and
3903 * full width variant (<code>'\uFF21'</code> through
3904 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through
3905 * <code>'\uFF5A'</code>) forms have numeric values from 10
3906 * through 35. This is independent of the Unicode specification,
3907 * which does not assign numeric values to these <code>char</code>
3908 * values.
3909 * <p>
3910 * If the character does not have a numeric value, then -1 is returned.
3911 * If the character has a numeric value that cannot be represented as a
3912 * nonnegative integer (for example, a fractional value), then -2
3913 * is returned.
3914 *
3915 * @param codePoint the character (Unicode code point) to be converted.
3916 * @return the numeric value of the character, as a nonnegative <code>int</code>
3917 * value; -2 if the character has a numeric value that is not a
3918 * nonnegative integer; -1 if the character has no numeric value.
3919 * @see java.lang.Character#forDigit(int, int)
3920 * @see java.lang.Character#isDigit(int)
3921 * @since 1.5
3922 */
3923 public static int getNumericValue(int codePoint) {
3924 return CharacterData.of(codePoint).getNumericValue(codePoint);
3925 }
3926
3927 /**
3928 * Determines if the specified character is ISO-LATIN-1 white space.
3929 * This method returns <code>true</code> for the following five
3930 * characters only:
3931 * <table>
3932 * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td>
3933 * <td><code>HORIZONTAL TABULATION</code></td></tr>
3934 * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td>
3935 * <td><code>NEW LINE</code></td></tr>
3936 * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td>
3937 * <td><code>FORM FEED</code></td></tr>
3938 * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td>
3939 * <td><code>CARRIAGE RETURN</code></td></tr>
3940 * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td>
3941 * <td><code>SPACE</code></td></tr>
3942 * </table>
3943 *
3944 * @param ch the character to be tested.
3945 * @return <code>true</code> if the character is ISO-LATIN-1 white
3946 * space; <code>false</code> otherwise.
3947 * @see java.lang.Character#isSpaceChar(char)
3948 * @see java.lang.Character#isWhitespace(char)
3949 * @deprecated Replaced by isWhitespace(char).
3950 */
3951 @Deprecated
3952 public static boolean isSpace(char ch) {
3953 return (ch <= 0x0020)
3954 && (((((1L << 0x0009) | (1L << 0x000A) | (1L << 0x000C)
3955 | (1L << 0x000D) | (1L << 0x0020)) >> ch) & 1L) != 0);
3956 }
3957
3958 /**
3959 * Determines if the specified character is a Unicode space character.
3960 * A character is considered to be a space character if and only if
3961 * it is specified to be a space character by the Unicode standard. This
3962 * method returns true if the character's general category type is any of
3963 * the following:
3964 * <ul>
3965 * <li> <code>SPACE_SEPARATOR</code>
3966 * <li> <code>LINE_SEPARATOR</code>
3967 * <li> <code>PARAGRAPH_SEPARATOR</code>
3968 * </ul>
3969 *
3970 * <p><b>Note:</b> This method cannot handle <a
3971 * href="#supplementary"> supplementary characters</a>. To support
3972 * all Unicode characters, including supplementary characters, use
3973 * the {@link #isSpaceChar(int)} method.
3974 *
3975 * @param ch the character to be tested.
3976 * @return <code>true</code> if the character is a space character;
3977 * <code>false</code> otherwise.
3978 * @see java.lang.Character#isWhitespace(char)
3979 * @since 1.1
3980 */
3981 public static boolean isSpaceChar(char ch) {
3982 return isSpaceChar((int) ch);
3983 }
3984
3985 /**
3986 * Determines if the specified character (Unicode code point) is a
3987 * Unicode space character. A character is considered to be a
3988 * space character if and only if it is specified to be a space
3989 * character by the Unicode standard. This method returns true if
3990 * the character's general category type is any of the following:
3991 *
3992 * <ul>
3993 * <li> {@link #SPACE_SEPARATOR}
3994 * <li> {@link #LINE_SEPARATOR}
3995 * <li> {@link #PARAGRAPH_SEPARATOR}
3996 * </ul>
3997 *
3998 * @param codePoint the character (Unicode code point) to be tested.
3999 * @return <code>true</code> if the character is a space character;
4000 * <code>false</code> otherwise.
4001 * @see java.lang.Character#isWhitespace(int)
4002 * @since 1.5
4003 */
4004 public static boolean isSpaceChar(int codePoint) {
4005 return ((((1 << Character.SPACE_SEPARATOR)
4006 | (1 << Character.LINE_SEPARATOR) | (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) != 0;
4007 }
4008
4009 /**
4010 * Determines if the specified character is white space according to Java.
4011 * A character is a Java whitespace character if and only if it satisfies
4012 * one of the following criteria:
4013 * <ul>
4014 * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4015 * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
4016 * but is not also a non-breaking space (<code>'\u00A0'</code>,
4017 * <code>'\u2007'</code>, <code>'\u202F'</code>).
4018 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
4019 * <li> It is <code>'\u000A'</code>, LINE FEED.
4020 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
4021 * <li> It is <code>'\u000C'</code>, FORM FEED.
4022 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
4023 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
4024 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
4025 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
4026 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
4027 * </ul>
4028 *
4029 * <p><b>Note:</b> This method cannot handle <a
4030 * href="#supplementary"> supplementary characters</a>. To support
4031 * all Unicode characters, including supplementary characters, use
4032 * the {@link #isWhitespace(int)} method.
4033 *
4034 * @param ch the character to be tested.
4035 * @return <code>true</code> if the character is a Java whitespace
4036 * character; <code>false</code> otherwise.
4037 * @see java.lang.Character#isSpaceChar(char)
4038 * @since 1.1
4039 */
4040 public static boolean isWhitespace(char ch) {
4041 return isWhitespace((int) ch);
4042 }
4043
4044 /**
4045 * Determines if the specified character (Unicode code point) is
4046 * white space according to Java. A character is a Java
4047 * whitespace character if and only if it satisfies one of the
4048 * following criteria:
4049 * <ul>
4050 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4051 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
4052 * but is not also a non-breaking space (<code>'\u00A0'</code>,
4053 * <code>'\u2007'</code>, <code>'\u202F'</code>).
4054 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION.
4055 * <li> It is <code>'\u000A'</code>, LINE FEED.
4056 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION.
4057 * <li> It is <code>'\u000C'</code>, FORM FEED.
4058 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN.
4059 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR.
4060 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR.
4061 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR.
4062 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR.
4063 * </ul>
4064 * <p>
4065 *
4066 * @param codePoint the character (Unicode code point) to be tested.
4067 * @return <code>true</code> if the character is a Java whitespace
4068 * character; <code>false</code> otherwise.
4069 * @see java.lang.Character#isSpaceChar(int)
4070 * @since 1.5
4071 */
4072 public static boolean isWhitespace(int codePoint) {
4073 return CharacterData.of(codePoint).isWhitespace(codePoint);
4074 }
4075
4076 /**
4077 * Determines if the specified character is an ISO control
4078 * character. A character is considered to be an ISO control
4079 * character if its code is in the range <code>'\u0000'</code>
4080 * through <code>'\u001F'</code> or in the range
4081 * <code>'\u007F'</code> through <code>'\u009F'</code>.
4082 *
4083 * <p><b>Note:</b> This method cannot handle <a
4084 * href="#supplementary"> supplementary characters</a>. To support
4085 * all Unicode characters, including supplementary characters, use
4086 * the {@link #isISOControl(int)} method.
4087 *
4088 * @param ch the character to be tested.
4089 * @return <code>true</code> if the character is an ISO control character;
4090 * <code>false</code> otherwise.
4091 *
4092 * @see java.lang.Character#isSpaceChar(char)
4093 * @see java.lang.Character#isWhitespace(char)
4094 * @since 1.1
4095 */
4096 public static boolean isISOControl(char ch) {
4097 return isISOControl((int) ch);
4098 }
4099
4100 /**
4101 * Determines if the referenced character (Unicode code point) is an ISO control
4102 * character. A character is considered to be an ISO control
4103 * character if its code is in the range <code>'\u0000'</code>
4104 * through <code>'\u001F'</code> or in the range
4105 * <code>'\u007F'</code> through <code>'\u009F'</code>.
4106 *
4107 * @param codePoint the character (Unicode code point) to be tested.
4108 * @return <code>true</code> if the character is an ISO control character;
4109 * <code>false</code> otherwise.
4110 * @see java.lang.Character#isSpaceChar(int)
4111 * @see java.lang.Character#isWhitespace(int)
4112 * @since 1.5
4113 */
4114 public static boolean isISOControl(int codePoint) {
4115 return (codePoint >= 0x0000 && codePoint <= 0x001F)
4116 || (codePoint >= 0x007F && codePoint <= 0x009F);
4117 }
4118
4119 /**
4120 * Returns a value indicating a character's general category.
4121 *
4122 * <p><b>Note:</b> This method cannot handle <a
4123 * href="#supplementary"> supplementary characters</a>. To support
4124 * all Unicode characters, including supplementary characters, use
4125 * the {@link #getType(int)} method.
4126 *
4127 * @param ch the character to be tested.
4128 * @return a value of type <code>int</code> representing the
4129 * character's general category.
4130 * @see java.lang.Character#COMBINING_SPACING_MARK
4131 * @see java.lang.Character#CONNECTOR_PUNCTUATION
4132 * @see java.lang.Character#CONTROL
4133 * @see java.lang.Character#CURRENCY_SYMBOL
4134 * @see java.lang.Character#DASH_PUNCTUATION
4135 * @see java.lang.Character#DECIMAL_DIGIT_NUMBER
4136 * @see java.lang.Character#ENCLOSING_MARK
4137 * @see java.lang.Character#END_PUNCTUATION
4138 * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION
4139 * @see java.lang.Character#FORMAT
4140 * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4141 * @see java.lang.Character#LETTER_NUMBER
4142 * @see java.lang.Character#LINE_SEPARATOR
4143 * @see java.lang.Character#LOWERCASE_LETTER
4144 * @see java.lang.Character#MATH_SYMBOL
4145 * @see java.lang.Character#MODIFIER_LETTER
4146 * @see java.lang.Character#MODIFIER_SYMBOL
4147 * @see java.lang.Character#NON_SPACING_MARK
4148 * @see java.lang.Character#OTHER_LETTER
4149 * @see java.lang.Character#OTHER_NUMBER
4150 * @see java.lang.Character#OTHER_PUNCTUATION
4151 * @see java.lang.Character#OTHER_SYMBOL
4152 * @see java.lang.Character#PARAGRAPH_SEPARATOR
4153 * @see java.lang.Character#PRIVATE_USE
4154 * @see java.lang.Character#SPACE_SEPARATOR
4155 * @see java.lang.Character#START_PUNCTUATION
4156 * @see java.lang.Character#SURROGATE
4157 * @see java.lang.Character#TITLECASE_LETTER
4158 * @see java.lang.Character#UNASSIGNED
4159 * @see java.lang.Character#UPPERCASE_LETTER
4160 * @since 1.1
4161 */
4162 public static int getType(char ch) {
4163 return getType((int) ch);
4164 }
4165
4166 /**
4167 * Returns a value indicating a character's general category.
4168 *
4169 * @param codePoint the character (Unicode code point) to be tested.
4170 * @return a value of type <code>int</code> representing the
4171 * character's general category.
4172 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
4173 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
4174 * @see Character#CONTROL CONTROL
4175 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
4176 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
4177 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
4178 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
4179 * @see Character#END_PUNCTUATION END_PUNCTUATION
4180 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
4181 * @see Character#FORMAT FORMAT
4182 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
4183 * @see Character#LETTER_NUMBER LETTER_NUMBER
4184 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
4185 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
4186 * @see Character#MATH_SYMBOL MATH_SYMBOL
4187 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
4188 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
4189 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
4190 * @see Character#OTHER_LETTER OTHER_LETTER
4191 * @see Character#OTHER_NUMBER OTHER_NUMBER
4192 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
4193 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
4194 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
4195 * @see Character#PRIVATE_USE PRIVATE_USE
4196 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
4197 * @see Character#START_PUNCTUATION START_PUNCTUATION
4198 * @see Character#SURROGATE SURROGATE
4199 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
4200 * @see Character#UNASSIGNED UNASSIGNED
4201 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
4202 * @since 1.5
4203 */
4204 public static int getType(int codePoint) {
4205 return CharacterData.of(codePoint).getType(codePoint);
4206 }
4207
4208 /**
4209 * Determines the character representation for a specific digit in
4210 * the specified radix. If the value of <code>radix</code> is not a
4211 * valid radix, or the value of <code>digit</code> is not a valid
4212 * digit in the specified radix, the null character
4213 * (<code>'\u0000'</code>) is returned.
4214 * <p>
4215 * The <code>radix</code> argument is valid if it is greater than or
4216 * equal to <code>MIN_RADIX</code> and less than or equal to
4217 * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
4218 * <code>0 <=digit < radix</code>.
4219 * <p>
4220 * If the digit is less than 10, then
4221 * <code>'0' + digit</code> is returned. Otherwise, the value
4222 * <code>'a' + digit - 10</code> is returned.
4223 *
4224 * @param digit the number to convert to a character.
4225 * @param radix the radix.
4226 * @return the <code>char</code> representation of the specified digit
4227 * in the specified radix.
4228 * @see java.lang.Character#MIN_RADIX
4229 * @see java.lang.Character#MAX_RADIX
4230 * @see java.lang.Character#digit(char, int)
4231 */
4232 public static char forDigit(int digit, int radix) {
4233 if ((digit >= radix) || (digit < 0)) {
4234 return '\0';
4235 }
4236 if ((radix < Character.MIN_RADIX)
4237 || (radix > Character.MAX_RADIX)) {
4238 return '\0';
4239 }
4240 if (digit < 10) {
4241 return (char) ('0' + digit);
4242 }
4243 return (char) ('a' - 10 + digit);
4244 }
4245
4246 /**
4247 * Returns the Unicode directionality property for the given
4248 * character. Character directionality is used to calculate the
4249 * visual ordering of text. The directionality value of undefined
4250 * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
4251 *
4252 * <p><b>Note:</b> This method cannot handle <a
4253 * href="#supplementary"> supplementary characters</a>. To support
4254 * all Unicode characters, including supplementary characters, use
4255 * the {@link #getDirectionality(int)} method.
4256 *
4257 * @param ch <code>char</code> for which the directionality property
4258 * is requested.
4259 * @return the directionality property of the <code>char</code> value.
4260 *
4261 * @see Character#DIRECTIONALITY_UNDEFINED
4262 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
4263 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
4264 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4265 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
4266 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4267 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4268 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
4269 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4270 * @see Character#DIRECTIONALITY_NONSPACING_MARK
4271 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
4272 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
4273 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
4274 * @see Character#DIRECTIONALITY_WHITESPACE
4275 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
4276 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4277 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4278 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4279 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4280 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4281 * @since 1.4
4282 */
4283 public static byte getDirectionality(char ch) {
4284 return getDirectionality((int) ch);
4285 }
4286
4287 /**
4288 * Returns the Unicode directionality property for the given
4289 * character (Unicode code point). Character directionality is
4290 * used to calculate the visual ordering of text. The
4291 * directionality value of undefined character is {@link
4292 * #DIRECTIONALITY_UNDEFINED}.
4293 *
4294 * @param codePoint the character (Unicode code point) for which
4295 * the directionality property is requested.
4296 * @return the directionality property of the character.
4297 *
4298 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
4299 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
4300 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
4301 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4302 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
4303 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4304 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4305 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
4306 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4307 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
4308 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
4309 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
4310 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
4311 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
4312 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
4313 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4314 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4315 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4316 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4317 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4318 * @since 1.5
4319 */
4320 public static byte getDirectionality(int codePoint) {
4321 return CharacterData.of(codePoint).getDirectionality(codePoint);
4322 }
4323
4324 /**
4325 * Determines whether the character is mirrored according to the
4326 * Unicode specification. Mirrored characters should have their
4327 * glyphs horizontally mirrored when displayed in text that is
4328 * right-to-left. For example, <code>'\u0028'</code> LEFT
4329 * PARENTHESIS is semantically defined to be an <i>opening
4330 * parenthesis</i>. This will appear as a "(" in text that is
4331 * left-to-right but as a ")" in text that is right-to-left.
4332 *
4333 * <p><b>Note:</b> This method cannot handle <a
4334 * href="#supplementary"> supplementary characters</a>. To support
4335 * all Unicode characters, including supplementary characters, use
4336 * the {@link #isMirrored(int)} method.
4337 *
4338 * @param ch <code>char</code> for which the mirrored property is requested
4339 * @return <code>true</code> if the char is mirrored, <code>false</code>
4340 * if the <code>char</code> is not mirrored or is not defined.
4341 * @since 1.4
4342 */
4343 public static boolean isMirrored(char ch) {
4344 return isMirrored((int) ch);
4345 }
4346
4347 /**
4348 * Determines whether the specified character (Unicode code point)
4349 * is mirrored according to the Unicode specification. Mirrored
4350 * characters should have their glyphs horizontally mirrored when
4351 * displayed in text that is right-to-left. For example,
4352 * <code>'\u0028'</code> LEFT PARENTHESIS is semantically
4353 * defined to be an <i>opening parenthesis</i>. This will appear
4354 * as a "(" in text that is left-to-right but as a ")" in text
4355 * that is right-to-left.
4356 *
4357 * @param codePoint the character (Unicode code point) to be tested.
4358 * @return <code>true</code> if the character is mirrored, <code>false</code>
4359 * if the character is not mirrored or is not defined.
4360 * @since 1.5
4361 */
4362 public static boolean isMirrored(int codePoint) {
4363 return CharacterData.of(codePoint).isMirrored(codePoint);
4364 }
4365
4366 /**
4367 * Compares two <code>Character</code> objects numerically.
4368 *
4369 * @param anotherCharacter the <code>Character</code> to be compared.
4370
4371 * @return the value <code>0</code> if the argument <code>Character</code>
4372 * is equal to this <code>Character</code>; a value less than
4373 * <code>0</code> if this <code>Character</code> is numerically less
4374 * than the <code>Character</code> argument; and a value greater than
4375 * <code>0</code> if this <code>Character</code> is numerically greater
4376 * than the <code>Character</code> argument (unsigned comparison).
4377 * Note that this is strictly a numerical comparison; it is not
4378 * locale-dependent.
4379 * @since 1.2
4380 */
4381 public int compareTo(Character anotherCharacter) {
4382 return this .value - anotherCharacter.value;
4383 }
4384
4385 /**
4386 * Converts the character (Unicode code point) argument to uppercase using
4387 * information from the UnicodeData file.
4388 * <p>
4389 *
4390 * @param codePoint the character (Unicode code point) to be converted.
4391 * @return either the uppercase equivalent of the character, if
4392 * any, or an error flag (<code>Character.ERROR</code>)
4393 * that indicates that a 1:M <code>char</code> mapping exists.
4394 * @see java.lang.Character#isLowerCase(char)
4395 * @see java.lang.Character#isUpperCase(char)
4396 * @see java.lang.Character#toLowerCase(char)
4397 * @see java.lang.Character#toTitleCase(char)
4398 * @since 1.4
4399 */
4400 static int toUpperCaseEx(int codePoint) {
4401 assert isValidCodePoint(codePoint);
4402 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
4403 }
4404
4405 /**
4406 * Converts the character (Unicode code point) argument to uppercase using case
4407 * mapping information from the SpecialCasing file in the Unicode
4408 * specification. If a character has no explicit uppercase
4409 * mapping, then the <code>char</code> itself is returned in the
4410 * <code>char[]</code>.
4411 *
4412 * @param codePoint the character (Unicode code point) to be converted.
4413 * @return a <code>char[]</code> with the uppercased character.
4414 * @since 1.4
4415 */
4416 static char[] toUpperCaseCharArray(int codePoint) {
4417 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
4418 assert isValidCodePoint(codePoint)
4419 && !isSupplementaryCodePoint(codePoint);
4420 return CharacterData.of(codePoint).toUpperCaseCharArray(
4421 codePoint);
4422 }
4423
4424 /**
4425 * The number of bits used to represent a <code>char</code> value in
4426 * unsigned binary form.
4427 *
4428 * @since 1.5
4429 */
4430 public static final int SIZE = 16;
4431
4432 /**
4433 * Returns the value obtained by reversing the order of the bytes in the
4434 * specified <tt>char</tt> value.
4435 *
4436 * @return the value obtained by reversing (or, equivalently, swapping)
4437 * the bytes in the specified <tt>char</tt> value.
4438 * @since 1.5
4439 */
4440 public static char reverseBytes(char ch) {
4441 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
4442 }
4443 }
|