0001: /**
0002: *******************************************************************************
0003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */package com.ibm.icu.dev.test.lang;
0007:
0008: import com.ibm.icu.impl.UBiDiProps;
0009: import com.ibm.icu.impl.UCaseProps;
0010:
0011: import com.ibm.icu.dev.test.TestFmwk;
0012: import com.ibm.icu.dev.test.TestUtil;
0013: import com.ibm.icu.lang.UCharacter;
0014: import com.ibm.icu.lang.UCharacterCategory;
0015: import com.ibm.icu.lang.UCharacterDirection;
0016: import com.ibm.icu.lang.UProperty;
0017: import com.ibm.icu.lang.UScript;
0018: import com.ibm.icu.text.UTF16;
0019: import com.ibm.icu.text.UnicodeSet;
0020: import com.ibm.icu.text.UnicodeSetIterator;
0021: import com.ibm.icu.util.RangeValueIterator;
0022: import com.ibm.icu.util.ValueIterator;
0023: import com.ibm.icu.util.VersionInfo;
0024: import com.ibm.icu.impl.UCharacterName;
0025: import com.ibm.icu.impl.Utility;
0026: import com.ibm.icu.impl.USerializedSet;
0027: import com.ibm.icu.impl.NormalizerImpl;
0028: import com.ibm.icu.impl.UCharacterProperty;
0029: import java.io.BufferedReader;
0030: import java.util.Arrays;
0031:
0032: /**
0033: * Testing class for UCharacter
0034: * Mostly following the test cases for ICU
0035: * @author Syn Wee Quek
0036: * @since nov 04 2000
0037: */
0038: public final class UCharacterTest extends TestFmwk {
0039: // private variables =============================================
0040:
0041: /**
0042: * ICU4J data version number
0043: */
0044: private final VersionInfo VERSION_ = VersionInfo
0045: .getInstance("5.0.0.0");
0046:
0047: // constructor ===================================================
0048:
0049: /**
0050: * Constructor
0051: */
public UCharacterTest() {
    // No state of its own to initialise; this only spells out the
    // superclass constructor call that the compiler would insert anyway.
    super();
}
0054:
0055: // public methods ================================================
0056:
0057: public static void main(String[] arg) {
0058: try {
0059: UCharacterTest test = new UCharacterTest();
0060: test.run(arg);
0061: } catch (Exception e) {
0062: e.printStackTrace();
0063: }
0064: }
0065:
0066: /**
0067: * Testing the letter and number determination in UCharacter
0068: */
0069: public void TestLetterNumber() {
0070: for (int i = 0x0041; i < 0x005B; i++)
0071: if (!UCharacter.isLetter(i))
0072: errln("FAIL \\u" + hex(i) + " expected to be a letter");
0073:
0074: for (int i = 0x0660; i < 0x066A; i++)
0075: if (UCharacter.isLetter(i))
0076: errln("FAIL \\u" + hex(i)
0077: + " expected not to be a letter");
0078:
0079: for (int i = 0x0660; i < 0x066A; i++)
0080: if (!UCharacter.isDigit(i))
0081: errln("FAIL \\u" + hex(i) + " expected to be a digit");
0082:
0083: for (int i = 0x0041; i < 0x005B; i++)
0084: if (!UCharacter.isLetterOrDigit(i))
0085: errln("FAIL \\u" + hex(i)
0086: + " expected not to be a digit");
0087:
0088: for (int i = 0x0660; i < 0x066A; i++)
0089: if (!UCharacter.isLetterOrDigit(i))
0090: errln("FAIL \\u" + hex(i)
0091: + "expected to be either a letter or a digit");
0092:
0093: /*
0094: * The following checks work only starting from Unicode 4.0.
0095: * Check the version number here.
0096: */
0097: VersionInfo version = UCharacter.getUnicodeVersion();
0098: if (version.getMajor() < 4
0099: || version.equals(VersionInfo.getInstance(4, 0, 1))) {
0100: return;
0101: }
0102:
0103: /*
0104: * Sanity check:
0105: * Verify that exactly the digit characters have decimal digit values.
0106: * This assumption is used in the implementation of u_digit()
0107: * (which checks nt=de)
0108: * compared with the parallel java.lang.Character.digit()
0109: * (which checks Nd).
0110: *
0111: * This was not true in Unicode 3.2 and earlier.
0112: * Unicode 4.0 fixed discrepancies.
0113: * Unicode 4.0.1 re-introduced problems in this area due to an
0114: * unintentionally incomplete last-minute change.
0115: */
0116: String digitsPattern = "[:Nd:]";
0117: String decimalValuesPattern = "[:Numeric_Type=Decimal:]";
0118:
0119: UnicodeSet digits, decimalValues;
0120:
0121: digits = new UnicodeSet(digitsPattern);
0122: decimalValues = new UnicodeSet(decimalValuesPattern);
0123:
0124: compareUSets(digits, decimalValues, "[:Nd:]",
0125: "[:Numeric_Type=Decimal:]", true);
0126:
0127: }
0128:
0129: /**
0130: * Tests for space determination in UCharacter
0131: */
0132: public void TestSpaces() {
0133: int spaces[] = { 0x0020, 0x0000a0, 0x002000, 0x002001, 0x002005 };
0134: int nonspaces[] = { 0x61, 0x0062, 0x0063, 0x0064, 0x0074 };
0135: int whitespaces[] = { 0x2008, 0x002009, 0x00200a, 0x00001c,
0136: 0x00000c };
0137: int nonwhitespaces[] = { 0x61, 0x0062, 0x003c, 0x0028, 0x003f };
0138:
0139: int size = spaces.length;
0140: for (int i = 0; i < size; i++) {
0141: if (!UCharacter.isSpaceChar(spaces[i])) {
0142: errln("FAIL \\u" + hex(spaces[i])
0143: + " expected to be a space character");
0144: break;
0145: }
0146:
0147: if (UCharacter.isSpaceChar(nonspaces[i])) {
0148: errln("FAIL \\u" + hex(nonspaces[i])
0149: + " expected not to be space character");
0150: break;
0151: }
0152:
0153: if (!UCharacter.isWhitespace(whitespaces[i])) {
0154: errln("FAIL \\u" + hex(whitespaces[i])
0155: + " expected to be a white space character");
0156: break;
0157: }
0158: if (UCharacter.isWhitespace(nonwhitespaces[i])) {
0159: errln("FAIL \\u" + hex(nonwhitespaces[i])
0160: + " expected not to be a space character");
0161: break;
0162: }
0163: logln("Ok \\u" + hex(spaces[i]) + " and \\u"
0164: + hex(nonspaces[i]) + " and \\u"
0165: + hex(whitespaces[i]) + " and \\u"
0166: + hex(nonwhitespaces[i]));
0167: }
0168:
0169: int rulewhitespace[] = { 0x9, 0xd, 0x20, 0x85, 0x200e, 0x200f,
0170: 0x2028, 0x2029 };
0171: int nonrulewhitespace[] = { 0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1,
0172: 0x1680, 0x1681, 0x180e, 0x180f, 0x1FFF, 0x2000, 0x200a,
0173: 0x200b, 0x2010, 0x202f, 0x2030, 0x205f, 0x2060, 0x3000,
0174: 0x3001 };
0175: for (int i = 0; i < rulewhitespace.length; i++) {
0176: if (!UCharacterProperty.isRuleWhiteSpace(rulewhitespace[i])) {
0177: errln("\\u" + Utility.hex(rulewhitespace[i], 4)
0178: + " expected to be a rule white space");
0179: }
0180: }
0181: for (int i = 0; i < nonrulewhitespace.length; i++) {
0182: if (UCharacterProperty
0183: .isRuleWhiteSpace(nonrulewhitespace[i])) {
0184: errln("\\u" + Utility.hex(nonrulewhitespace[i], 4)
0185: + " expected to be a non rule white space");
0186: }
0187: }
0188: }
0189:
0190: /**
0191: * Tests for defined and undefined characters
0192: */
0193: public void TestDefined() {
0194: int undefined[] = { 0xfff1, 0xfff7, 0xfa6b };
0195: int defined[] = { 0x523E, 0x004f88, 0x00fffd };
0196:
0197: int size = undefined.length;
0198: for (int i = 0; i < size; i++) {
0199: if (UCharacter.isDefined(undefined[i])) {
0200: errln("FAIL \\u" + hex(undefined[i])
0201: + " expected not to be defined");
0202: break;
0203: }
0204: if (!UCharacter.isDefined(defined[i])) {
0205: errln("FAIL \\u" + hex(defined[i])
0206: + " expected defined");
0207: break;
0208: }
0209: }
0210: }
0211:
0212: /**
0213: * Tests for base characters and their cellwidth
0214: */
0215: public void TestBase() {
0216: int base[] = { 0x0061, 0x000031, 0x0003d2 };
0217: int nonbase[] = { 0x002B, 0x000020, 0x00203B };
0218: int size = base.length;
0219: for (int i = 0; i < size; i++) {
0220: if (UCharacter.isBaseForm(nonbase[i])) {
0221: errln("FAIL \\u" + hex(nonbase[i])
0222: + " expected not to be a base character");
0223: break;
0224: }
0225: if (!UCharacter.isBaseForm(base[i])) {
0226: errln("FAIL \\u" + hex(base[i])
0227: + " expected to be a base character");
0228: break;
0229: }
0230: }
0231: }
0232:
0233: /**
0234: * Tests for digit characters
0235: */
0236: public void TestDigits() {
0237: int digits[] = { 0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160 };
0238:
0239: //special characters not in the properties table
0240: int digits2[] = { 0x3007, 0x004e00, 0x004e8c, 0x004e09,
0241: 0x0056d8, 0x004e94, 0x00516d, 0x4e03, 0x00516b,
0242: 0x004e5d };
0243: int nondigits[] = { 0x0010, 0x000041, 0x000122, 0x0068FE };
0244:
0245: int digitvalues[] = { 0, 2, 3, 5, 1 };
0246: int digitvalues2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
0247:
0248: int size = digits.length;
0249: for (int i = 0; i < size; i++) {
0250: if (UCharacter.isDigit(digits[i])
0251: && UCharacter.digit(digits[i]) != digitvalues[i]) {
0252: errln("FAIL \\u" + hex(digits[i])
0253: + " expected digit with value "
0254: + digitvalues[i]);
0255: break;
0256: }
0257: }
0258: size = nondigits.length;
0259: for (int i = 0; i < size; i++)
0260: if (UCharacter.isDigit(nondigits[i])) {
0261: errln("FAIL \\u" + hex(nondigits[i])
0262: + " expected nondigit");
0263: break;
0264: }
0265:
0266: size = digits2.length;
0267: for (int i = 0; i < 10; i++) {
0268: if (UCharacter.isDigit(digits2[i])
0269: && UCharacter.digit(digits2[i]) != digitvalues2[i]) {
0270: errln("FAIL \\u" + hex(digits2[i])
0271: + " expected digit with value "
0272: + digitvalues2[i]);
0273: break;
0274: }
0275: }
0276: }
0277:
0278: /**
0279: * Tests for numeric characters
0280: */
0281: public void TestNumeric() {
0282: if (UCharacter.getNumericValue(0x00BC) != -2) {
0283: errln("Numeric value of 0x00BC expected to be -2");
0284: }
0285:
0286: for (int i = '0'; i < '9'; i++) {
0287: int n1 = UCharacter.getNumericValue(i);
0288: double n2 = UCharacter.getUnicodeNumericValue(i);
0289: if (n1 != n2 || n1 != (i - '0')) {
0290: errln("Numeric value of " + (char) i
0291: + " expected to be " + (i - '0'));
0292: }
0293: }
0294: for (int i = 'A'; i < 'F'; i++) {
0295: int n1 = UCharacter.getNumericValue(i);
0296: double n2 = UCharacter.getUnicodeNumericValue(i);
0297: if (n2 != UCharacter.NO_NUMERIC_VALUE
0298: || n1 != (i - 'A' + 10)) {
0299: errln("Numeric value of " + (char) i
0300: + " expected to be " + (i - 'A' + 10));
0301: }
0302: }
0303: for (int i = 0xFF21; i < 0xFF26; i++) {
0304: // testing full wideth latin characters A-F
0305: int n1 = UCharacter.getNumericValue(i);
0306: double n2 = UCharacter.getUnicodeNumericValue(i);
0307: if (n2 != UCharacter.NO_NUMERIC_VALUE
0308: || n1 != (i - 0xFF21 + 10)) {
0309: errln("Numeric value of " + (char) i
0310: + " expected to be " + (i - 0xFF21 + 10));
0311: }
0312: }
0313: // testing han numbers
0314: int han[] = { 0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3,
0315: 0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7, 0x634c, 8,
0316: 0x7396, 9, 0x5341, 10, 0x62fe, 10, 0x767e, 100, 0x4f70,
0317: 100, 0x5343, 1000, 0x4edf, 1000, 0x824c, 10000, 0x5104,
0318: 100000000 };
0319: for (int i = 0; i < han.length; i += 2) {
0320: if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) {
0321: errln("Numeric value of \\u"
0322: + Integer.toHexString(han[i])
0323: + " expected to be " + han[i + 1]);
0324: }
0325: }
0326: }
0327:
0328: /**
0329: * Tests for version
0330: */
0331: public void TestVersion() {
0332: if (!UCharacter.getUnicodeVersion().equals(VERSION_))
0333: errln("FAIL expected: " + VERSION_ + "got: "
0334: + UCharacter.getUnicodeVersion());
0335: }
0336:
0337: /**
0338: * Tests for control characters
0339: */
0340: public void TestISOControl() {
0341: int control[] = { 0x001b, 0x000097, 0x000082 };
0342: int noncontrol[] = { 0x61, 0x000031, 0x0000e2 };
0343:
0344: int size = control.length;
0345: for (int i = 0; i < size; i++) {
0346: if (!UCharacter.isISOControl(control[i])) {
0347: errln("FAIL 0x" + Integer.toHexString(control[i])
0348: + " expected to be a control character");
0349: break;
0350: }
0351: if (UCharacter.isISOControl(noncontrol[i])) {
0352: errln("FAIL 0x" + Integer.toHexString(noncontrol[i])
0353: + " expected to be not a control character");
0354: break;
0355: }
0356:
0357: logln("Ok 0x" + Integer.toHexString(control[i])
0358: + " and 0x" + Integer.toHexString(noncontrol[i]));
0359: }
0360: }
0361:
0362: /**
0363: * Test Supplementary
0364: */
0365: public void TestSupplementary() {
0366: for (int i = 0; i < 0x10000; i++) {
0367: if (UCharacter.isSupplementary(i)) {
0368: errln("Codepoint \\u" + Integer.toHexString(i)
0369: + " is not supplementary");
0370: }
0371: }
0372: for (int i = 0x10000; i < 0x10FFFF; i++) {
0373: if (!UCharacter.isSupplementary(i)) {
0374: errln("Codepoint \\u" + Integer.toHexString(i)
0375: + " is supplementary");
0376: }
0377: }
0378: }
0379:
0380: /**
0381: * Test mirroring
0382: */
0383: public void TestMirror() {
0384: if (!(UCharacter.isMirrored(0x28)
0385: && UCharacter.isMirrored(0xbb)
0386: && UCharacter.isMirrored(0x2045)
0387: && UCharacter.isMirrored(0x232a)
0388: && !UCharacter.isMirrored(0x27)
0389: && !UCharacter.isMirrored(0x61)
0390: && !UCharacter.isMirrored(0x284) && !UCharacter
0391: .isMirrored(0x3400))) {
0392: errln("isMirrored() does not work correctly");
0393: }
0394:
0395: if (!(UCharacter.getMirror(0x3c) == 0x3e
0396: && UCharacter.getMirror(0x5d) == 0x5b
0397: && UCharacter.getMirror(0x208d) == 0x208e
0398: && UCharacter.getMirror(0x3017) == 0x3016 &&
0399:
0400: UCharacter.getMirror(0xbb) == 0xab
0401: && UCharacter.getMirror(0x2215) == 0x29F5
0402: && UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */
0403:
0404: UCharacter.getMirror(0x2e) == 0x2e
0405: && UCharacter.getMirror(0x6f3) == 0x6f3
0406: && UCharacter.getMirror(0x301c) == 0x301c && UCharacter
0407: .getMirror(0xa4ab) == 0xa4ab)) {
0408: errln("getMirror() does not work correctly");
0409: }
0410:
0411: /* verify that Bidi_Mirroring_Glyph roundtrips */
0412: UnicodeSet set = new UnicodeSet("[:Bidi_Mirrored:]");
0413: UnicodeSetIterator iter = new UnicodeSetIterator(set);
0414: int start, end, c2, c3;
0415: while (iter.nextRange() && (start = iter.codepoint) >= 0) {
0416: end = iter.codepointEnd;
0417: do {
0418: c2 = UCharacter.getMirror(start);
0419: c3 = UCharacter.getMirror(c2);
0420: if (c3 != start) {
0421: errln("getMirror() does not roundtrip: U+"
0422: + hex(start) + "->U+" + hex(c2) + "->U+"
0423: + hex(c3));
0424: }
0425: } while (++start <= end);
0426: }
0427: }
0428:
0429: /**
0430: * Tests for printable characters
0431: */
0432: public void TestPrint() {
0433: int printable[] = { 0x0042, 0x00005f, 0x002014 };
0434: int nonprintable[] = { 0x200c, 0x00009f, 0x00001b };
0435:
0436: int size = printable.length;
0437: for (int i = 0; i < size; i++) {
0438: if (!UCharacter.isPrintable(printable[i])) {
0439: errln("FAIL \\u" + hex(printable[i])
0440: + " expected to be a printable character");
0441: break;
0442: }
0443: if (UCharacter.isPrintable(nonprintable[i])) {
0444: errln("FAIL \\u" + hex(nonprintable[i])
0445: + " expected not to be a printable character");
0446: break;
0447: }
0448: logln("Ok \\u" + hex(printable[i]) + " and \\u"
0449: + hex(nonprintable[i]));
0450: }
0451:
0452: // test all ISO 8 controls
0453: for (int ch = 0; ch <= 0x9f; ++ch) {
0454: if (ch == 0x20) {
0455: // skip ASCII graphic characters and continue with DEL
0456: ch = 0x7f;
0457: }
0458: if (UCharacter.isPrintable(ch)) {
0459: errln("Fail \\u"
0460: + hex(ch)
0461: + " is a ISO 8 control character hence not printable\n");
0462: }
0463: }
0464:
0465: /* test all Latin-1 graphic characters */
0466: for (int ch = 0x20; ch <= 0xff; ++ch) {
0467: if (ch == 0x7f) {
0468: ch = 0xa0;
0469: }
0470: if (!UCharacter.isPrintable(ch) && ch != 0x00AD/* Unicode 4.0 changed the defintion of soft hyphen to be a Cf*/) {
0471: errln("Fail \\u" + hex(ch)
0472: + " is a Latin-1 graphic character\n");
0473: }
0474: }
0475: }
0476:
0477: /**
0478: * Testing for identifier characters
0479: */
0480: public void TestIdentifier() {
0481: int unicodeidstart[] = { 0x0250, 0x0000e2, 0x000061 };
0482: int nonunicodeidstart[] = { 0x2000, 0x00000a, 0x002019 };
0483: int unicodeidpart[] = { 0x005f, 0x000032, 0x000045 };
0484: int nonunicodeidpart[] = { 0x2030, 0x0000a3, 0x000020 };
0485: int idignore[] = { 0x0006, 0x0010, 0x206b };
0486: int nonidignore[] = { 0x0075, 0x0000a3, 0x000061 };
0487:
0488: int size = unicodeidstart.length;
0489: for (int i = 0; i < size; i++) {
0490: if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i])) {
0491: errln("FAIL \\u"
0492: + hex(unicodeidstart[i])
0493: + " expected to be a unicode identifier start character");
0494: break;
0495: }
0496: if (UCharacter
0497: .isUnicodeIdentifierStart(nonunicodeidstart[i])) {
0498: errln("FAIL \\u"
0499: + hex(nonunicodeidstart[i])
0500: + " expected not to be a unicode identifier start "
0501: + "character");
0502: break;
0503: }
0504: if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i])) {
0505: errln("FAIL \\u"
0506: + hex(unicodeidpart[i])
0507: + " expected to be a unicode identifier part character");
0508: break;
0509: }
0510: if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i])) {
0511: errln("FAIL \\u"
0512: + hex(nonunicodeidpart[i])
0513: + " expected not to be a unicode identifier part "
0514: + "character");
0515: break;
0516: }
0517: if (!UCharacter.isIdentifierIgnorable(idignore[i])) {
0518: errln("FAIL \\u"
0519: + hex(idignore[i])
0520: + " expected to be a ignorable unicode character");
0521: break;
0522: }
0523: if (UCharacter.isIdentifierIgnorable(nonidignore[i])) {
0524: errln("FAIL \\u"
0525: + hex(nonidignore[i])
0526: + " expected not to be a ignorable unicode character");
0527: break;
0528: }
0529: logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u"
0530: + hex(nonunicodeidstart[i]) + " and \\u"
0531: + hex(unicodeidpart[i]) + " and \\u"
0532: + hex(nonunicodeidpart[i]) + " and \\u"
0533: + hex(idignore[i]) + " and \\u"
0534: + hex(nonidignore[i]));
0535: }
0536: }
0537:
0538: /**
0539: * Tests for the character types, direction.<br>
0540: * This method reads in UnicodeData.txt file for testing purposes. A
0541: * default path is provided relative to the src path, however the user
0542: * could set a system property to change the directory path.<br>
0543: * e.g. java -DUnicodeData="data_directory_path"
0544: * com.ibm.icu.dev.test.lang.UCharacterTest
0545: */
0546: public void TestUnicodeData() {
0547: // this is the 2 char category types used in the UnicodeData file
0548: final String TYPE = "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";
0549:
0550: // directory types used in the UnicodeData file
0551: // padded by spaces to make each type size 4
0552: final String DIR = "L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN ";
0553:
0554: final int LASTUNICODECHAR = 0xFFFD;
0555: int ch = 0, index = 0, type = 0, dir = 0;
0556:
0557: try {
0558: BufferedReader input = TestUtil
0559: .getDataReader("unicode/UnicodeData.txt");
0560: int numErrors = 0;
0561:
0562: while (ch != LASTUNICODECHAR) {
0563: String s = input.readLine();
0564: if (s.length() < 4 || s.startsWith("#")) {
0565: continue;
0566: }
0567: // geting the unicode character, its type and its direction
0568: ch = Integer.parseInt(s.substring(0, 4), 16);
0569: index = s.indexOf(';', 5);
0570: String t = s.substring(index + 1, index + 3);
0571: index += 4;
0572: int oldindex = index;
0573: index = s.indexOf(';', index);
0574: int cc = Integer.parseInt(s.substring(oldindex, index));
0575: oldindex = index + 1;
0576: index = s.indexOf(';', oldindex);
0577: String d = s.substring(oldindex, index);
0578:
0579: for (int i = 0; i < 6; i++) {
0580: index = s.indexOf(';', index + 1);
0581: // skipping to the 11th field
0582: }
0583: // iso comment
0584: oldindex = index + 1;
0585: index = s.indexOf(';', oldindex);
0586: String isocomment = s.substring(oldindex, index);
0587: // uppercase
0588: oldindex = index + 1;
0589: index = s.indexOf(';', oldindex);
0590: String upper = s.substring(oldindex, index);
0591: // lowercase
0592: oldindex = index + 1;
0593: index = s.indexOf(';', oldindex);
0594: String lower = s.substring(oldindex, index);
0595: // titlecase last element
0596: oldindex = index + 1;
0597: String title = s.substring(oldindex);
0598:
0599: // testing the category
0600: // we override the general category of some control
0601: // characters
0602: type = TYPE.indexOf(t);
0603: if (type < 0)
0604: type = 0;
0605: else
0606: type = (type >> 1) + 1;
0607: if (UCharacter.getType(ch) != type) {
0608: errln("FAIL \\u" + hex(ch) + " expected type "
0609: + type);
0610: break;
0611: }
0612:
0613: if (UCharacter.getIntPropertyValue(ch,
0614: UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) {
0615: errln("error: getIntPropertyValue(\\u"
0616: + Integer.toHexString(ch)
0617: + ", UProperty.GENERAL_CATEGORY_MASK) != "
0618: + "getMask(getType(ch))");
0619: }
0620:
0621: // testing combining class
0622: if (UCharacter.getCombiningClass(ch) != cc) {
0623: errln("FAIL \\u" + hex(ch) + " expected combining "
0624: + "class " + cc);
0625: break;
0626: }
0627:
0628: // testing the direction
0629: if (d.length() == 1)
0630: d = d + " ";
0631:
0632: dir = DIR.indexOf(d) >> 2;
0633: if (UCharacter.getDirection(ch) != dir) {
0634: errln("FAIL \\u" + hex(ch) + " expected direction "
0635: + dir + " but got "
0636: + UCharacter.getDirection(ch));
0637: break;
0638: }
0639:
0640: byte bdir = (byte) dir;
0641: if (UCharacter.getDirectionality(ch) != bdir) {
0642: errln("FAIL \\u" + hex(ch)
0643: + " expected directionality " + bdir
0644: + " but got "
0645: + UCharacter.getDirectionality(ch));
0646: break;
0647: }
0648:
0649: // testing iso comment
0650: try {
0651: String comment = UCharacter.getISOComment(ch);
0652: if (comment == null) {
0653: comment = "";
0654: }
0655: if (!comment.equals(isocomment)) {
0656: errln("FAIL \\u" + hex(ch)
0657: + " expected iso comment " + isocomment);
0658: break;
0659: }
0660: } catch (Exception e) {
0661: if (e.getMessage().indexOf("unames.icu") >= 0) {
0662: numErrors++;
0663: } else {
0664: throw e;
0665: }
0666: }
0667:
0668: int tempchar = ch;
0669: if (upper.length() > 0) {
0670: tempchar = Integer.parseInt(upper, 16);
0671: }
0672: if (UCharacter.toUpperCase(ch) != tempchar) {
0673: errln("FAIL \\u" + Utility.hex(ch, 4)
0674: + " expected uppercase \\u"
0675: + Utility.hex(tempchar, 4));
0676: break;
0677: }
0678: tempchar = ch;
0679: if (lower.length() > 0) {
0680: tempchar = Integer.parseInt(lower, 16);
0681: }
0682: if (UCharacter.toLowerCase(ch) != tempchar) {
0683: errln("FAIL \\u" + Utility.hex(ch, 4)
0684: + " expected lowercase \\u"
0685: + Utility.hex(tempchar, 4));
0686: break;
0687: }
0688: tempchar = ch;
0689: if (title.length() > 0) {
0690: tempchar = Integer.parseInt(title, 16);
0691: }
0692: if (UCharacter.toTitleCase(ch) != tempchar) {
0693: errln("FAIL \\u" + Utility.hex(ch, 4)
0694: + " expected titlecase \\u"
0695: + Utility.hex(tempchar, 4));
0696: break;
0697: }
0698: }
0699: input.close();
0700: if (numErrors > 0) {
0701: warnln("Could not find unames.icu");
0702: }
0703: } catch (Exception e) {
0704: e.printStackTrace();
0705: }
0706:
0707: if (UCharacter.UnicodeBlock.of(0x0041) != UCharacter.UnicodeBlock.BASIC_LATIN
0708: || UCharacter
0709: .getIntPropertyValue(0x41, UProperty.BLOCK) != UCharacter.UnicodeBlock.BASIC_LATIN
0710: .getID()) {
0711: errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! "
0712: + "Expected : "
0713: + UCharacter.UnicodeBlock.BASIC_LATIN.getID()
0714: + " got " + UCharacter.UnicodeBlock.of(0x0041));
0715: }
0716:
0717: // sanity check on repeated properties
0718: for (ch = 0xfffe; ch <= 0x10ffff;) {
0719: type = UCharacter.getType(ch);
0720: if (UCharacter.getIntPropertyValue(ch,
0721: UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) {
0722: errln("error: UCharacter.getIntPropertyValue(\\u"
0723: + Integer.toHexString(ch)
0724: + ", UProperty.GENERAL_CATEGORY_MASK) != "
0725: + "getMask(getType())");
0726: }
0727: if (type != UCharacterCategory.UNASSIGNED) {
0728: errln("error: UCharacter.getType(\\u"
0729: + Utility.hex(ch, 4)
0730: + " != UCharacterCategory.UNASSIGNED (returns "
0731: + UCharacterCategory.toString(UCharacter
0732: .getType(ch)) + ")");
0733: }
0734: if ((ch & 0xffff) == 0xfffe) {
0735: ++ch;
0736: } else {
0737: ch += 0xffff;
0738: }
0739: }
0740:
0741: // test that PUA is not "unassigned"
0742: for (ch = 0xe000; ch <= 0x10fffd;) {
0743: type = UCharacter.getType(ch);
0744: if (UCharacter.getIntPropertyValue(ch,
0745: UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) {
0746: errln("error: UCharacter.getIntPropertyValue(\\u"
0747: + Integer.toHexString(ch)
0748: + ", UProperty.GENERAL_CATEGORY_MASK) != "
0749: + "getMask(getType())");
0750: }
0751:
0752: if (type == UCharacterCategory.UNASSIGNED) {
0753: errln("error: UCharacter.getType(\\u"
0754: + Utility.hex(ch, 4)
0755: + ") == UCharacterCategory.UNASSIGNED");
0756: } else if (type != UCharacterCategory.PRIVATE_USE) {
0757: logln("PUA override: UCharacter.getType(\\u"
0758: + Utility.hex(ch, 4) + ")=" + type);
0759: }
0760: if (ch == 0xf8ff) {
0761: ch = 0xf0000;
0762: } else if (ch == 0xffffd) {
0763: ch = 0x100000;
0764: } else {
0765: ++ch;
0766: }
0767: }
0768: }
0769:
0770: /**
0771: * Test for the character names
0772: */
0773: public void TestNames() {
0774: try {
0775: int length = UCharacterName.getInstance()
0776: .getMaxCharNameLength();
0777: if (length < 83) { // Unicode 3.2 max char name length
0778: errln("getMaxCharNameLength()=" + length
0779: + " is too short");
0780: }
0781: // ### TODO same tests for max ISO comment length as for max name length
0782:
0783: int c[] = { 0x0061, //LATIN SMALL LETTER A
0784: 0x000284, //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
0785: 0x003401, //CJK UNIFIED IDEOGRAPH-3401
0786: 0x007fed, //CJK UNIFIED IDEOGRAPH-7FED
0787: 0x00ac00, //HANGUL SYLLABLE GA
0788: 0x00d7a3, //HANGUL SYLLABLE HIH
0789: 0x00d800, 0x00dc00, //LINEAR B SYLLABLE B008 A
0790: 0xff08, //FULLWIDTH LEFT PARENTHESIS
0791: 0x00ffe5, //FULLWIDTH YEN SIGN
0792: 0x00ffff, //null
0793: 0x0023456 //CJK UNIFIED IDEOGRAPH-23456
0794: };
0795: String name[] = {
0796: "LATIN SMALL LETTER A",
0797: "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
0798: "CJK UNIFIED IDEOGRAPH-3401",
0799: "CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
0800: "HANGUL SYLLABLE HIH", "", "",
0801: "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH YEN SIGN",
0802: "", "CJK UNIFIED IDEOGRAPH-23456" };
0803: String oldname[] = { "",
0804: "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
0805: "", "", "", "", "FULLWIDTH OPENING PARENTHESIS",
0806: "", "", "" };
0807: String extendedname[] = {
0808: "LATIN SMALL LETTER A",
0809: "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
0810: "CJK UNIFIED IDEOGRAPH-3401",
0811: "CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
0812: "HANGUL SYLLABLE HIH", "<lead surrogate-D800>",
0813: "<trail surrogate-DC00>",
0814: "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH YEN SIGN",
0815: "<noncharacter-FFFF>",
0816: "CJK UNIFIED IDEOGRAPH-23456" };
0817:
0818: int size = c.length;
0819: String str;
0820: int uc;
0821:
0822: for (int i = 0; i < size; i++) {
0823: // modern Unicode character name
0824: str = UCharacter.getName(c[i]);
0825: if ((str == null && name[i].length() > 0)
0826: || (str != null && !str.equals(name[i]))) {
0827: errln("FAIL \\u" + hex(c[i]) + " expected name "
0828: + name[i]);
0829: break;
0830: }
0831:
0832: // 1.0 Unicode character name
0833: str = UCharacter.getName1_0(c[i]);
0834: if ((str == null && oldname[i].length() > 0)
0835: || (str != null && !str.equals(oldname[i]))) {
0836: errln("FAIL \\u" + hex(c[i])
0837: + " expected 1.0 name " + oldname[i]);
0838: break;
0839: }
0840:
0841: // extended character name
0842: str = UCharacter.getExtendedName(c[i]);
0843: if (str == null || !str.equals(extendedname[i])) {
0844: errln("FAIL \\u" + hex(c[i])
0845: + " expected extended name "
0846: + extendedname[i]);
0847: break;
0848: }
0849:
0850: // retrieving unicode character from modern name
0851: uc = UCharacter.getCharFromName(name[i]);
0852: if (uc != c[i] && name[i].length() != 0) {
0853: errln("FAIL " + name[i] + " expected character \\u"
0854: + hex(c[i]));
0855: break;
0856: }
0857:
0858: //retrieving unicode character from 1.0 name
0859: uc = UCharacter.getCharFromName1_0(oldname[i]);
0860: if (uc != c[i] && oldname[i].length() != 0) {
0861: errln("FAIL " + oldname[i]
0862: + " expected 1.0 character \\u" + hex(c[i]));
0863: break;
0864: }
0865:
0866: //retrieving unicode character from 1.0 name
0867: uc = UCharacter
0868: .getCharFromExtendedName(extendedname[i]);
0869: if (uc != c[i] && i != 0 && (i == 1 || i == 6)) {
0870: errln("FAIL " + extendedname[i]
0871: + " expected extended character \\u"
0872: + hex(c[i]));
0873: break;
0874: }
0875: }
0876:
0877: // test getName works with mixed-case names (new in 2.0)
0878: if (0x61 != UCharacter
0879: .getCharFromName("LATin smALl letTER A")) {
0880: errln("FAIL: 'LATin smALl letTER A' should result in character "
0881: + "U+0061");
0882: }
0883:
0884: if (getInclusion() >= 5) {
0885: // extra testing different from icu
0886: for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i++) {
0887: str = UCharacter.getName(i);
0888: if (str != null
0889: && UCharacter.getCharFromName(str) != i) {
0890: errln("FAIL \\u" + hex(i) + " " + str
0891: + " retrieval of name and vice versa");
0892: break;
0893: }
0894: }
0895: }
0896:
0897: // Test getCharNameCharacters
0898: if (getInclusion() >= 10) {
0899: boolean map[] = new boolean[256];
0900:
0901: UnicodeSet set = new UnicodeSet(1, 0); // empty set
0902: UnicodeSet dumb = new UnicodeSet(1, 0); // empty set
0903:
0904: // uprv_getCharNameCharacters() will likely return more lowercase
0905: // letters than actual character names contain because
0906: // it includes all the characters in lowercased names of
0907: // general categories, for the full possible set of extended names.
0908: UCharacterName.getInstance().getCharNameCharacters(set);
0909:
0910: // build set the dumb (but sure-fire) way
0911: Arrays.fill(map, false);
0912:
0913: int maxLength = 0;
0914: for (int cp = 0; cp < 0x110000; ++cp) {
0915: String n = UCharacter.getExtendedName(cp);
0916: int len = n.length();
0917: if (len > maxLength) {
0918: maxLength = len;
0919: }
0920:
0921: for (int i = 0; i < len; ++i) {
0922: char ch = n.charAt(i);
0923: if (!map[ch & 0xff]) {
0924: dumb.add(ch);
0925: map[ch & 0xff] = true;
0926: }
0927: }
0928: }
0929:
0930: length = UCharacterName.getInstance()
0931: .getMaxCharNameLength();
0932: if (length != maxLength) {
0933: errln("getMaxCharNameLength()=" + length
0934: + " differs from the maximum length "
0935: + maxLength + " of all extended names");
0936: }
0937:
0938: // compare the sets. Where is my uset_equals?!!
0939: boolean ok = true;
0940: for (int i = 0; i < 256; ++i) {
0941: if (set.contains(i) != dumb.contains(i)) {
0942: if (0x61 <= i && i <= 0x7a // a-z
0943: && set.contains(i) && !dumb.contains(i)) {
0944: // ignore lowercase a-z that are in set but not in dumb
0945: ok = true;
0946: } else {
0947: ok = false;
0948: break;
0949: }
0950: }
0951: }
0952:
0953: String pattern1 = set.toPattern(true);
0954: String pattern2 = dumb.toPattern(true);
0955:
0956: if (!ok) {
0957: errln("FAIL: getCharNameCharacters() returned "
0958: + pattern1 + " expected " + pattern2
0959: + " (too many lowercase a-z are ok)");
0960: } else {
0961: logln("Ok: getCharNameCharacters() returned "
0962: + pattern1);
0963: }
0964: }
0965: // improve code coverage
0966: String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"
0967: + "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"
0968: + "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"
0969: + "FULLWIDTH YEN SIGN|" + "null|" + // getName returns null because 0xFFFF does not have a name, but has an extended name!
0970: "CJK UNIFIED IDEOGRAPH-23456";
0971: String separator = "|";
0972: String source = Utility.valueOf(c);
0973: String result = UCharacter.getName(source, separator);
0974: if (!result.equals(expected)) {
0975: errln("UCharacter.getName did not return the expected result.\n\t Expected: "
0976: + expected + "\n\t Got: " + result);
0977: }
0978:
0979: } catch (IllegalArgumentException e) {
0980: if (e.getMessage().indexOf("unames.icu") >= 0) {
0981: warnln("Could not find unames.icu");
0982: } else {
0983: throw e;
0984: }
0985: }
0986:
0987: }
0988:
0989: /**
0990: * Testing name iteration
0991: */
0992: public void TestNameIteration() throws Exception {
0993: try {
0994: ValueIterator iterator = UCharacter
0995: .getExtendedNameIterator();
0996: ValueIterator.Element element = new ValueIterator.Element();
0997: ValueIterator.Element old = new ValueIterator.Element();
0998: // testing subrange
0999: iterator.setRange(-10, -5);
1000: if (iterator.next(element)) {
1001: errln("Fail, expected iterator to return false when range is set outside the meaningful range");
1002: }
1003: iterator.setRange(0x110000, 0x111111);
1004: if (iterator.next(element)) {
1005: errln("Fail, expected iterator to return false when range is set outside the meaningful range");
1006: }
1007: try {
1008: iterator.setRange(50, 10);
1009: errln("Fail, expected exception when encountered invalid range");
1010: } catch (Exception e) {
1011: }
1012:
1013: iterator.setRange(-10, 10);
1014: if (!iterator.next(element) || element.integer != 0) {
1015: errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range");
1016: }
1017:
1018: iterator.setRange(0x10FFFE, 0x200000);
1019: int last = 0;
1020: while (iterator.next(element)) {
1021: last = element.integer;
1022: }
1023: if (last != 0x10FFFF) {
1024: errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range");
1025: }
1026:
1027: iterator = UCharacter.getNameIterator();
1028: iterator.setRange(0xF, 0x45);
1029: while (iterator.next(element)) {
1030: if (element.integer <= old.integer) {
1031: errln("FAIL next returned a less codepoint \\u"
1032: + Integer.toHexString(element.integer)
1033: + " than \\u"
1034: + Integer.toHexString(old.integer));
1035: break;
1036: }
1037: if (!UCharacter.getName(element.integer).equals(
1038: element.value)) {
1039: errln("FAIL next codepoint \\u"
1040: + Integer.toHexString(element.integer)
1041: + " does not have the expected name "
1042: + UCharacter.getName(element.integer)
1043: + " instead have the name "
1044: + (String) element.value);
1045: break;
1046: }
1047: old.integer = element.integer;
1048: }
1049:
1050: iterator.reset();
1051: iterator.next(element);
1052: if (element.integer != 0x20) {
1053: errln("FAIL reset in iterator");
1054: }
1055:
1056: iterator.setRange(0, 0x110000);
1057: old.integer = 0;
1058: while (iterator.next(element)) {
1059: if (element.integer != 0
1060: && element.integer <= old.integer) {
1061: errln("FAIL next returned a less codepoint \\u"
1062: + Integer.toHexString(element.integer)
1063: + " than \\u"
1064: + Integer.toHexString(old.integer));
1065: break;
1066: }
1067: if (!UCharacter.getName(element.integer).equals(
1068: element.value)) {
1069: errln("FAIL next codepoint \\u"
1070: + Integer.toHexString(element.integer)
1071: + " does not have the expected name "
1072: + UCharacter.getName(element.integer)
1073: + " instead have the name "
1074: + (String) element.value);
1075: break;
1076: }
1077: for (int i = old.integer + 1; i < element.integer; i++) {
1078: if (UCharacter.getName(i) != null) {
1079: errln("FAIL between codepoints are not null \\u"
1080: + Integer.toHexString(old.integer)
1081: + " and "
1082: + Integer.toHexString(element.integer)
1083: + " has "
1084: + Integer.toHexString(i)
1085: + " with a name "
1086: + UCharacter.getName(i));
1087: break;
1088: }
1089: }
1090: old.integer = element.integer;
1091: }
1092:
1093: iterator = UCharacter.getExtendedNameIterator();
1094: old.integer = 0;
1095: while (iterator.next(element)) {
1096: if (element.integer != 0
1097: && element.integer != old.integer) {
1098: errln("FAIL next returned a codepoint \\u"
1099: + Integer.toHexString(element.integer)
1100: + " different from \\u"
1101: + Integer.toHexString(old.integer));
1102: break;
1103: }
1104: if (!UCharacter.getExtendedName(element.integer)
1105: .equals(element.value)) {
1106: errln("FAIL next codepoint \\u"
1107: + Integer.toHexString(element.integer)
1108: + " name should be "
1109: + UCharacter
1110: .getExtendedName(element.integer)
1111: + " instead of " + (String) element.value);
1112: break;
1113: }
1114: old.integer++;
1115: }
1116: iterator = UCharacter.getName1_0Iterator();
1117: old.integer = 0;
1118: while (iterator.next(element)) {
1119: logln(Integer.toHexString(element.integer) + " "
1120: + (String) element.value);
1121: if (element.integer != 0
1122: && element.integer <= old.integer) {
1123: errln("FAIL next returned a less codepoint \\u"
1124: + Integer.toHexString(element.integer)
1125: + " than \\u"
1126: + Integer.toHexString(old.integer));
1127: break;
1128: }
1129: if (!element.value.equals(UCharacter
1130: .getName1_0(element.integer))) {
1131: errln("FAIL next codepoint \\u"
1132: + Integer.toHexString(element.integer)
1133: + " name cannot be null");
1134: break;
1135: }
1136: for (int i = old.integer + 1; i < element.integer; i++) {
1137: if (UCharacter.getName1_0(i) != null) {
1138: errln("FAIL between codepoints are not null \\u"
1139: + Integer.toHexString(old.integer)
1140: + " and "
1141: + Integer.toHexString(element.integer)
1142: + " has "
1143: + Integer.toHexString(i)
1144: + " with a name "
1145: + UCharacter.getName1_0(i));
1146: break;
1147: }
1148: }
1149: old.integer = element.integer;
1150: }
1151: } catch (Exception e) {
1152: // !!! wouldn't preflighting be simpler? This looks like
1153: // it is effectively be doing that. It seems that for every
1154: // true error the code will call errln, which will throw the error, which
1155: // this will catch, which this will then rethrow the error. Just seems
1156: // cumbersome.
1157: if (e.getMessage().indexOf("unames.icu") >= 0) {
1158: warnln("Could not find unames.icu");
1159: } else {
1160: errln(e.getMessage());
1161: }
1162: }
1163: }
1164:
1165: /**
1166: * Testing the for illegal characters
1167: */
1168: public void TestIsLegal() {
1169: int illegal[] = { 0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF,
1170: 0x0010FFFE, 0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF,
1171: 0x00FDE0, 0x00FDEF, 0xD800, 0xDC00, -1 };
1172: int legal[] = { 0x61, 0x00FFFD, 0x0010000, 0x005FFFD,
1173: 0x0060000, 0x0010FFFD, 0xFDCF, 0x00FDF0 };
1174: for (int count = 0; count < illegal.length; count++) {
1175: if (UCharacter.isLegal(illegal[count])) {
1176: errln("FAIL \\u" + hex(illegal[count])
1177: + " is not a legal character");
1178: }
1179: }
1180:
1181: for (int count = 0; count < legal.length; count++) {
1182: if (!UCharacter.isLegal(legal[count])) {
1183: errln("FAIL \\u" + hex(legal[count])
1184: + " is a legal character");
1185: }
1186: }
1187:
1188: String illegalStr = "This is an illegal string ";
1189: String legalStr = "This is a legal string ";
1190:
1191: for (int count = 0; count < illegal.length; count++) {
1192: StringBuffer str = new StringBuffer(illegalStr);
1193: if (illegal[count] < 0x10000) {
1194: str.append((char) illegal[count]);
1195: } else {
1196: char lead = UTF16.getLeadSurrogate(illegal[count]);
1197: char trail = UTF16.getTrailSurrogate(illegal[count]);
1198: str.append(lead);
1199: str.append(trail);
1200: }
1201: if (UCharacter.isLegal(str.toString())) {
1202: errln("FAIL " + hex(str.toString())
1203: + " is not a legal string");
1204: }
1205: }
1206:
1207: for (int count = 0; count < legal.length; count++) {
1208: StringBuffer str = new StringBuffer(legalStr);
1209: if (legal[count] < 0x10000) {
1210: str.append((char) legal[count]);
1211: } else {
1212: char lead = UTF16.getLeadSurrogate(legal[count]);
1213: char trail = UTF16.getTrailSurrogate(legal[count]);
1214: str.append(lead);
1215: str.append(trail);
1216: }
1217: if (!UCharacter.isLegal(str.toString())) {
1218: errln("FAIL " + hex(str.toString())
1219: + " is a legal string");
1220: }
1221: }
1222: }
1223:
1224: /**
1225: * Test getCodePoint
1226: */
1227: public void TestCodePoint() {
1228: int ch = 0x10000;
1229: for (char i = 0xD800; i < 0xDC00; i++) {
1230: for (char j = 0xDC00; j <= 0xDFFF; j++) {
1231: if (UCharacter.getCodePoint(i, j) != ch) {
1232: errln("Error getting codepoint for surrogate "
1233: + "characters \\u" + Integer.toHexString(i)
1234: + " \\u" + Integer.toHexString(j));
1235: }
1236: ch++;
1237: }
1238: }
1239: try {
1240: UCharacter.getCodePoint((char) 0xD7ff, (char) 0xDC00);
1241: errln("Invalid surrogate characters should not form a "
1242: + "supplementary");
1243: } catch (Exception e) {
1244: }
1245: for (char i = 0; i < 0xFFFF; i++) {
1246: if (i == 0xFFFE || (i >= 0xD800 && i <= 0xDFFF)
1247: || (i >= 0xFDD0 && i <= 0xFDEF)) {
1248: // not a character
1249: try {
1250: UCharacter.getCodePoint(i);
1251: errln("Not a character is not a valid codepoint");
1252: } catch (Exception e) {
1253: }
1254: } else {
1255: if (UCharacter.getCodePoint(i) != i) {
1256: errln("A valid codepoint should return itself");
1257: }
1258: }
1259: }
1260: }
1261:
1262: /**
1263: * This method is alittle different from the type test in icu4c.
1264: * But combined with testUnicodeData, they basically do the same thing.
1265: */
1266: public void TestIteration() {
1267: int limit = 0;
1268: int prevtype = -1;
1269: int shouldBeDir;
1270: int test[][] = { { 0x41, UCharacterCategory.UPPERCASE_LETTER },
1271: { 0x308, UCharacterCategory.NON_SPACING_MARK },
1272: { 0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES },
1273: { 0xe0041, UCharacterCategory.FORMAT },
1274: { 0xeffff, UCharacterCategory.UNASSIGNED } };
1275:
1276: // default Bidi classes for unassigned code points
1277: int defaultBidi[][] = {
1278: { 0x0590, UCharacterDirection.LEFT_TO_RIGHT },
1279: { 0x0600, UCharacterDirection.RIGHT_TO_LEFT },
1280: { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1281: { 0x0900, UCharacterDirection.RIGHT_TO_LEFT },
1282: { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT },
1283: { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT },
1284: { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1285: { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT },
1286: { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1287: { 0x10800, UCharacterDirection.LEFT_TO_RIGHT },
1288: { 0x11000, UCharacterDirection.RIGHT_TO_LEFT },
1289: { 0x110000, UCharacterDirection.LEFT_TO_RIGHT } };
1290:
1291: RangeValueIterator iterator = UCharacter.getTypeIterator();
1292: RangeValueIterator.Element result = new RangeValueIterator.Element();
1293: while (iterator.next(result)) {
1294: if (result.start != limit) {
1295: errln("UCharacterIteration failed: Ranges not continuous "
1296: + "0x" + Integer.toHexString(result.start));
1297: }
1298:
1299: limit = result.limit;
1300: if (result.value == prevtype) {
1301: errln("Type of the next set of enumeration should be different");
1302: }
1303: prevtype = result.value;
1304:
1305: for (int i = result.start; i < limit; i++) {
1306: int temptype = UCharacter.getType(i);
1307: if (temptype != result.value) {
1308: errln("UCharacterIteration failed: Codepoint \\u"
1309: + Integer.toHexString(i)
1310: + " should be of type " + temptype
1311: + " not " + result.value);
1312: }
1313: }
1314:
1315: for (int i = 0; i < test.length; ++i) {
1316: if (result.start <= test[i][0]
1317: && test[i][0] < result.limit) {
1318: if (result.value != test[i][1]) {
1319: errln("error: getTypes() has range ["
1320: + Integer.toHexString(result.start)
1321: + ", "
1322: + Integer.toHexString(result.limit)
1323: + "] with type " + result.value
1324: + " instead of ["
1325: + Integer.toHexString(test[i][0])
1326: + ", "
1327: + Integer.toHexString(test[i][1]));
1328: }
1329: }
1330: }
1331:
1332: // LineBreak.txt specifies:
1333: // # - Assigned characters that are not listed explicitly are given the value
1334: // # "AL".
1335: // # - Unassigned characters are given the value "XX".
1336: //
1337: // PUA characters are listed explicitly with "XX".
1338: // Verify that no assigned character has "XX".
1339: if (result.value != UCharacterCategory.UNASSIGNED
1340: && result.value != UCharacterCategory.PRIVATE_USE) {
1341: int c = result.start;
1342: while (c < result.limit) {
1343: if (0 == UCharacter.getIntPropertyValue(c,
1344: UProperty.LINE_BREAK)) {
1345: logln("error UProperty.LINE_BREAK(assigned \\u"
1346: + Utility.hex(c, 4) + ")=XX");
1347: }
1348: ++c;
1349: }
1350: }
1351:
1352: /*
1353: * Verify default Bidi classes.
1354: * For recent Unicode versions, see UCD.html.
1355: *
1356: * For older Unicode versions:
1357: * See table 3-7 "Bidirectional Character Types" in UAX #9.
1358: * http://www.unicode.org/reports/tr9/
1359: *
1360: * See also DerivedBidiClass.txt for Cn code points!
1361: *
1362: * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1363: * changed some default values.
1364: * In particular, non-characters and unassigned Default Ignorable Code Points
1365: * change from L to BN.
1366: *
1367: * UCD.html version 4.0.1 does not yet reflect these changes.
1368: */
1369: if (result.value == UCharacterCategory.UNASSIGNED
1370: || result.value == UCharacterCategory.PRIVATE_USE) {
1371: int c = result.start;
1372: for (int i = 0; i < defaultBidi.length
1373: && c < result.limit; ++i) {
1374: if (c < defaultBidi[i][0]) {
1375: while (c < result.limit
1376: && c < defaultBidi[i][0]) {
1377: // TODO change to public UCharacter.isNonCharacter(c) once it's available
1378: if (com.ibm.icu.impl.UCharacterUtility
1379: .isNonCharacter(c)
1380: || UCharacter
1381: .hasBinaryProperty(
1382: c,
1383: UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {
1384: shouldBeDir = UCharacter.BOUNDARY_NEUTRAL;
1385: } else {
1386: shouldBeDir = defaultBidi[i][1];
1387: }
1388:
1389: if (UCharacter.getDirection(c) != shouldBeDir
1390: || UCharacter.getIntPropertyValue(
1391: c, UProperty.BIDI_CLASS) != shouldBeDir) {
1392: errln("error: getDirection(unassigned/PUA "
1393: + Integer.toHexString(c)
1394: + ") should be " + shouldBeDir);
1395: }
1396: ++c;
1397: }
1398: }
1399: }
1400: }
1401: }
1402:
1403: iterator.reset();
1404: if (iterator.next(result) == false || result.start != 0) {
1405: System.out.println("result " + result.start);
1406: errln("UCharacterIteration reset() failed");
1407: }
1408: }
1409:
1410: /**
1411: * Testing getAge
1412: */
1413: public void TestGetAge() {
1414: int ages[] = { 0x41, 1, 1, 0, 0, 0xffff, 1, 1, 0, 0, 0x20ab, 2,
1415: 0, 0, 0, 0x2fffe, 2, 0, 0, 0, 0x20ac, 2, 1, 0, 0,
1416: 0xfb1d, 3, 0, 0, 0, 0x3f4, 3, 1, 0, 0, 0x10300, 3, 1,
1417: 0, 0, 0x220, 3, 2, 0, 0, 0xff60, 3, 2, 0, 0 };
1418: for (int i = 0; i < ages.length; i += 5) {
1419: VersionInfo age = UCharacter.getAge(ages[i]);
1420: if (age != VersionInfo.getInstance(ages[i + 1],
1421: ages[i + 2], ages[i + 3], ages[i + 4])) {
1422: errln("error: getAge(\\u"
1423: + Integer.toHexString(ages[i]) + ") == "
1424: + age.toString() + " instead of " + ages[i + 1]
1425: + "." + ages[i + 2] + "." + ages[i + 3] + "."
1426: + ages[i + 4]);
1427: }
1428: }
1429: }
1430:
1431: /**
1432: * Test binary non core properties
1433: */
1434: public void TestAdditionalProperties() {
1435: // test data for hasBinaryProperty()
1436: int props[][] = { // code point, property
1437: { 0x0627, UProperty.ALPHABETIC, 1 },
1438: { 0x1034a, UProperty.ALPHABETIC, 1 },
1439: { 0x2028, UProperty.ALPHABETIC, 0 },
1440:
1441: { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 },
1442: { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 },
1443:
1444: { 0x202c, UProperty.BIDI_CONTROL, 1 },
1445: { 0x202f, UProperty.BIDI_CONTROL, 0 },
1446:
1447: { 0x003c, UProperty.BIDI_MIRRORED, 1 },
1448: { 0x003d, UProperty.BIDI_MIRRORED, 0 },
1449:
1450: { 0x058a, UProperty.DASH, 1 },
1451: { 0x007e, UProperty.DASH, 0 },
1452:
1453: { 0x0c4d, UProperty.DIACRITIC, 1 },
1454: { 0x3000, UProperty.DIACRITIC, 0 },
1455:
1456: { 0x0e46, UProperty.EXTENDER, 1 },
1457: { 0x0020, UProperty.EXTENDER, 0 },
1458:
1459: { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },
1460: { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 },
1461: { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 },
1462:
1463: { 0x110a, UProperty.NFD_INERT, 1 }, /* Jamo L */
1464: { 0x0308, UProperty.NFD_INERT, 0 },
1465:
1466: { 0x1164, UProperty.NFKD_INERT, 1 }, /* Jamo V */
1467: { 0x1d79d, UProperty.NFKD_INERT, 0 }, /* math compat version of xi */
1468:
1469: { 0x0021, UProperty.NFC_INERT, 1 }, /* ! */
1470: { 0x0061, UProperty.NFC_INERT, 0 }, /* a */
1471: { 0x00e4, UProperty.NFC_INERT, 0 }, /* a-umlaut */
1472: { 0x0102, UProperty.NFC_INERT, 0 }, /* a-breve */
1473: { 0xac1c, UProperty.NFC_INERT, 0 }, /* Hangul LV */
1474: { 0xac1d, UProperty.NFC_INERT, 1 }, /* Hangul LVT */
1475:
1476: { 0x1d79d, UProperty.NFKC_INERT, 0 }, /* math compat version of xi */
1477: { 0x2a6d6, UProperty.NFKC_INERT, 1 }, /* Han, last of CJK ext. B */
1478:
1479: { 0x00e4, UProperty.SEGMENT_STARTER, 1 },
1480: { 0x0308, UProperty.SEGMENT_STARTER, 0 },
1481: { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */
1482: { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */
1483: { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */
1484: { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */
1485:
1486: { 0x0044, UProperty.HEX_DIGIT, 1 },
1487: { 0xff46, UProperty.HEX_DIGIT, 1 },
1488: { 0x0047, UProperty.HEX_DIGIT, 0 },
1489:
1490: { 0x30fb, UProperty.HYPHEN, 1 },
1491: { 0xfe58, UProperty.HYPHEN, 0 },
1492:
1493: { 0x2172, UProperty.ID_CONTINUE, 1 },
1494: { 0x0307, UProperty.ID_CONTINUE, 1 },
1495: { 0x005c, UProperty.ID_CONTINUE, 0 },
1496:
1497: { 0x2172, UProperty.ID_START, 1 },
1498: { 0x007a, UProperty.ID_START, 1 },
1499: { 0x0039, UProperty.ID_START, 0 },
1500:
1501: { 0x4db5, UProperty.IDEOGRAPHIC, 1 },
1502: { 0x2f999, UProperty.IDEOGRAPHIC, 1 },
1503: { 0x2f99, UProperty.IDEOGRAPHIC, 0 },
1504:
1505: { 0x200c, UProperty.JOIN_CONTROL, 1 },
1506: { 0x2029, UProperty.JOIN_CONTROL, 0 },
1507:
1508: { 0x1d7bc, UProperty.LOWERCASE, 1 },
1509: { 0x0345, UProperty.LOWERCASE, 1 },
1510: { 0x0030, UProperty.LOWERCASE, 0 },
1511:
1512: { 0x1d7a9, UProperty.MATH, 1 },
1513: { 0x2135, UProperty.MATH, 1 },
1514: { 0x0062, UProperty.MATH, 0 },
1515:
1516: { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 },
1517: { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 },
1518: { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 },
1519:
1520: { 0x0022, UProperty.QUOTATION_MARK, 1 },
1521: { 0xff62, UProperty.QUOTATION_MARK, 1 },
1522: { 0xd840, UProperty.QUOTATION_MARK, 0 },
1523:
1524: { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 },
1525: { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 },
1526:
1527: { 0x1d44a, UProperty.UPPERCASE, 1 },
1528: { 0x2162, UProperty.UPPERCASE, 1 },
1529: { 0x0345, UProperty.UPPERCASE, 0 },
1530:
1531: { 0x0020, UProperty.WHITE_SPACE, 1 },
1532: { 0x202f, UProperty.WHITE_SPACE, 1 },
1533: { 0x3001, UProperty.WHITE_SPACE, 0 },
1534:
1535: { 0x0711, UProperty.XID_CONTINUE, 1 },
1536: { 0x1d1aa, UProperty.XID_CONTINUE, 1 },
1537: { 0x007c, UProperty.XID_CONTINUE, 0 },
1538:
1539: { 0x16ee, UProperty.XID_START, 1 },
1540: { 0x23456, UProperty.XID_START, 1 },
1541: { 0x1d1aa, UProperty.XID_START, 0 },
1542:
1543: /*
1544: * Version break:
1545: * The following properties are only supported starting with the
1546: * Unicode version indicated in the second field.
1547: */
1548: { -1, 0x320, 0 },
1549:
1550: { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },
1551: { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 },
1552: { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 },
1553:
1554: { 0x0341, UProperty.DEPRECATED, 1 },
1555: { 0xe0041, UProperty.DEPRECATED, 0 },
1556:
1557: { 0x00a0, UProperty.GRAPHEME_BASE, 1 },
1558: { 0x0a4d, UProperty.GRAPHEME_BASE, 0 },
1559: { 0xff9f, UProperty.GRAPHEME_BASE, 1 }, /* changed from Unicode 3.2 to 4 */
1560:
1561: { 0x0300, UProperty.GRAPHEME_EXTEND, 1 },
1562: { 0xff9f, UProperty.GRAPHEME_EXTEND, 0 }, /* changed from Unicode 3.2 to 4 */
1563: { 0x0603, UProperty.GRAPHEME_EXTEND, 0 },
1564:
1565: { 0x0a4d, UProperty.GRAPHEME_LINK, 1 },
1566: { 0xff9f, UProperty.GRAPHEME_LINK, 0 },
1567:
1568: { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 },
1569: { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 },
1570:
1571: { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 },
1572: { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 },
1573:
1574: { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 },
1575: { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 },
1576:
1577: { 0x2e9b, UProperty.RADICAL, 1 },
1578: { 0x4e00, UProperty.RADICAL, 0 },
1579:
1580: { 0x012f, UProperty.SOFT_DOTTED, 1 },
1581: { 0x0049, UProperty.SOFT_DOTTED, 0 },
1582:
1583: { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 },
1584: { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 },
1585:
1586: { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
1587:
1588: { 0x002e, UProperty.S_TERM, 1 },
1589: { 0x0061, UProperty.S_TERM, 0 },
1590:
1591: { 0x180c, UProperty.VARIATION_SELECTOR, 1 },
1592: { 0xfe03, UProperty.VARIATION_SELECTOR, 1 },
1593: { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 },
1594: { 0xe0200, UProperty.VARIATION_SELECTOR, 0 },
1595:
1596: /* enum/integer type properties */
1597: /* test default Bidi classes for unassigned code points */
1598: { 0x0590, UProperty.BIDI_CLASS,
1599: UCharacterDirection.RIGHT_TO_LEFT },
1600: { 0x05cf, UProperty.BIDI_CLASS,
1601: UCharacterDirection.RIGHT_TO_LEFT },
1602: { 0x05ed, UProperty.BIDI_CLASS,
1603: UCharacterDirection.RIGHT_TO_LEFT },
1604: { 0x07f2, UProperty.BIDI_CLASS,
1605: UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
1606: { 0x07fe, UProperty.BIDI_CLASS,
1607: UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */
1608: { 0x08ba, UProperty.BIDI_CLASS,
1609: UCharacterDirection.RIGHT_TO_LEFT },
1610: { 0xfb37, UProperty.BIDI_CLASS,
1611: UCharacterDirection.RIGHT_TO_LEFT },
1612: { 0xfb42, UProperty.BIDI_CLASS,
1613: UCharacterDirection.RIGHT_TO_LEFT },
1614: { 0x10806, UProperty.BIDI_CLASS,
1615: UCharacterDirection.RIGHT_TO_LEFT },
1616: { 0x10909, UProperty.BIDI_CLASS,
1617: UCharacterDirection.RIGHT_TO_LEFT },
1618: { 0x10fe4, UProperty.BIDI_CLASS,
1619: UCharacterDirection.RIGHT_TO_LEFT },
1620:
1621: { 0x0606, UProperty.BIDI_CLASS,
1622: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1623: { 0x061c, UProperty.BIDI_CLASS,
1624: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1625: { 0x063f, UProperty.BIDI_CLASS,
1626: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1627: { 0x070e, UProperty.BIDI_CLASS,
1628: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1629: { 0x0775, UProperty.BIDI_CLASS,
1630: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1631: { 0xfbc2, UProperty.BIDI_CLASS,
1632: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1633: { 0xfd90, UProperty.BIDI_CLASS,
1634: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1635: { 0xfefe, UProperty.BIDI_CLASS,
1636: UCharacterDirection.RIGHT_TO_LEFT_ARABIC },
1637:
1638: { 0x02AF, UProperty.BLOCK,
1639: UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() },
1640: { 0x0C4E, UProperty.BLOCK,
1641: UCharacter.UnicodeBlock.TELUGU.getID() },
1642: {
1643: 0x155A,
1644: UProperty.BLOCK,
1645: UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1646: .getID() },
1647: { 0x1717, UProperty.BLOCK,
1648: UCharacter.UnicodeBlock.TAGALOG.getID() },
1649: { 0x1900, UProperty.BLOCK,
1650: UCharacter.UnicodeBlock.LIMBU.getID() },
1651: { 0x1AFF, UProperty.BLOCK,
1652: UCharacter.UnicodeBlock.NO_BLOCK.getID() },
1653: { 0x3040, UProperty.BLOCK,
1654: UCharacter.UnicodeBlock.HIRAGANA.getID() },
1655: {
1656: 0x1D0FF,
1657: UProperty.BLOCK,
1658: UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS
1659: .getID() },
1660: { 0x50000, UProperty.BLOCK,
1661: UCharacter.UnicodeBlock.NO_BLOCK.getID() },
1662: { 0xEFFFF, UProperty.BLOCK,
1663: UCharacter.UnicodeBlock.NO_BLOCK.getID() },
1664: {
1665: 0x10D0FF,
1666: UProperty.BLOCK,
1667: UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B
1668: .getID() },
1669:
1670: /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
1671: { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 },
1672:
1673: { 0x00A0, UProperty.DECOMPOSITION_TYPE,
1674: UCharacter.DecompositionType.NOBREAK },
1675: { 0x00A8, UProperty.DECOMPOSITION_TYPE,
1676: UCharacter.DecompositionType.COMPAT },
1677: { 0x00bf, UProperty.DECOMPOSITION_TYPE,
1678: UCharacter.DecompositionType.NONE },
1679: { 0x00c0, UProperty.DECOMPOSITION_TYPE,
1680: UCharacter.DecompositionType.CANONICAL },
1681: { 0x1E9B, UProperty.DECOMPOSITION_TYPE,
1682: UCharacter.DecompositionType.CANONICAL },
1683: { 0xBCDE, UProperty.DECOMPOSITION_TYPE,
1684: UCharacter.DecompositionType.CANONICAL },
1685: { 0xFB5D, UProperty.DECOMPOSITION_TYPE,
1686: UCharacter.DecompositionType.MEDIAL },
1687: { 0x1D736, UProperty.DECOMPOSITION_TYPE,
1688: UCharacter.DecompositionType.FONT },
1689: { 0xe0033, UProperty.DECOMPOSITION_TYPE,
1690: UCharacter.DecompositionType.NONE },
1691:
1692: { 0x0009, UProperty.EAST_ASIAN_WIDTH,
1693: UCharacter.EastAsianWidth.NEUTRAL },
1694: { 0x0020, UProperty.EAST_ASIAN_WIDTH,
1695: UCharacter.EastAsianWidth.NARROW },
1696: { 0x00B1, UProperty.EAST_ASIAN_WIDTH,
1697: UCharacter.EastAsianWidth.AMBIGUOUS },
1698: { 0x20A9, UProperty.EAST_ASIAN_WIDTH,
1699: UCharacter.EastAsianWidth.HALFWIDTH },
1700: { 0x2FFB, UProperty.EAST_ASIAN_WIDTH,
1701: UCharacter.EastAsianWidth.WIDE },
1702: { 0x3000, UProperty.EAST_ASIAN_WIDTH,
1703: UCharacter.EastAsianWidth.FULLWIDTH },
1704: { 0x35bb, UProperty.EAST_ASIAN_WIDTH,
1705: UCharacter.EastAsianWidth.WIDE },
1706: { 0x58bd, UProperty.EAST_ASIAN_WIDTH,
1707: UCharacter.EastAsianWidth.WIDE },
1708: { 0xD7A3, UProperty.EAST_ASIAN_WIDTH,
1709: UCharacter.EastAsianWidth.WIDE },
1710: { 0xEEEE, UProperty.EAST_ASIAN_WIDTH,
1711: UCharacter.EastAsianWidth.AMBIGUOUS },
1712: { 0x1D198, UProperty.EAST_ASIAN_WIDTH,
1713: UCharacter.EastAsianWidth.NEUTRAL },
1714: { 0x20000, UProperty.EAST_ASIAN_WIDTH,
1715: UCharacter.EastAsianWidth.WIDE },
1716: { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH,
1717: UCharacter.EastAsianWidth.WIDE },
1718: { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH,
1719: UCharacter.EastAsianWidth.WIDE },
1720: { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH,
1721: UCharacter.EastAsianWidth.NEUTRAL },
1722: { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH,
1723: UCharacter.EastAsianWidth.AMBIGUOUS },
1724: { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH,
1725: UCharacter.EastAsianWidth.AMBIGUOUS },
1726:
1727: /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
1728: { 0xd7d7, UProperty.GENERAL_CATEGORY, 0 },
1729:
1730: { 0x0444, UProperty.JOINING_GROUP,
1731: UCharacter.JoiningGroup.NO_JOINING_GROUP },
1732: { 0x0639, UProperty.JOINING_GROUP,
1733: UCharacter.JoiningGroup.AIN },
1734: { 0x072A, UProperty.JOINING_GROUP,
1735: UCharacter.JoiningGroup.DALATH_RISH },
1736: { 0x0647, UProperty.JOINING_GROUP,
1737: UCharacter.JoiningGroup.HEH },
1738: { 0x06C1, UProperty.JOINING_GROUP,
1739: UCharacter.JoiningGroup.HEH_GOAL },
1740: { 0x06C3, UProperty.JOINING_GROUP,
1741: UCharacter.JoiningGroup.HAMZA_ON_HEH_GOAL },
1742:
1743: { 0x200C, UProperty.JOINING_TYPE,
1744: UCharacter.JoiningType.NON_JOINING },
1745: { 0x200D, UProperty.JOINING_TYPE,
1746: UCharacter.JoiningType.JOIN_CAUSING },
1747: { 0x0639, UProperty.JOINING_TYPE,
1748: UCharacter.JoiningType.DUAL_JOINING },
1749: { 0x0640, UProperty.JOINING_TYPE,
1750: UCharacter.JoiningType.JOIN_CAUSING },
1751: { 0x06C3, UProperty.JOINING_TYPE,
1752: UCharacter.JoiningType.RIGHT_JOINING },
1753: { 0x0300, UProperty.JOINING_TYPE,
1754: UCharacter.JoiningType.TRANSPARENT },
1755: { 0x070F, UProperty.JOINING_TYPE,
1756: UCharacter.JoiningType.TRANSPARENT },
1757: { 0xe0033, UProperty.JOINING_TYPE,
1758: UCharacter.JoiningType.TRANSPARENT },
1759:
1760: /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
1761: { 0xe7e7, UProperty.LINE_BREAK,
1762: UCharacter.LineBreak.UNKNOWN },
1763: { 0x10fffd, UProperty.LINE_BREAK,
1764: UCharacter.LineBreak.UNKNOWN },
1765: { 0x0028, UProperty.LINE_BREAK,
1766: UCharacter.LineBreak.OPEN_PUNCTUATION },
1767: { 0x232A, UProperty.LINE_BREAK,
1768: UCharacter.LineBreak.CLOSE_PUNCTUATION },
1769: { 0x3401, UProperty.LINE_BREAK,
1770: UCharacter.LineBreak.IDEOGRAPHIC },
1771: { 0x4e02, UProperty.LINE_BREAK,
1772: UCharacter.LineBreak.IDEOGRAPHIC },
1773: { 0x20004, UProperty.LINE_BREAK,
1774: UCharacter.LineBreak.IDEOGRAPHIC },
1775: { 0xf905, UProperty.LINE_BREAK,
1776: UCharacter.LineBreak.IDEOGRAPHIC },
1777: { 0xdb7e, UProperty.LINE_BREAK,
1778: UCharacter.LineBreak.SURROGATE },
1779: { 0xdbfd, UProperty.LINE_BREAK,
1780: UCharacter.LineBreak.SURROGATE },
1781: { 0xdffc, UProperty.LINE_BREAK,
1782: UCharacter.LineBreak.SURROGATE },
1783: { 0x2762, UProperty.LINE_BREAK,
1784: UCharacter.LineBreak.EXCLAMATION },
1785: { 0x002F, UProperty.LINE_BREAK,
1786: UCharacter.LineBreak.BREAK_SYMBOLS },
1787: { 0x1D49C, UProperty.LINE_BREAK,
1788: UCharacter.LineBreak.ALPHABETIC },
1789: { 0x1731, UProperty.LINE_BREAK,
1790: UCharacter.LineBreak.ALPHABETIC },
1791:
1792: /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */
1793:
1794: /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */
1795:
1796: { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE,
1797: UCharacter.HangulSyllableType.LEADING_JAMO },
1798: { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE,
1799: UCharacter.HangulSyllableType.LEADING_JAMO },
1800: { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE,
1801: UCharacter.HangulSyllableType.LEADING_JAMO },
1802: { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE,
1803: UCharacter.HangulSyllableType.LEADING_JAMO },
1804:
1805: { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE,
1806: UCharacter.HangulSyllableType.VOWEL_JAMO },
1807: { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE,
1808: UCharacter.HangulSyllableType.VOWEL_JAMO },
1809: { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE,
1810: UCharacter.HangulSyllableType.VOWEL_JAMO },
1811: { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE,
1812: UCharacter.HangulSyllableType.VOWEL_JAMO },
1813:
1814: { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE,
1815: UCharacter.HangulSyllableType.TRAILING_JAMO },
1816: { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE,
1817: UCharacter.HangulSyllableType.TRAILING_JAMO },
1818: { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE,
1819: UCharacter.HangulSyllableType.TRAILING_JAMO },
1820: { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE,
1821: UCharacter.HangulSyllableType.TRAILING_JAMO },
1822:
1823: { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1824: { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1825: { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1826: { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1827: { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1828: { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1829:
1830: { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE,
1831: UCharacter.HangulSyllableType.LV_SYLLABLE },
1832: { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE,
1833: UCharacter.HangulSyllableType.LV_SYLLABLE },
1834: { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE,
1835: UCharacter.HangulSyllableType.LV_SYLLABLE },
1836: { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE,
1837: UCharacter.HangulSyllableType.LV_SYLLABLE },
1838:
1839: { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE,
1840: UCharacter.HangulSyllableType.LVT_SYLLABLE },
1841: { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE,
1842: UCharacter.HangulSyllableType.LVT_SYLLABLE },
1843: { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE,
1844: UCharacter.HangulSyllableType.LVT_SYLLABLE },
1845: { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE,
1846: UCharacter.HangulSyllableType.LVT_SYLLABLE },
1847: { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE,
1848: UCharacter.HangulSyllableType.LVT_SYLLABLE },
1849:
1850: { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
1851:
1852: { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
1853:
1854: { 0x00d7, UProperty.PATTERN_SYNTAX, 1 },
1855: { 0xfe45, UProperty.PATTERN_SYNTAX, 1 },
1856: { 0x0061, UProperty.PATTERN_SYNTAX, 0 },
1857:
1858: { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 },
1859: { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 },
1860: { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 },
1861: { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 },
1862: { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 },
1863:
1864: {
1865: 0x1d200,
1866: UProperty.BLOCK,
1867: UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID },
1868: { 0x2c8e, UProperty.BLOCK,
1869: UCharacter.UnicodeBlock.COPTIC_ID },
1870: { 0xfe17, UProperty.BLOCK,
1871: UCharacter.UnicodeBlock.VERTICAL_FORMS_ID },
1872:
1873: { 0x1a00, UProperty.SCRIPT, UScript.BUGINESE },
1874: { 0x2cea, UProperty.SCRIPT, UScript.COPTIC },
1875: { 0xa82b, UProperty.SCRIPT, UScript.SYLOTI_NAGRI },
1876: { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN },
1877:
1878: { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 },
1879: { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },
1880: { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 },
1881: { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL },
1882: { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT },
1883: { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV },
1884:
1885: { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK,
1886: UCharacter.GraphemeClusterBreak.LVT },
1887: { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK,
1888: UCharacter.GraphemeClusterBreak.EXTEND },
1889: { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK,
1890: UCharacter.GraphemeClusterBreak.CONTROL },
1891: { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK,
1892: UCharacter.GraphemeClusterBreak.V },
1893:
1894: { 0x05f4, UProperty.WORD_BREAK,
1895: UCharacter.WordBreak.MIDLETTER },
1896: { 0x4ef0, UProperty.WORD_BREAK,
1897: UCharacter.WordBreak.OTHER },
1898: { 0x19d9, UProperty.WORD_BREAK,
1899: UCharacter.WordBreak.NUMERIC },
1900: { 0x2044, UProperty.WORD_BREAK,
1901: UCharacter.WordBreak.MIDNUM },
1902:
1903: { 0xfffd, UProperty.SENTENCE_BREAK,
1904: UCharacter.SentenceBreak.OTHER },
1905: { 0x1ffc, UProperty.SENTENCE_BREAK,
1906: UCharacter.SentenceBreak.UPPER },
1907: { 0xff63, UProperty.SENTENCE_BREAK,
1908: UCharacter.SentenceBreak.CLOSE },
1909: { 0x2028, UProperty.SENTENCE_BREAK,
1910: UCharacter.SentenceBreak.SEP },
1911:
1912: /* undefined UProperty values */
1913: { 0x61, 0x4a7, 0 }, { 0x234bc, 0x15ed, 0 } };
1914:
1915: if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0
1916: || UCharacter
1917: .getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0
1918: || UCharacter.getIntPropertyMinValue(UProperty.BLOCK) != 0 /* j2478 */
1919: || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT) != 0 /* JB#2410 */
1920: || UCharacter.getIntPropertyMinValue(0x2345) != 0) {
1921: errln("error: UCharacter.getIntPropertyMinValue() wrong");
1922: }
1923:
1924: if (UCharacter.getIntPropertyMaxValue(UProperty.DASH) != 1) {
1925: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n");
1926: }
1927: if (UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) != 1) {
1928: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n");
1929: }
1930: if (UCharacter
1931: .getIntPropertyMaxValue(UProperty.BINARY_LIMIT - 1) != 1) {
1932: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n");
1933: }
1934:
1935: if (UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) != UCharacterDirection.CHAR_DIRECTION_COUNT - 1) {
1936: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n");
1937: }
1938: if (UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) != UCharacter.UnicodeBlock.COUNT - 1) {
1939: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n");
1940: }
1941: if (UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) != UCharacter.LineBreak.COUNT - 1) {
1942: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n");
1943: }
1944: if (UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) != UScript.CODE_LIMIT - 1) {
1945: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n");
1946: }
1947: if (UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) != UCharacter.NumericType.COUNT - 1) {
1948: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n");
1949: }
1950: if (UCharacter
1951: .getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) != UCharacterCategory.CHAR_CATEGORY_COUNT - 1) {
1952: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n");
1953: }
1954: if (UCharacter
1955: .getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) != UCharacter.HangulSyllableType.COUNT - 1) {
1956: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n");
1957: }
1958: if (UCharacter
1959: .getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) != UCharacter.GraphemeClusterBreak.COUNT - 1) {
1960: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n");
1961: }
1962: if (UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) != UCharacter.SentenceBreak.COUNT - 1) {
1963: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n");
1964: }
1965: if (UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) != UCharacter.WordBreak.COUNT - 1) {
1966: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n");
1967: }
1968: /*JB#2410*/
1969: if (UCharacter.getIntPropertyMaxValue(0x2345) != -1) {
1970: errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n");
1971: }
1972: if (UCharacter
1973: .getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) != (UCharacter.DecompositionType.COUNT - 1)) {
1974: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n");
1975: }
1976: if (UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) != (UCharacter.JoiningGroup.COUNT - 1)) {
1977: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n");
1978: }
1979: if (UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) != (UCharacter.JoiningType.COUNT - 1)) {
1980: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n");
1981: }
1982: if (UCharacter
1983: .getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) != (UCharacter.EastAsianWidth.COUNT - 1)) {
1984: errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n");
1985: }
1986:
1987: VersionInfo version = UCharacter.getUnicodeVersion();
1988:
1989: // test hasBinaryProperty()
1990: for (int i = 0; i < props.length; ++i) {
1991: if (props[i][0] < 0) {
1992: if (version.compareTo(VersionInfo.getInstance(
1993: props[i][1] >> 8, (props[i][1] >> 4) & 0xF,
1994: props[i][1] & 0xF, 0)) < 0) {
1995: break;
1996: }
1997: continue;
1998: }
1999: boolean expect = true;
2000: if (props[i][2] == 0) {
2001: expect = false;
2002: }
2003: if (props[i][1] < UProperty.INT_START) {
2004: if (UCharacter.hasBinaryProperty(props[i][0],
2005: props[i][1]) != expect) {
2006: errln("error: UCharacter.hasBinaryProperty(\\u"
2007: + Integer.toHexString(props[i][0]) + ", "
2008: + Integer.toHexString(props[i][1])
2009: + ") has an error expected " + props[i][2]);
2010: }
2011: }
2012:
2013: int retVal = UCharacter.getIntPropertyValue(props[i][0],
2014: props[i][1]);
2015: if (retVal != props[i][2]) {
2016: errln("error: UCharacter.getIntPropertyValue(\\u"
2017: + Utility.hex(props[i][0], 4) + ", "
2018: + props[i][1] + " is wrong, should be "
2019: + props[i][2] + " not " + retVal);
2020: }
2021:
2022: // test separate functions, too
2023: switch (props[i][1]) {
2024: case UProperty.ALPHABETIC:
2025: if (UCharacter.isUAlphabetic(props[i][0]) != expect) {
2026: errln("error: UCharacter.isUAlphabetic(\\u"
2027: + Integer.toHexString(props[i][0])
2028: + ") is wrong expected " + props[i][2]);
2029: }
2030: break;
2031: case UProperty.LOWERCASE:
2032: if (UCharacter.isULowercase(props[i][0]) != expect) {
2033: errln("error: UCharacter.isULowercase(\\u"
2034: + Integer.toHexString(props[i][0])
2035: + ") is wrong expected " + props[i][2]);
2036: }
2037: break;
2038: case UProperty.UPPERCASE:
2039: if (UCharacter.isUUppercase(props[i][0]) != expect) {
2040: errln("error: UCharacter.isUUppercase(\\u"
2041: + Integer.toHexString(props[i][0])
2042: + ") is wrong expected " + props[i][2]);
2043: }
2044: break;
2045: case UProperty.WHITE_SPACE:
2046: if (UCharacter.isUWhiteSpace(props[i][0]) != expect) {
2047: errln("error: UCharacter.isUWhiteSpace(\\u"
2048: + Integer.toHexString(props[i][0])
2049: + ") is wrong expected " + props[i][2]);
2050: }
2051: break;
2052: default:
2053: break;
2054: }
2055: }
2056: }
2057:
2058: public void TestNumericProperties() {
2059: // see UnicodeData.txt, DerivedNumericValues.txt
2060: int testvar[][] = { { 0x0F33, UCharacter.NumericType.NUMERIC },
2061: { 0x0C66, UCharacter.NumericType.DECIMAL },
2062: { 0x2159, UCharacter.NumericType.NUMERIC },
2063: { 0x00BD, UCharacter.NumericType.NUMERIC },
2064: { 0x0031, UCharacter.NumericType.DECIMAL },
2065: { 0x10320, UCharacter.NumericType.NUMERIC },
2066: { 0x0F2B, UCharacter.NumericType.NUMERIC },
2067: { 0x00B2, UCharacter.NumericType.DIGIT }, /* Unicode 4.0 change */
2068: { 0x1813, UCharacter.NumericType.DECIMAL },
2069: { 0x2173, UCharacter.NumericType.NUMERIC },
2070: { 0x278E, UCharacter.NumericType.DIGIT },
2071: { 0x1D7F2, UCharacter.NumericType.DECIMAL },
2072: { 0x247A, UCharacter.NumericType.DIGIT },
2073: { 0x1372, UCharacter.NumericType.NUMERIC },
2074: { 0x216B, UCharacter.NumericType.NUMERIC },
2075: { 0x16EE, UCharacter.NumericType.NUMERIC },
2076: { 0x249A, UCharacter.NumericType.NUMERIC },
2077: { 0x303A, UCharacter.NumericType.NUMERIC },
2078: { 0x32B2, UCharacter.NumericType.NUMERIC },
2079: { 0x1375, UCharacter.NumericType.NUMERIC },
2080: { 0x10323, UCharacter.NumericType.NUMERIC },
2081: { 0x0BF1, UCharacter.NumericType.NUMERIC },
2082: { 0x217E, UCharacter.NumericType.NUMERIC },
2083: { 0x2180, UCharacter.NumericType.NUMERIC },
2084: { 0x2181, UCharacter.NumericType.NUMERIC },
2085: { 0x137C, UCharacter.NumericType.NUMERIC },
2086: { 0x61, UCharacter.NumericType.NONE },
2087: { 0x3000, UCharacter.NumericType.NONE },
2088: { 0xfffe, UCharacter.NumericType.NONE },
2089: { 0x10301, UCharacter.NumericType.NONE },
2090: { 0xe0033, UCharacter.NumericType.NONE },
2091: { 0x10ffff, UCharacter.NumericType.NONE },
2092: /* Unicode 4.0 Changes */
2093: { 0x96f6, UCharacter.NumericType.NUMERIC },
2094: { 0x4e00, UCharacter.NumericType.NUMERIC },
2095: { 0x58f1, UCharacter.NumericType.NUMERIC },
2096: { 0x5f10, UCharacter.NumericType.NUMERIC },
2097: { 0x5f0e, UCharacter.NumericType.NUMERIC },
2098: { 0x8086, UCharacter.NumericType.NUMERIC },
2099: { 0x7396, UCharacter.NumericType.NUMERIC },
2100: { 0x5345, UCharacter.NumericType.NUMERIC },
2101: { 0x964c, UCharacter.NumericType.NUMERIC },
2102: { 0x4edf, UCharacter.NumericType.NUMERIC },
2103: { 0x4e07, UCharacter.NumericType.NUMERIC },
2104: { 0x4ebf, UCharacter.NumericType.NUMERIC },
2105: { 0x5146, UCharacter.NumericType.NUMERIC } };
2106:
2107: double expected[] = { -1 / (double) 2, 0, 1 / (double) 6,
2108: 1 / (double) 2, 1, 1, 3 / (double) 2, 2, 3, 4, 5, 6, 7,
2109: 10, 12, 17, 19, 30, 37, 40, 50, 100, 500, 1000, 5000,
2110: 10000, UCharacter.NO_NUMERIC_VALUE,
2111: UCharacter.NO_NUMERIC_VALUE,
2112: UCharacter.NO_NUMERIC_VALUE,
2113: UCharacter.NO_NUMERIC_VALUE,
2114: UCharacter.NO_NUMERIC_VALUE,
2115: UCharacter.NO_NUMERIC_VALUE, 0, 1, 1, 2, 3, 4, 9, 30,
2116: 100, 1000, 10000, 100000000, 1000000000000.00 };
2117:
2118: for (int i = 0; i < testvar.length; ++i) {
2119: int c = testvar[i][0];
2120: int type = UCharacter.getIntPropertyValue(c,
2121: UProperty.NUMERIC_TYPE);
2122: double nv = UCharacter.getUnicodeNumericValue(c);
2123:
2124: if (type != testvar[i][1]) {
2125: errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4)
2126: + ") = " + type + " should be " + testvar[i][1]);
2127: }
2128: if (0.000001 <= Math.abs(nv - expected[i])) {
2129: errln("UCharacter.getNumericValue(\\u"
2130: + Utility.hex(c, 4) + ") = " + nv
2131: + " should be " + expected[i]);
2132: }
2133: }
2134: }
2135:
2136: /**
2137: * Test the property values API. See JB#2410.
2138: */
2139: public void TestPropertyValues() {
2140: int i, p, min, max;
2141:
2142: /* Min should be 0 for everything. */
2143: /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */
2144: for (p = UProperty.INT_START; p < UProperty.INT_LIMIT; ++p) {
2145: min = UCharacter.getIntPropertyMinValue(p);
2146: if (min != 0) {
2147: if (p == UProperty.BLOCK) {
2148: /* This is okay...for now. See JB#2487.
2149: TODO Update this for JB#2487. */
2150: } else {
2151: String name;
2152: name = UCharacter.getPropertyName(p,
2153: UProperty.NameChoice.LONG);
2154: errln("FAIL: UCharacter.getIntPropertyMinValue("
2155: + name + ") = " + min + ", exp. 0");
2156: }
2157: }
2158: }
2159:
2160: if (UCharacter
2161: .getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK) != 0
2162: || UCharacter
2163: .getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY_MASK) != -1) {
2164: errln("error: UCharacter.getIntPropertyMin/MaxValue("
2165: + "UProperty.GENERAL_CATEGORY_MASK) is wrong");
2166: }
2167:
2168: /* Max should be -1 for invalid properties. */
2169: max = UCharacter.getIntPropertyMaxValue(-1);
2170: if (max != -1) {
2171: errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = "
2172: + max + ", exp. -1");
2173: }
2174:
2175: /* Script should return 0 for an invalid code point. If the API
2176: throws an exception then that's fine too. */
2177: for (i = 0; i < 2; ++i) {
2178: try {
2179: int script = 0;
2180: String desc = null;
2181: switch (i) {
2182: case 0:
2183: script = UScript.getScript(-1);
2184: desc = "UScript.getScript(-1)";
2185: break;
2186: case 1:
2187: script = UCharacter.getIntPropertyValue(-1,
2188: UProperty.SCRIPT);
2189: desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)";
2190: break;
2191: }
2192: if (script != 0) {
2193: errln("FAIL: " + desc + " = " + script + ", exp. 0");
2194: }
2195: } catch (IllegalArgumentException e) {
2196: }
2197: }
2198: }
2199:
2200: public void TestIsBMP() {
2201: int ch[] = { 0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff };
2202: boolean flag[] = { true, false, true, false, true, false };
2203: for (int i = 0; i < ch.length; i++) {
2204: if (UCharacter.isBMP(ch[i]) != flag[i]) {
2205: errln("Fail: \\u" + Utility.hex(ch[i], 8)
2206: + " failed at UCharacter.isBMP");
2207: }
2208: }
2209: }
2210:
2211: /* add characters from a serialized set to a normal one */
2212: private static void _setAddSerialized(UnicodeSet set,
2213: USerializedSet sset) {
2214: // int start, end;
2215: int i, count;
2216:
2217: count = sset.countRanges();
2218: int[] range = new int[2];
2219: for (i = 0; i < count; ++i) {
2220: sset.getRange(i, range);
2221: set.add(range[0], range[1]);
2222: }
2223: }
2224:
2225: private boolean showADiffB(UnicodeSet a, UnicodeSet b,
2226: String a_name, String b_name, boolean expect,
2227: boolean diffIsError) {
2228: int i, start, end, length;
2229: boolean equal;
2230: equal = true;
2231: i = 0;
2232: for (;;) {
2233: start = a.getRangeStart(i);
2234: length = (i < a.getRangeCount()) ? 0 : a.getRangeCount();
2235: end = a.getRangeEnd(i);
2236:
2237: if (length != 0) {
2238: return equal; /* done with code points, got a string or -1 */
2239: }
2240:
2241: if (expect != b.contains(start, end)) {
2242: equal = false;
2243: while (start <= end) {
2244: if (expect != b.contains(start)) {
2245: if (diffIsError) {
2246: if (expect) {
2247: errln("error: " + a_name + " contains "
2248: + hex(start) + " but " + b_name
2249: + " does not");
2250: } else {
2251: errln("error: " + a_name + " and "
2252: + b_name + " both contain "
2253: + hex(start)
2254: + " but should not intersect");
2255: }
2256: } else {
2257: if (expect) {
2258: logln("info: " + a_name + " contains "
2259: + hex(start) + "but " + b_name
2260: + " does not");
2261: } else {
2262: logln("info: " + a_name + " and "
2263: + b_name + " both contain "
2264: + hex(start)
2265: + " but should not intersect");
2266: }
2267: }
2268: }
2269: ++start;
2270: }
2271: }
2272:
2273: ++i;
2274: }
2275: }
2276:
2277: private boolean showAMinusB(UnicodeSet a, UnicodeSet b,
2278: String a_name, String b_name, boolean diffIsError) {
2279:
2280: return showADiffB(a, b, a_name, b_name, true, diffIsError);
2281: }
2282:
2283: private boolean showAIntersectB(UnicodeSet a, UnicodeSet b,
2284: String a_name, String b_name, boolean diffIsError) {
2285: return showADiffB(a, b, a_name, b_name, false, diffIsError);
2286: }
2287:
2288: private boolean compareUSets(UnicodeSet a, UnicodeSet b,
2289: String a_name, String b_name, boolean diffIsError) {
2290: return showAMinusB(a, b, a_name, b_name, diffIsError)
2291: && showAMinusB(b, a, b_name, a_name, diffIsError);
2292: }
2293:
2294: /* various tests for consistency of UCD data and API behavior */
2295: public void TestConsistency() {
2296: char[] buffer16 = new char[300];
2297: char[] buffer = new char[300];
2298: UnicodeSet set1, set2, set3, set4;
2299:
2300: USerializedSet sset;
2301: int start, end;
2302: int i, length;
2303:
2304: String hyphenPattern = "[:Hyphen:]";
2305: String dashPattern = "[:Dash:]";
2306: String lowerPattern = "[:Lowercase:]";
2307: String formatPattern = "[:Cf:]";
2308: String alphaPattern = "[:Alphabetic:]";
2309:
2310: /*
2311: * It used to be that UCD.html and its precursors said
2312: * "Those dashes used to mark connections between pieces of words,
2313: * plus the Katakana middle dot."
2314: *
2315: * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
2316: * but not from Hyphen.
2317: * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
2318: * Therefore, do not show errors when testing the Hyphen property.
2319: */
2320: logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
2321: + "known to the UTC and not considered errors.\n");
2322:
2323: set1 = new UnicodeSet(hyphenPattern);
2324: set2 = new UnicodeSet(dashPattern);
2325:
2326: /* remove the Katakana middle dot(s) from set1 */
2327: set1.remove(0x30fb);
2328: set2.remove(0xff65); /* halfwidth variant */
2329: showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false);
2330:
2331: /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
2332: set3 = new UnicodeSet(formatPattern);
2333: set4 = new UnicodeSet(alphaPattern);
2334:
2335: showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false);
2336: showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true);
2337: showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true);
2338: /*
2339: * Check that each lowercase character has "small" in its name
2340: * and not "capital".
2341: * There are some such characters, some of which seem odd.
2342: * Use the verbose flag to see these notices.
2343: */
2344: set1 = new UnicodeSet(lowerPattern);
2345:
2346: for (i = 0;; ++i) {
2347: // try{
2348: // length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode);
2349: // }catch(Exception e){
2350: // break;
2351: // }
2352: start = set1.getRangeStart(i);
2353: end = set1.getRangeEnd(i);
2354: length = i < set1.getRangeCount() ? set1.getRangeCount()
2355: : 0;
2356: if (length != 0) {
2357: break; /* done with code points, got a string or -1 */
2358: }
2359:
2360: while (start <= end) {
2361: String name = UCharacter.getName(start);
2362:
2363: if ((name.indexOf("SMALL") < 0 || name
2364: .indexOf("CAPITAL") < -1)
2365: && name.indexOf("SMALL CAPITAL") == -1) {
2366: logln("info: [:Lowercase:] contains U+"
2367: + hex(start)
2368: + " whose name does not suggest lowercase: "
2369: + name);
2370: }
2371: ++start;
2372: }
2373: }
2374:
2375: /*
2376: * Test for an example that unorm_getCanonStartSet() delivers
2377: * all characters that compose from the input one,
2378: * even in multiple steps.
2379: * For example, the set for "I" (0049) should contain both
2380: * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
2381: * In general, the set for the middle such character should be a subset
2382: * of the set for the first.
2383: */
2384: set1 = new UnicodeSet();
2385: set2 = new UnicodeSet();
2386: sset = new USerializedSet();
2387: NormalizerImpl.getCanonStartSet(0x49, sset);
2388: _setAddSerialized(set1, sset);
2389:
2390: /* enumerate all characters that are plausible to be latin letters */
2391: for (start = 0xa0; start < 0x2000; ++start) {
2392: if (NormalizerImpl.getDecomposition(start, false, buffer16,
2393: 0, buffer16.length) > 1
2394: && buffer[0] == 0x0049) {
2395: set2.add(start);
2396: }
2397: }
2398:
2399: compareUSets(set1, set2, "[canon start set of 0049]",
2400: "[all c with canon decomp with 0049]", false);
2401:
2402: }
2403:
2404: public void TestCoverage() {
2405: //cover forDigit
2406: char ch1 = UCharacter.forDigit(7, 11);
2407: assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1));
2408: char ch2 = UCharacter.forDigit(17, 20);
2409: assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2));
2410:
2411: //Jitterbug 4451, for coverage
2412: for (int i = 0x0041; i < 0x005B; i++) {
2413: if (!UCharacter.isJavaLetter(i))
2414: errln("FAIL \\u" + hex(i) + " expected to be a letter");
2415: if (!UCharacter.isJavaIdentifierStart(i))
2416: errln("FAIL \\u"
2417: + hex(i)
2418: + " expected to be a Java identifier start character");
2419: if (!UCharacter.isJavaLetterOrDigit(i))
2420: errln("FAIL \\u" + hex(i)
2421: + " expected not to be a Java letter");
2422: if (!UCharacter.isJavaIdentifierPart(i))
2423: errln("FAIL \\u"
2424: + hex(i)
2425: + " expected to be a Java identifier part character");
2426: }
2427: char[] spaces = { '\t', '\n', '\f', '\r', ' ' };
2428: for (int i = 0; i < spaces.length; i++) {
2429: if (!UCharacter.isSpace(spaces[i]))
2430: errln("FAIL \\u" + hex(spaces[i])
2431: + " expected to be a Java space");
2432: }
2433: if (!UCharacter.getStringPropertyValue(UProperty.AGE, '\u3400',
2434: 0).equals("3.0.0.0")) {
2435: errln("FAIL \\u3400 expected to be 3.0.0.0");
2436: }
2437: }
2438:
2439: public void TestCasePropsDummy() {
2440: // code coverage for UCaseProps.getDummy()
2441: if (UCaseProps.getDummy().tolower(0x41) != 0x41) {
2442: errln("UCaseProps.getDummy().tolower(0x41)!=0x41");
2443: }
2444: }
2445:
2446: public void TestBiDiPropsDummy() {
2447: // code coverage for UBiDiProps.getDummy()
2448: if (UBiDiProps.getDummy().getClass(0x20) != 0) {
2449: errln("UBiDiProps.getDummy().getClass(0x20)!=0");
2450: }
2451: }
2452: }
|