001: /**
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.dev.test.lang;
007:
008: import com.ibm.icu.dev.test.TestFmwk;
009: import com.ibm.icu.dev.test.TestUtil;
010: import com.ibm.icu.lang.UCharacter;
011: import com.ibm.icu.text.UTF16;
012: import com.ibm.icu.text.BreakIterator;
013: import com.ibm.icu.util.ULocale;
014: import com.ibm.icu.impl.Utility;
015: import java.util.Locale;
016: import java.io.BufferedReader;
017: import java.util.Vector;
018:
019: /**
020: * <p>Testing character casing</p>
021: * <p>Mostly following the test cases in strcase.cpp for ICU</p>
022: * @author Syn Wee Quek
023: * @since march 14 2002
024: */
025: public final class UCharacterCaseTest extends TestFmwk {
026: // constructor -----------------------------------------------------------
027:
/**
 * Creates the test object. The test keeps no per-instance state, so there
 * is nothing to initialise here.
 */
public UCharacterCaseTest() {
    super();
}
033:
034: // public methods --------------------------------------------------------
035:
036: public static void main(String[] arg) {
037: try {
038: UCharacterCaseTest test = new UCharacterCaseTest();
039: test.run(arg);
040: } catch (Exception e) {
041: e.printStackTrace();
042: }
043: }
044:
045: /**
046: * Testing the uppercase and lowercase function of UCharacter
047: */
048: public void TestCharacter() {
049: for (int i = 0; i < CHARACTER_LOWER_.length; i++) {
050: if (UCharacter.isLetter(CHARACTER_LOWER_[i])
051: && !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {
052: errln("FAIL isLowerCase test for \\u"
053: + hex(CHARACTER_LOWER_[i]));
054: break;
055: }
056: if (UCharacter.isLetter(CHARACTER_UPPER_[i])
057: && !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) || UCharacter
058: .isTitleCase(CHARACTER_UPPER_[i]))) {
059: errln("FAIL isUpperCase test for \\u"
060: + hex(CHARACTER_UPPER_[i]));
061: break;
062: }
063: if (CHARACTER_LOWER_[i] != UCharacter
064: .toLowerCase(CHARACTER_UPPER_[i])
065: || (CHARACTER_UPPER_[i] != UCharacter
066: .toUpperCase(CHARACTER_LOWER_[i]) && CHARACTER_UPPER_[i] != UCharacter
067: .toTitleCase(CHARACTER_LOWER_[i]))) {
068: errln("FAIL case conversion test for \\u"
069: + hex(CHARACTER_UPPER_[i]) + " to \\u"
070: + hex(CHARACTER_LOWER_[i]));
071: break;
072: }
073: if (CHARACTER_LOWER_[i] != UCharacter
074: .toLowerCase(CHARACTER_LOWER_[i])) {
075: errln("FAIL lower case conversion test for \\u"
076: + hex(CHARACTER_LOWER_[i]));
077: break;
078: }
079: if (CHARACTER_UPPER_[i] != UCharacter
080: .toUpperCase(CHARACTER_UPPER_[i])
081: && CHARACTER_UPPER_[i] != UCharacter
082: .toTitleCase(CHARACTER_UPPER_[i])) {
083: errln("FAIL upper case conversion test for \\u"
084: + hex(CHARACTER_UPPER_[i]));
085: break;
086: }
087: logln("Ok \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u"
088: + hex(CHARACTER_LOWER_[i]));
089: }
090: }
091:
092: public void TestFolding() {
093: // test simple case folding
094: for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {
095: if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) != FOLDING_SIMPLE_[i + 1]) {
096: errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i])
097: + ", true) should be \\u"
098: + hex(FOLDING_SIMPLE_[i + 1]));
099: }
100: if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
101: UCharacter.FOLD_CASE_DEFAULT) != FOLDING_SIMPLE_[i + 1]) {
102: errln("FAIL: foldCase(\\u"
103: + hex(FOLDING_SIMPLE_[i])
104: + ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
105: + hex(FOLDING_SIMPLE_[i + 1]));
106: }
107: if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) != FOLDING_SIMPLE_[i + 2]) {
108: errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i])
109: + ", false) should be \\u"
110: + hex(FOLDING_SIMPLE_[i + 2]));
111: }
112: if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
113: UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) != FOLDING_SIMPLE_[i + 2]) {
114: errln("FAIL: foldCase(\\u"
115: + hex(FOLDING_SIMPLE_[i])
116: + ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
117: + hex(FOLDING_SIMPLE_[i + 2]));
118: }
119: }
120:
121: // Test full string case folding with default option and separate
122: // buffers
123: if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(
124: FOLDING_MIXED_[0], true))) {
125: errln("FAIL: foldCase("
126: + prettify(FOLDING_MIXED_[0])
127: + ", true)="
128: + prettify(UCharacter.foldCase(FOLDING_MIXED_[0],
129: true)) + " should be "
130: + prettify(FOLDING_DEFAULT_[0]));
131: }
132:
133: if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(
134: FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {
135: errln("FAIL: foldCase("
136: + prettify(FOLDING_MIXED_[0])
137: + ", UCharacter.FOLD_CASE_DEFAULT)="
138: + prettify(UCharacter.foldCase(FOLDING_MIXED_[0],
139: UCharacter.FOLD_CASE_DEFAULT))
140: + " should be " + prettify(FOLDING_DEFAULT_[0]));
141: }
142:
143: if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(UCharacter.foldCase(
144: FOLDING_MIXED_[0], false))) {
145: errln("FAIL: foldCase("
146: + prettify(FOLDING_MIXED_[0])
147: + ", false)="
148: + prettify(UCharacter.foldCase(FOLDING_MIXED_[0],
149: false)) + " should be "
150: + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
151: }
152:
153: if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(UCharacter.foldCase(
154: FOLDING_MIXED_[0],
155: UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
156: errln("FAIL: foldCase("
157: + prettify(FOLDING_MIXED_[0])
158: + ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)="
159: + prettify(UCharacter.foldCase(FOLDING_MIXED_[0],
160: UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
161: + " should be "
162: + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
163: }
164:
165: if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(
166: FOLDING_MIXED_[1], true))) {
167: errln("FAIL: foldCase("
168: + prettify(FOLDING_MIXED_[1])
169: + ", true)="
170: + prettify(UCharacter.foldCase(FOLDING_MIXED_[1],
171: true)) + " should be "
172: + prettify(FOLDING_DEFAULT_[1]));
173: }
174:
175: if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(
176: FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {
177: errln("FAIL: foldCase("
178: + prettify(FOLDING_MIXED_[1])
179: + ", UCharacter.FOLD_CASE_DEFAULT)="
180: + prettify(UCharacter.foldCase(FOLDING_MIXED_[1],
181: UCharacter.FOLD_CASE_DEFAULT))
182: + " should be " + prettify(FOLDING_DEFAULT_[1]));
183: }
184:
185: // alternate handling for dotted I/dotless i (U+0130, U+0131)
186: if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(UCharacter.foldCase(
187: FOLDING_MIXED_[1], false))) {
188: errln("FAIL: foldCase("
189: + prettify(FOLDING_MIXED_[1])
190: + ", false)="
191: + prettify(UCharacter.foldCase(FOLDING_MIXED_[1],
192: false)) + " should be "
193: + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
194: }
195:
196: if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(UCharacter.foldCase(
197: FOLDING_MIXED_[1],
198: UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
199: errln("FAIL: foldCase("
200: + prettify(FOLDING_MIXED_[1])
201: + ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)="
202: + prettify(UCharacter.foldCase(FOLDING_MIXED_[1],
203: UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
204: + " should be "
205: + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
206: }
207: }
208:
209: /**
210: * Testing the strings case mapping methods
211: */
212: public void TestUpper() {
213: // uppercase with root locale and in the same buffer
214: if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {
215: errln("Fail " + UPPER_BEFORE_
216: + " after uppercase should be " + UPPER_ROOT_
217: + " instead got "
218: + UCharacter.toUpperCase(UPPER_BEFORE_));
219: }
220:
221: // uppercase with turkish locale and separate buffers
222: if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(
223: TURKISH_LOCALE_, UPPER_BEFORE_))) {
224: errln("Fail "
225: + UPPER_BEFORE_
226: + " after turkish-sensitive uppercase should be "
227: + UPPER_TURKISH_
228: + " instead of "
229: + UCharacter.toUpperCase(TURKISH_LOCALE_,
230: UPPER_BEFORE_));
231: }
232:
233: // uppercase a short string with root locale
234: if (!UPPER_MINI_UPPER_.equals(UCharacter
235: .toUpperCase(UPPER_MINI_))) {
236: errln("error in toUpper(root locale)=\"" + UPPER_MINI_
237: + "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
238: }
239:
240: if (!SHARED_UPPERCASE_TOPKAP_.equals(UCharacter
241: .toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {
242: errln("toUpper failed: expected \""
243: + SHARED_UPPERCASE_TOPKAP_ + "\", got \""
244: + UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_)
245: + "\".");
246: }
247:
248: if (!SHARED_UPPERCASE_TURKISH_.equals(UCharacter.toUpperCase(
249: TURKISH_LOCALE_, SHARED_LOWERCASE_TOPKAP_))) {
250: errln("toUpper failed: expected \""
251: + SHARED_UPPERCASE_TURKISH_
252: + "\", got \""
253: + UCharacter.toUpperCase(TURKISH_LOCALE_,
254: SHARED_LOWERCASE_TOPKAP_) + "\".");
255: }
256:
257: if (!SHARED_UPPERCASE_GERMAN_.equals(UCharacter.toUpperCase(
258: GERMAN_LOCALE_, SHARED_LOWERCASE_GERMAN_))) {
259: errln("toUpper failed: expected \""
260: + SHARED_UPPERCASE_GERMAN_
261: + "\", got \""
262: + UCharacter.toUpperCase(GERMAN_LOCALE_,
263: SHARED_LOWERCASE_GERMAN_) + "\".");
264: }
265:
266: if (!SHARED_UPPERCASE_GREEK_.equals(UCharacter
267: .toUpperCase(SHARED_LOWERCASE_GREEK_))) {
268: errln("toLower failed: expected \""
269: + SHARED_UPPERCASE_GREEK_ + "\", got \""
270: + UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_)
271: + "\".");
272: }
273: }
274:
275: public void TestLower() {
276: if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {
277: errln("Fail " + LOWER_BEFORE_
278: + " after lowercase should be " + LOWER_ROOT_
279: + " instead of "
280: + UCharacter.toLowerCase(LOWER_BEFORE_));
281: }
282:
283: // lowercase with turkish locale
284: if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(
285: TURKISH_LOCALE_, LOWER_BEFORE_))) {
286: errln("Fail "
287: + LOWER_BEFORE_
288: + " after turkish-sensitive lowercase should be "
289: + LOWER_TURKISH_
290: + " instead of "
291: + UCharacter.toLowerCase(TURKISH_LOCALE_,
292: LOWER_BEFORE_));
293: }
294: if (!SHARED_LOWERCASE_ISTANBUL_.equals(UCharacter
295: .toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {
296: errln("1. toLower failed: expected \""
297: + SHARED_LOWERCASE_ISTANBUL_
298: + "\", got \""
299: + UCharacter
300: .toLowerCase(SHARED_UPPERCASE_ISTANBUL_)
301: + "\".");
302: }
303:
304: if (!SHARED_LOWERCASE_TURKISH_.equals(UCharacter.toLowerCase(
305: TURKISH_LOCALE_, SHARED_UPPERCASE_ISTANBUL_))) {
306: errln("2. toLower failed: expected \""
307: + SHARED_LOWERCASE_TURKISH_
308: + "\", got \""
309: + UCharacter.toLowerCase(TURKISH_LOCALE_,
310: SHARED_UPPERCASE_ISTANBUL_) + "\".");
311: }
312: if (!SHARED_LOWERCASE_GREEK_.equals(UCharacter.toLowerCase(
313: GREEK_LOCALE_, SHARED_UPPERCASE_GREEK_))) {
314: errln("toLower failed: expected \""
315: + SHARED_LOWERCASE_GREEK_
316: + "\", got \""
317: + UCharacter.toLowerCase(GREEK_LOCALE_,
318: SHARED_UPPERCASE_GREEK_) + "\".");
319: }
320: }
321:
322: public void TestTitle() {
323: try {
324: for (int i = 0; i < TITLE_DATA_.length;) {
325: String test = TITLE_DATA_[i++];
326: String expected = TITLE_DATA_[i++];
327: ULocale locale = new ULocale(TITLE_DATA_[i++]);
328: int breakType = Integer.parseInt(TITLE_DATA_[i++]);
329: BreakIterator iter = breakType >= 0 ? BreakIterator
330: .getBreakInstance(locale, breakType) : null;
331: String result = UCharacter.toTitleCase(locale, test,
332: iter);
333: if (!expected.equals(result)) {
334: errln("titlecasing for " + prettify(test)
335: + " should be " + prettify(expected)
336: + " but got " + prettify(result));
337: }
338: }
339: } catch (Exception ex) {
340: warnln("Could not find data for BreakIterators");
341: }
342: }
343:
344: public void TestSpecial() {
345: for (int i = 0; i < SPECIAL_LOCALES_.length; i++) {
346: int j = i * 3;
347: Locale locale = SPECIAL_LOCALES_[i];
348: String str = SPECIAL_DATA_[j];
349: if (locale != null) {
350: if (!SPECIAL_DATA_[j + 1].equals(UCharacter
351: .toLowerCase(locale, str))) {
352: errln("error lowercasing special characters "
353: + hex(str) + " expected "
354: + hex(SPECIAL_DATA_[j + 1])
355: + " for locale " + locale.toString()
356: + " but got "
357: + hex(UCharacter.toLowerCase(locale, str)));
358: }
359: if (!SPECIAL_DATA_[j + 2].equals(UCharacter
360: .toUpperCase(locale, str))) {
361: errln("error uppercasing special characters "
362: + hex(str) + " expected "
363: + SPECIAL_DATA_[j + 2] + " for locale "
364: + locale.toString() + " but got "
365: + hex(UCharacter.toUpperCase(locale, str)));
366: }
367: } else {
368: if (!SPECIAL_DATA_[j + 1].equals(UCharacter
369: .toLowerCase(str))) {
370: errln("error lowercasing special characters "
371: + hex(str) + " expected "
372: + SPECIAL_DATA_[j + 1] + " but got "
373: + hex(UCharacter.toLowerCase(locale, str)));
374: }
375: if (!SPECIAL_DATA_[j + 2].equals(UCharacter
376: .toUpperCase(locale, str))) {
377: errln("error uppercasing special characters "
378: + hex(str) + " expected "
379: + SPECIAL_DATA_[j + 2] + " but got "
380: + hex(UCharacter.toUpperCase(locale, str)));
381: }
382: }
383: }
384:
385: // turkish & azerbaijani dotless i & dotted I
386: // remove dot above if there was a capital I before and there are no
387: // more accents above
388: if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter
389: .toLowerCase(TURKISH_LOCALE_, SPECIAL_DOTTED_))) {
390: errln("error in dots.toLower(tr)=\""
391: + SPECIAL_DOTTED_
392: + "\" expected \""
393: + SPECIAL_DOTTED_LOWER_TURKISH_
394: + "\" but got "
395: + UCharacter.toLowerCase(TURKISH_LOCALE_,
396: SPECIAL_DOTTED_));
397: }
398: if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter
399: .toLowerCase(GERMAN_LOCALE_, SPECIAL_DOTTED_))) {
400: errln("error in dots.toLower(de)=\""
401: + SPECIAL_DOTTED_
402: + "\" expected \""
403: + SPECIAL_DOTTED_LOWER_GERMAN_
404: + "\" but got "
405: + UCharacter.toLowerCase(GERMAN_LOCALE_,
406: SPECIAL_DOTTED_));
407: }
408:
409: // lithuanian dot above in uppercasing
410: if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(UCharacter
411: .toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
412: errln("error in dots.toUpper(lt)=\""
413: + SPECIAL_DOT_ABOVE_
414: + "\" expected \""
415: + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_
416: + "\" but got "
417: + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,
418: SPECIAL_DOT_ABOVE_));
419: }
420: if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter
421: .toUpperCase(GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
422: errln("error in dots.toUpper(de)=\""
423: + SPECIAL_DOT_ABOVE_
424: + "\" expected \""
425: + SPECIAL_DOT_ABOVE_UPPER_GERMAN_
426: + "\" but got "
427: + UCharacter.toUpperCase(GERMAN_LOCALE_,
428: SPECIAL_DOT_ABOVE_));
429: }
430:
431: // lithuanian adds dot above to i in lowercasing if there are more
432: // above accents
433: if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(UCharacter
434: .toLowerCase(LITHUANIAN_LOCALE_,
435: SPECIAL_DOT_ABOVE_UPPER_))) {
436: errln("error in dots.toLower(lt)=\""
437: + SPECIAL_DOT_ABOVE_UPPER_
438: + "\" expected \""
439: + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_
440: + "\" but got "
441: + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
442: SPECIAL_DOT_ABOVE_UPPER_));
443: }
444: if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(UCharacter
445: .toLowerCase(GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_UPPER_))) {
446: errln("error in dots.toLower(de)=\""
447: + SPECIAL_DOT_ABOVE_UPPER_
448: + "\" expected \""
449: + SPECIAL_DOT_ABOVE_LOWER_GERMAN_
450: + "\" but got "
451: + UCharacter.toLowerCase(GERMAN_LOCALE_,
452: SPECIAL_DOT_ABOVE_UPPER_));
453: }
454: }
455:
456: /**
457: * Tests for case mapping in the file SpecialCasing.txt
458: * This method reads in SpecialCasing.txt file for testing purposes.
459: * A default path is provided relative to the src path, however the user
460: * could set a system property to change the directory path.<br>
461: * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest
462: */
463: public void TestSpecialCasingTxt() {
464: try {
465: // reading in the SpecialCasing file
466: BufferedReader input = TestUtil
467: .getDataReader("unicode/SpecialCasing.txt");
468: while (true) {
469: String s = input.readLine();
470: if (s == null) {
471: break;
472: }
473: if (s.length() == 0 || s.charAt(0) == '#') {
474: continue;
475: }
476:
477: String chstr[] = getUnicodeStrings(s);
478: StringBuffer strbuffer = new StringBuffer(chstr[0]);
479: StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
480: StringBuffer upperbuffer = new StringBuffer(chstr[3]);
481: Locale locale = null;
482: for (int i = 4; i < chstr.length; i++) {
483: String condition = chstr[i];
484: if (Character.isLowerCase(chstr[i].charAt(0))) {
485: // specified locale
486: locale = new Locale(chstr[i], "");
487: } else if (condition
488: .compareToIgnoreCase("Not_Before_Dot") == 0) {
489: // turns I into dotless i
490: } else if (condition
491: .compareToIgnoreCase("More_Above") == 0) {
492: strbuffer.append((char) 0x300);
493: lowerbuffer.append((char) 0x300);
494: upperbuffer.append((char) 0x300);
495: } else if (condition
496: .compareToIgnoreCase("After_Soft_Dotted") == 0) {
497: strbuffer.insert(0, 'i');
498: lowerbuffer.insert(0, 'i');
499: String lang = "";
500: if (locale != null) {
501: lang = locale.getLanguage();
502: }
503: if (lang.equals("tr") || lang.equals("az")) {
504: // this is to be removed when 4.0 data comes out
505: // and upperbuffer.insert uncommented
506: // see jitterbug 2344
507: chstr[i] = "After_I";
508: strbuffer.deleteCharAt(0);
509: lowerbuffer.deleteCharAt(0);
510: i--;
511: continue;
512: // upperbuffer.insert(0, '\u0130');
513: } else {
514: upperbuffer.insert(0, 'I');
515: }
516: } else if (condition
517: .compareToIgnoreCase("Final_Sigma") == 0) {
518: strbuffer.insert(0, 'c');
519: lowerbuffer.insert(0, 'c');
520: upperbuffer.insert(0, 'C');
521: } else if (condition.compareToIgnoreCase("After_I") == 0) {
522: strbuffer.insert(0, 'I');
523: lowerbuffer.insert(0, 'i');
524: String lang = "";
525: if (locale != null) {
526: lang = locale.getLanguage();
527: }
528: if (lang.equals("tr") || lang.equals("az")) {
529: upperbuffer.insert(0, 'I');
530: }
531: }
532: }
533: chstr[0] = strbuffer.toString();
534: chstr[1] = lowerbuffer.toString();
535: chstr[3] = upperbuffer.toString();
536: if (locale == null) {
537: if (!UCharacter.toLowerCase(chstr[0]).equals(
538: chstr[1])) {
539: errln(s);
540: errln("Fail: toLowerCase for character "
541: + Utility.escape(chstr[0])
542: + ", expected "
543: + Utility.escape(chstr[1])
544: + " but resulted in "
545: + Utility.escape(UCharacter
546: .toLowerCase(chstr[0])));
547: }
548: if (!UCharacter.toUpperCase(chstr[0]).equals(
549: chstr[3])) {
550: errln(s);
551: errln("Fail: toUpperCase for character "
552: + Utility.escape(chstr[0])
553: + ", expected "
554: + Utility.escape(chstr[3])
555: + " but resulted in "
556: + Utility.escape(UCharacter
557: .toUpperCase(chstr[0])));
558: }
559: } else {
560: if (!UCharacter.toLowerCase(locale, chstr[0])
561: .equals(chstr[1])) {
562: errln(s);
563: errln("Fail: toLowerCase for character "
564: + Utility.escape(chstr[0])
565: + ", expected "
566: + Utility.escape(chstr[1])
567: + " but resulted in "
568: + Utility.escape(UCharacter
569: .toLowerCase(locale, chstr[0])));
570: }
571: if (!UCharacter.toUpperCase(locale, chstr[0])
572: .equals(chstr[3])) {
573: errln(s);
574: errln("Fail: toUpperCase for character "
575: + Utility.escape(chstr[0])
576: + ", expected "
577: + Utility.escape(chstr[3])
578: + " but resulted in "
579: + Utility.escape(UCharacter
580: .toUpperCase(locale, chstr[0])));
581: }
582: }
583: }
584: input.close();
585: } catch (Exception e) {
586: e.printStackTrace();
587: }
588: }
589:
590: public void TestUpperLower() {
591: int upper[] = { 0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9,
592: 0x01c8, 0x01c9, 0x000c };
593: int lower[] = { 0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9,
594: 0x01c9, 0x01c9, 0x000c };
595: String upperTest = "abcdefg123hij.?:klmno";
596: String lowerTest = "ABCDEFG123HIJ.?:KLMNO";
597:
598: // Checks LetterLike Symbols which were previously a source of
599: // confusion [Bertrand A. D. 02/04/98]
600: for (int i = 0x2100; i < 0x2138; i++) {
601: /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
602: if (i != 0x2126 && i != 0x212a && i != 0x212b
603: && i != 0x2132) {
604: if (i != UCharacter.toLowerCase(i)) { // itself
605: errln("Failed case conversion with itself: \\u"
606: + Utility.hex(i, 4));
607: }
608: if (i != UCharacter.toUpperCase(i)) {
609: errln("Failed case conversion with itself: \\u"
610: + Utility.hex(i, 4));
611: }
612: }
613: }
614: for (int i = 0; i < upper.length; i++) {
615: if (UCharacter.toLowerCase(upper[i]) != lower[i]) {
616: errln("FAILED UCharacter.tolower() for \\u"
617: + Utility.hex(upper[i], 4)
618: + " Expected \\u"
619: + Utility.hex(lower[i], 4)
620: + " Got \\u"
621: + Utility.hex(UCharacter.toLowerCase(upper[i]),
622: 4));
623: }
624: }
625: logln("testing upper lower");
626: for (int i = 0; i < upperTest.length(); i++) {
627: logln("testing to upper to lower");
628: if (UCharacter.isLetter(upperTest.charAt(i))
629: && !UCharacter.isLowerCase(upperTest.charAt(i))) {
630: errln("Failed isLowerCase test at \\u"
631: + Utility.hex(upperTest.charAt(i), 4));
632: } else if (UCharacter.isLetter(lowerTest.charAt(i))
633: && !UCharacter.isUpperCase(lowerTest.charAt(i))) {
634: errln("Failed isUpperCase test at \\u"
635: + Utility.hex(lowerTest.charAt(i), 4));
636: } else if (upperTest.charAt(i) != UCharacter
637: .toLowerCase(lowerTest.charAt(i))) {
638: errln("Failed case conversion from \\u"
639: + Utility.hex(lowerTest.charAt(i), 4)
640: + " To \\u"
641: + Utility.hex(upperTest.charAt(i), 4));
642: } else if (lowerTest.charAt(i) != UCharacter
643: .toUpperCase(upperTest.charAt(i))) {
644: errln("Failed case conversion : \\u"
645: + Utility.hex(upperTest.charAt(i), 4)
646: + " To \\u"
647: + Utility.hex(lowerTest.charAt(i), 4));
648: } else if (upperTest.charAt(i) != UCharacter
649: .toLowerCase(upperTest.charAt(i))) {
650: errln("Failed case conversion with itself: \\u"
651: + Utility.hex(upperTest.charAt(i)));
652: } else if (lowerTest.charAt(i) != UCharacter
653: .toUpperCase(lowerTest.charAt(i))) {
654: errln("Failed case conversion with itself: \\u"
655: + Utility.hex(lowerTest.charAt(i)));
656: }
657: }
658: logln("done testing upper Lower");
659: }
660:
661: // private data members - test data --------------------------------------
662:
663: private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
664: private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
665: private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
666: private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
667: private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt",
668: "LT");
669:
670: private static final int CHARACTER_UPPER_[] = { 0x41, 0x0042,
671: 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x00b1, 0x00b2,
672: 0xb3, 0x0048, 0x0049, 0x004a, 0x002e, 0x003f, 0x003a,
673: 0x004b, 0x004c, 0x4d, 0x004e, 0x004f, 0x01c4, 0x01c8,
674: 0x000c, 0x0000 };
675: private static final int CHARACTER_LOWER_[] = { 0x61, 0x0062,
676: 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x00b1, 0x00b2,
677: 0xb3, 0x0068, 0x0069, 0x006a, 0x002e, 0x003f, 0x003a,
678: 0x006b, 0x006c, 0x6d, 0x006e, 0x006f, 0x01c6, 0x01c9,
679: 0x000c, 0x0000 };
680:
681: /*
682: * CaseFolding.txt says about i and its cousins:
683: * 0049; C; 0069; # LATIN CAPITAL LETTER I
684: * 0049; T; 0131; # LATIN CAPITAL LETTER I
685: *
686: * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
687: * 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
688: * That's all.
689: * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
690: */
691: private static final int FOLDING_SIMPLE_[] = {
692: // input, default, exclude special i
693: 0x61, 0x61, 0x61, 0x49, 0x69, 0x131, 0x130, 0x130, 0x69,
694: 0x131, 0x131, 0x131, 0xdf, 0xdf, 0xdf, 0xfb03, 0xfb03,
695: 0xfb03, 0x1040e, 0x10436, 0x10436, 0x5ffff, 0x5ffff,
696: 0x5ffff };
697: private static final String FOLDING_MIXED_[] = {
698: "\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
699: "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131" };
700: private static final String FOLDING_DEFAULT_[] = {
701: "\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
702: "ass\u03bcffi\uD801\uDC34i\u0307\u0131" };
703: private static final String FOLDING_EXCLUDE_SPECIAL_I_[] = {
704: "\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
705: "ass\u03bcffi\uD801\uDC34i\u0131" };
706: /**
707: * "IESUS CHRISTOS"
708: */
709: private static final String SHARED_UPPERCASE_GREEK_ = "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
710: /**
711: * "iesus christos"
712: */
713: private static final String SHARED_LOWERCASE_GREEK_ = "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
714: private static final String SHARED_LOWERCASE_TURKISH_ = "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
715: private static final String SHARED_UPPERCASE_TURKISH_ = "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
716: private static final String SHARED_UPPERCASE_ISTANBUL_ = "\u0130STANBUL, NOT CONSTANTINOPLE!";
717: private static final String SHARED_LOWERCASE_ISTANBUL_ = "i\u0307stanbul, not constantinople!";
718: private static final String SHARED_LOWERCASE_TOPKAP_ = "topkap\u0131 palace, istanbul";
719: private static final String SHARED_UPPERCASE_TOPKAP_ = "TOPKAPI PALACE, ISTANBUL";
720: private static final String SHARED_LOWERCASE_GERMAN_ = "S\u00FC\u00DFmayrstra\u00DFe";
721: private static final String SHARED_UPPERCASE_GERMAN_ = "S\u00DCSSMAYRSTRASSE";
722:
723: private static final String UPPER_BEFORE_ = "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
724: private static final String UPPER_ROOT_ = "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
725: private static final String UPPER_TURKISH_ = "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
726: private static final String UPPER_MINI_ = "\u00df\u0061";
727: private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";
728:
729: private static final String LOWER_BEFORE_ = "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
730: private static final String LOWER_ROOT_ = "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
731: private static final String LOWER_TURKISH_ = "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";
732:
733: /**
734: * each item is an array with input string, result string, locale ID, break iterator
735: * the break iterator is specified as an int, same as in BreakIterator.KIND_*:
736: * 0=KIND_CHARACTER 1=KIND_WORD 2=KIND_LINE 3=KIND_SENTENCE 4=KIND_TITLE -1=default
737: * see ICU4C source/test/testdata/casing.txt
738: */
739: private static final String TITLE_DATA_[] = {
740: "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
741: "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
742: "",
743: "0",
744:
745: "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
746: "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
747: "",
748: "1",
749:
750: "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA",
751: "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
752: "", "-1",
753:
754: " tHe QUIcK bRoWn", " The Quick Brown", "", "4",
755:
756: "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
757: "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
758: "", "0",
759:
760: "\u01c9ubav ljubav",
761: "\u01c8ubav Ljubav", // Lj vs. L+j
762: "", "-1",
763:
764: "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'",
765: "'Oh Don't Titlecase After Letter+'", "", "-1" };
766:
767: /**
768: * <p>basic string, lower string, upper string, title string</p>
769: */
770: private static final String SPECIAL_DATA_[] = {
771: UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
772: UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
773: UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
774: "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 "
775: + UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
776: "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 "
777: + UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
778: "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 "
779: + UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
780: // sigmas followed/preceded by cased letters
781: "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
782: "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
783: "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 " };
784: private static final Locale SPECIAL_LOCALES_[] = { null,
785: ENGLISH_LOCALE_, null, };
786:
787: private static final String SPECIAL_DOTTED_ = "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
788: private static final String SPECIAL_DOTTED_LOWER_TURKISH_ = "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
789: private static final String SPECIAL_DOTTED_LOWER_GERMAN_ = "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
790: private static final String SPECIAL_DOT_ABOVE_ = "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
791: private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ = "A\u0307 \u0307 I J\u0327 J\u0301\u0307";
792: private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ = "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
793: private static final String SPECIAL_DOT_ABOVE_UPPER_ = "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
794: private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ = "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
795: private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ = "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";
796:
797: // private methods -------------------------------------------------------
798:
799: /**
800: * Converting the hex numbers represented betwee n ';' to Unicode strings
801: * @param str string to break up into Unicode strings
802: * @return array of Unicode strings ending with a null
803: */
804: private String[] getUnicodeStrings(String str) {
805: Vector v = new Vector(10);
806: int start = 0;
807: for (int casecount = 4; casecount > 0; casecount--) {
808: int end = str.indexOf("; ", start);
809: String casestr = str.substring(start, end);
810: StringBuffer buffer = new StringBuffer();
811: int spaceoffset = 0;
812: while (spaceoffset < casestr.length()) {
813: int nextspace = casestr.indexOf(' ', spaceoffset);
814: if (nextspace == -1) {
815: nextspace = casestr.length();
816: }
817: buffer.append((char) Integer.parseInt(casestr
818: .substring(spaceoffset, nextspace), 16));
819: spaceoffset = nextspace + 1;
820: }
821: start = end + 2;
822: v.add(buffer.toString());
823: }
824: int comments = str.indexOf(" #", start);
825: if (comments != -1 && comments != start) {
826: if (str.charAt(comments - 1) == ';') {
827: comments--;
828: }
829: String conditions = str.substring(start, comments);
830: int offset = 0;
831: while (offset < conditions.length()) {
832: int spaceoffset = conditions.indexOf(' ', offset);
833: if (spaceoffset == -1) {
834: spaceoffset = conditions.length();
835: }
836: v.add(conditions.substring(offset, spaceoffset));
837: offset = spaceoffset + 1;
838: }
839: }
840: int size = v.size();
841: String result[] = new String[size];
842: for (int i = 0; i < size; i++) {
843: result[i] = (String) v.elementAt(i);
844: }
845: return result;
846: }
847: }
|