001: //##header
002: /*
003: *******************************************************************************
004: * Copyright (C) 2002-2006, International Business Machines Corporation and *
005: * others. All Rights Reserved. *
006: *******************************************************************************
007: */
008: //#ifndef FOUNDATION
009: package com.ibm.icu.dev.test.util;
010:
011: import java.util.ArrayList;
012: import java.util.Arrays;
013: import java.util.Collection;
014: import java.util.HashMap;
015: import java.util.Iterator;
016: import java.util.List;
017: import java.util.Locale;
018: import java.util.Map;
019:
020: import com.ibm.icu.lang.UCharacter;
021: import com.ibm.icu.lang.UProperty;
022: import com.ibm.icu.text.Normalizer;
023: import com.ibm.icu.text.UTF16;
024: import com.ibm.icu.util.VersionInfo;
025:
026: /**
027: * Provides a general interface for Unicode Properties, and
028: * extracting sets based on those values.
029: * @author Davis
030: */
031:
032: public class ICUPropertyFactory extends UnicodeProperty.Factory {
033:
034: static class ICUProperty extends UnicodeProperty {
035: protected int propEnum = Integer.MIN_VALUE;
036:
037: protected ICUProperty(String propName, int propEnum) {
038: setName(propName);
039: this .propEnum = propEnum;
040: setType(internalGetPropertyType(propEnum));
041: }
042:
043: boolean shownException = false;
044:
045: public String _getValue(int codePoint) {
046: switch (propEnum) {
047: case UProperty.AGE:
048: String temp = UCharacter.getAge(codePoint).toString();
049: if (temp.equals("0.0.0.0"))
050: return "unassigned";
051: if (temp.endsWith(".0.0"))
052: return temp.substring(0, temp.length() - 4);
053: return temp;
054: case UProperty.BIDI_MIRRORING_GLYPH:
055: return UTF16.valueOf(UCharacter.getMirror(codePoint));
056: case UProperty.CASE_FOLDING:
057: return UCharacter.foldCase(UTF16.valueOf(codePoint),
058: true);
059: case UProperty.ISO_COMMENT:
060: return UCharacter.getISOComment(codePoint);
061: case UProperty.LOWERCASE_MAPPING:
062: return UCharacter.toLowerCase(Locale.ENGLISH, UTF16
063: .valueOf(codePoint));
064: case UProperty.NAME:
065: return UCharacter.getName(codePoint);
066: case UProperty.SIMPLE_CASE_FOLDING:
067: return UTF16.valueOf(UCharacter.foldCase(codePoint,
068: true));
069: case UProperty.SIMPLE_LOWERCASE_MAPPING:
070: return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
071: case UProperty.SIMPLE_TITLECASE_MAPPING:
072: return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
073: case UProperty.SIMPLE_UPPERCASE_MAPPING:
074: return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
075: case UProperty.TITLECASE_MAPPING:
076: return UCharacter.toTitleCase(Locale.ENGLISH, UTF16
077: .valueOf(codePoint), null);
078: case UProperty.UNICODE_1_NAME:
079: return UCharacter.getName1_0(codePoint);
080: case UProperty.UPPERCASE_MAPPING:
081: return UCharacter.toUpperCase(Locale.ENGLISH, UTF16
082: .valueOf(codePoint));
083: case NFC:
084: return Normalizer.normalize(codePoint, Normalizer.NFC);
085: case NFD:
086: return Normalizer.normalize(codePoint, Normalizer.NFD);
087: case NFKC:
088: return Normalizer.normalize(codePoint, Normalizer.NFKC);
089: case NFKD:
090: return Normalizer.normalize(codePoint, Normalizer.NFKD);
091: case isNFC:
092: return String.valueOf(Normalizer.normalize(codePoint,
093: Normalizer.NFC)
094: .equals(UTF16.valueOf(codePoint)));
095: case isNFD:
096: return String.valueOf(Normalizer.normalize(codePoint,
097: Normalizer.NFD)
098: .equals(UTF16.valueOf(codePoint)));
099: case isNFKC:
100: return String.valueOf(Normalizer.normalize(codePoint,
101: Normalizer.NFKC).equals(
102: UTF16.valueOf(codePoint)));
103: case isNFKD:
104: return String.valueOf(Normalizer.normalize(codePoint,
105: Normalizer.NFKD).equals(
106: UTF16.valueOf(codePoint)));
107: case isLowercase:
108: return String.valueOf(UCharacter.toLowerCase(
109: Locale.ENGLISH, UTF16.valueOf(codePoint))
110: .equals(UTF16.valueOf(codePoint)));
111: case isUppercase:
112: return String.valueOf(UCharacter.toUpperCase(
113: Locale.ENGLISH, UTF16.valueOf(codePoint))
114: .equals(UTF16.valueOf(codePoint)));
115: case isTitlecase:
116: return String.valueOf(UCharacter.toTitleCase(
117: Locale.ENGLISH, UTF16.valueOf(codePoint), null)
118: .equals(UTF16.valueOf(codePoint)));
119: case isCasefolded:
120: return String.valueOf(UCharacter.foldCase(
121: UTF16.valueOf(codePoint), true).equals(
122: UTF16.valueOf(codePoint)));
123: case isCased:
124: return String.valueOf(UCharacter.toLowerCase(
125: Locale.ENGLISH, UTF16.valueOf(codePoint))
126: .equals(UTF16.valueOf(codePoint)));
127: }
128: if (propEnum < UProperty.INT_LIMIT) {
129: int enumValue = -1;
130: String value = null;
131: try {
132: enumValue = UCharacter.getIntPropertyValue(
133: codePoint, propEnum);
134: if (enumValue >= 0)
135: value = fixedGetPropertyValueName(propEnum,
136: enumValue, UProperty.NameChoice.LONG);
137: } catch (IllegalArgumentException e) {
138: if (!shownException) {
139: System.out.println("Fail: " + getName() + ", "
140: + Integer.toHexString(codePoint));
141: shownException = true;
142: }
143: }
144: return value != null ? value : String
145: .valueOf(enumValue);
146: } else if (propEnum < UProperty.DOUBLE_LIMIT) {
147: double num = UCharacter
148: .getUnicodeNumericValue(codePoint);
149: if (num == UCharacter.NO_NUMERIC_VALUE)
150: return null;
151: return Double.toString(num);
152: // TODO: Fix HACK -- API deficient
153: }
154: return null;
155: }
156:
157: /**
158: * @param valueAlias null if unused.
159: * @param valueEnum -1 if unused
160: * @param nameChoice
161: * @return
162: */
163: private String getFixedValueAlias(String valueAlias,
164: int valueEnum, int nameChoice) {
165: if (propEnum >= UProperty.STRING_START) {
166: if (nameChoice != UProperty.NameChoice.LONG)
167: return null;
168: return "<string>";
169: } else if (propEnum >= UProperty.DOUBLE_START) {
170: if (nameChoice != UProperty.NameChoice.LONG)
171: return null;
172: return "<number>";
173: }
174: if (valueAlias != null && !valueAlias.equals("<integer>")) {
175: valueEnum = fixedGetPropertyValueEnum(propEnum,
176: valueAlias);
177: }
178: // because these are defined badly, there may be no normal (long) name.
179: // if there is
180: String result = fixedGetPropertyValueName(propEnum,
181: valueEnum, nameChoice);
182: if (result != null)
183: return result;
184: // HACK try other namechoice
185: if (nameChoice == UProperty.NameChoice.LONG) {
186: result = fixedGetPropertyValueName(propEnum, valueEnum,
187: UProperty.NameChoice.SHORT);
188: if (result != null)
189: return result;
190: if (propEnum == UProperty.CANONICAL_COMBINING_CLASS)
191: return null;
192: return "<integer>";
193: }
194: return null;
195: }
196:
197: private static int fixedGetPropertyValueEnum(int propEnum,
198: String valueAlias) {
199: try {
200: return UCharacter.getPropertyValueEnum(propEnum,
201: valueAlias);
202: } catch (Exception e) {
203: return Integer.parseInt(valueAlias);
204: }
205: }
206:
207: static Map fixSkeleton = new HashMap();
208:
209: private static String fixedGetPropertyValueName(int propEnum,
210: int valueEnum, int nameChoice) {
211:
212: try {
213: String value = UCharacter.getPropertyValueName(
214: propEnum, valueEnum, nameChoice);
215: String newValue = (String) fixSkeleton.get(value);
216: if (newValue == null) {
217: newValue = value;
218: if (propEnum == UProperty.JOINING_GROUP) {
219: newValue = newValue.toLowerCase(Locale.ENGLISH);
220: }
221: newValue = regularize(newValue, true);
222: fixSkeleton.put(value, newValue);
223: }
224: return newValue;
225: } catch (Exception e) {
226: return null;
227: }
228: }
229:
230: public List _getNameAliases(List result) {
231: if (result == null)
232: result = new ArrayList();
233: String alias = String_Extras.get(propEnum);
234: if (alias == null)
235: alias = Binary_Extras.get(propEnum);
236: if (alias != null) {
237: addUnique(alias, result);
238: } else {
239: addUnique(getFixedPropertyName(propEnum,
240: UProperty.NameChoice.SHORT), result);
241: addUnique(getFixedPropertyName(propEnum,
242: UProperty.NameChoice.LONG), result);
243: }
244: return result;
245: }
246:
247: public String getFixedPropertyName(int propName, int nameChoice) {
248: try {
249: return UCharacter.getPropertyName(propEnum, nameChoice);
250: } catch (IllegalArgumentException e) {
251: return null;
252: }
253: }
254:
255: private Map cccHack = new HashMap();
256: boolean needCccHack = true;
257:
258: public List _getAvailableValues(List result) {
259: if (result == null)
260: result = new ArrayList();
261: if (propEnum == UProperty.AGE) {
262: addAllUnique(new String[] { "unassigned", "1.1", "2.0",
263: "2.1", "3.0", "3.1", "3.2", "4.0" }, result);
264: return result;
265: }
266: if (propEnum < UProperty.INT_LIMIT) {
267: if (Binary_Extras.isInRange(propEnum)) {
268: propEnum = UProperty.BINARY_START; // HACK
269: }
270: int start = UCharacter.getIntPropertyMinValue(propEnum);
271: int end = UCharacter.getIntPropertyMaxValue(propEnum);
272: for (int i = start; i <= end; ++i) {
273: String alias = getFixedValueAlias(null, i,
274: UProperty.NameChoice.LONG);
275: String alias2 = getFixedValueAlias(null, i,
276: UProperty.NameChoice.SHORT);
277: if (alias == null) {
278: alias = alias2;
279: if (alias == null
280: && propEnum == UProperty.CANONICAL_COMBINING_CLASS) {
281: alias = String.valueOf(i);
282: }
283: }
284: if (needCccHack
285: && propEnum == UProperty.CANONICAL_COMBINING_CLASS) { // HACK
286: cccHack.put(alias, String.valueOf(i));
287: }
288: //System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
289: addUnique(alias, result);
290: }
291: needCccHack = false;
292: } else {
293: String alias = getFixedValueAlias(null, -1,
294: UProperty.NameChoice.LONG);
295: addUnique(alias, result);
296: }
297: return result;
298: }
299:
300: public List _getValueAliases(String valueAlias, List result) {
301: if (result == null)
302: result = new ArrayList();
303: if (propEnum == UProperty.AGE) {
304: addUnique(valueAlias, result);
305: return result;
306: }
307: if (propEnum == UProperty.CANONICAL_COMBINING_CLASS) {
308: addUnique(cccHack.get(valueAlias), result); // add number
309: }
310: addUnique(getFixedValueAlias(valueAlias, -1,
311: UProperty.NameChoice.SHORT), result);
312: addUnique(getFixedValueAlias(valueAlias, -1,
313: UProperty.NameChoice.LONG), result);
314: return result;
315: }
316:
317: /* (non-Javadoc)
318: * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
319: */
320: private int internalGetPropertyType(int propEnum) {
321: switch (propEnum) {
322: case UProperty.AGE:
323: case UProperty.BLOCK:
324: case UProperty.SCRIPT:
325: return UnicodeProperty.CATALOG;
326: case UProperty.ISO_COMMENT:
327: case UProperty.NAME:
328: case UProperty.UNICODE_1_NAME:
329: return UnicodeProperty.MISC;
330: case UProperty.BIDI_MIRRORING_GLYPH:
331: case UProperty.CASE_FOLDING:
332: case UProperty.LOWERCASE_MAPPING:
333: case UProperty.SIMPLE_CASE_FOLDING:
334: case UProperty.SIMPLE_LOWERCASE_MAPPING:
335: case UProperty.SIMPLE_TITLECASE_MAPPING:
336: case UProperty.SIMPLE_UPPERCASE_MAPPING:
337: case UProperty.TITLECASE_MAPPING:
338: case UProperty.UPPERCASE_MAPPING:
339: return UnicodeProperty.EXTENDED_STRING;
340: }
341: if (propEnum < UProperty.BINARY_START)
342: return UnicodeProperty.UNKNOWN;
343: if (propEnum < UProperty.BINARY_LIMIT)
344: return UnicodeProperty.BINARY;
345: if (propEnum < UProperty.INT_START)
346: return UnicodeProperty.EXTENDED_BINARY;
347: if (propEnum < UProperty.INT_LIMIT)
348: return UnicodeProperty.ENUMERATED;
349: if (propEnum < UProperty.DOUBLE_START)
350: return UnicodeProperty.EXTENDED_ENUMERATED;
351: if (propEnum < UProperty.DOUBLE_LIMIT)
352: return UnicodeProperty.NUMERIC;
353: if (propEnum < UProperty.STRING_START)
354: return UnicodeProperty.EXTENDED_NUMERIC;
355: if (propEnum < UProperty.STRING_LIMIT)
356: return UnicodeProperty.STRING;
357: return UnicodeProperty.EXTENDED_STRING;
358: }
359:
360: /* (non-Javadoc)
361: * @see com.ibm.icu.dev.test.util.UnicodeProperty#getVersion()
362: */
363: public String _getVersion() {
364: return VersionInfo.ICU_VERSION.toString();
365: }
366: }
367:
368: /*{
369: matchIterator = new UnicodeSetIterator(
370: new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
371: }*/
372:
373: /*
374: * Other Missing Functions:
375: Expands_On_NFC
376: Expands_On_NFD
377: Expands_On_NFKC
378: Expands_On_NFKD
379: Composition_Exclusion
380: Decomposition_Mapping
381: FC_NFKC_Closure
382: ISO_Comment
383: NFC_Quick_Check
384: NFD_Quick_Check
385: NFKC_Quick_Check
386: NFKD_Quick_Check
387: Special_Case_Condition
388: Unicode_Radical_Stroke
389: */
390:
391: static final Names Binary_Extras = new Names(
392: UProperty.BINARY_LIMIT, new String[] { "isNFC", "isNFD",
393: "isNFKC", "isNFKD", "isLowercase", "isUppercase",
394: "isTitlecase", "isCasefolded", "isCased", });
395:
396: static final Names String_Extras = new Names(
397: UProperty.STRING_LIMIT, new String[] { "toNFC", "toNFD",
398: "toNFKC", "toNKFD", });
399:
400: static final int isNFC = UProperty.BINARY_LIMIT,
401: isNFD = UProperty.BINARY_LIMIT + 1,
402: isNFKC = UProperty.BINARY_LIMIT + 2,
403: isNFKD = UProperty.BINARY_LIMIT + 3,
404: isLowercase = UProperty.BINARY_LIMIT + 4,
405: isUppercase = UProperty.BINARY_LIMIT + 5,
406: isTitlecase = UProperty.BINARY_LIMIT + 6,
407: isCasefolded = UProperty.BINARY_LIMIT + 7,
408: isCased = UProperty.BINARY_LIMIT + 8,
409:
410: NFC = UProperty.STRING_LIMIT,
411: NFD = UProperty.STRING_LIMIT + 1,
412: NFKC = UProperty.STRING_LIMIT + 2,
413: NFKD = UProperty.STRING_LIMIT + 3;
414:
/**
 * Builds the factory by registering one property for every alias returned
 * by getInternalAvailablePropertyAliases. Private: obtain the instance
 * through make().
 */
private ICUPropertyFactory() {
    List aliases = getInternalAvailablePropertyAliases(new ArrayList());
    for (Iterator it = aliases.iterator(); it.hasNext();) {
        String alias = (String) it.next();
        add(getInternalProperty(alias));
    }
}
422:
423: private static ICUPropertyFactory singleton = null;
424:
425: public static synchronized ICUPropertyFactory make() {
426: if (singleton != null)
427: return singleton;
428: singleton = new ICUPropertyFactory();
429: return singleton;
430: }
431:
432: public List getInternalAvailablePropertyAliases(List result) {
433: int[][] ranges = {
434: { UProperty.BINARY_START, UProperty.BINARY_LIMIT },
435: { UProperty.INT_START, UProperty.INT_LIMIT },
436: { UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT },
437: { UProperty.STRING_START, UProperty.STRING_LIMIT }, };
438: for (int i = 0; i < ranges.length; ++i) {
439: for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
440: String alias = UCharacter.getPropertyName(j,
441: UProperty.NameChoice.LONG);
442: UnicodeProperty.addUnique(alias, result);
443: if (!result.contains(alias))
444: result.add(alias);
445: }
446: }
447: result.addAll(String_Extras.getNames());
448: result.addAll(Binary_Extras.getNames());
449: return result;
450: }
451:
452: public UnicodeProperty getInternalProperty(String propertyAlias) {
453: int propEnum;
454: main: {
455: int possibleItem = Binary_Extras.get(propertyAlias);
456: if (possibleItem >= 0) {
457: propEnum = possibleItem;
458: break main;
459: }
460: possibleItem = String_Extras.get(propertyAlias);
461: if (possibleItem >= 0) {
462: propEnum = possibleItem;
463: break main;
464: }
465: propEnum = UCharacter.getPropertyEnum(propertyAlias);
466: }
467: return new ICUProperty(propertyAlias, propEnum);
468: }
469:
470: /* (non-Javadoc)
471: * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
472: */
473: // TODO file bug on getPropertyValueName for Canonical_Combining_Class
474: public static class Names {
475: private String[] names;
476: private int base;
477:
478: public Names(int base, String[] names) {
479: this .base = base;
480: this .names = names;
481: }
482:
483: public int get(String name) {
484: for (int i = 0; i < names.length; ++i) {
485: if (name.equalsIgnoreCase(names[i]))
486: return base + i;
487: }
488: return -1;
489: }
490:
491: public String get(int number) {
492: number -= base;
493: if (number < 0 || names.length <= number)
494: return null;
495: return names[number];
496: }
497:
498: public boolean isInRange(int number) {
499: number -= base;
500: return (0 <= number && number < names.length);
501: }
502:
503: public List getNames() {
504: return Arrays.asList(names);
505: }
506: }
507: }
508: //#endif
|