001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.util;
008:
009: import java.util.Set;
010: import java.util.Locale;
011: import java.util.Map;
012: import java.util.List;
013: import java.util.Arrays;
014:
015: import com.ibm.icu.lang.UProperty;
016: import com.ibm.icu.lang.UCharacter;
017: import com.ibm.icu.text.Normalizer;
018: import com.ibm.icu.text.UTF16;
019: import com.ibm.icu.text.UnicodeSet;
020: import com.ibm.icu.text.UnicodeSetIterator;
021:
022: /**
023: * Provides a general interface for Unicode Properties, and
024: * extracting sets based on those values.
025: * @author Davis
026: */
027: public abstract class UnicodePropertySource implements Cloneable {
028:
029: protected String propertyAlias;
030: protected int nameChoice = UProperty.NameChoice.LONG;
031: protected StringFilter filter = new StringFilter();
032: protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(
033: new UnicodeSet(0, 0x10FFFF));
034:
035: abstract public String getPropertyValue(int codepoint);
036:
037: abstract public Set getAvailablePropertyAliases(Set result);
038:
039: abstract public Set getAvailablePropertyValueAliases(Set result);
040:
041: abstract public String getPropertyAlias(int nameChoice);
042:
043: abstract public String getPropertyValueAlias(String valueAlias,
044: int nameChoice);
045:
046: /**
047: * Subclasses should override
048: */
049: public Object clone() {
050: try {
051: UnicodePropertySource result = (UnicodePropertySource) super
052: .clone();
053: result.filter = (StringFilter) filter.clone();
054: return result;
055: } catch (CloneNotSupportedException e) {
056: throw new IllegalStateException("Should never happen.");
057: }
058: }
059:
060: public UnicodePropertySource setPropertyAlias(String propertyAlias) {
061: this .propertyAlias = propertyAlias;
062: return this ;
063: }
064:
065: public String getPropertyAlias() {
066: return propertyAlias;
067: }
068:
069: public static final boolean equals(int codepoint, String other) {
070: if (other.length() == 1) {
071: return codepoint == other.charAt(0);
072: }
073: return other.equals(UTF16.valueOf(codepoint));
074: }
075:
076: public UnicodeSet getPropertySet(boolean charEqualsValue,
077: UnicodeSet result) {
078: if (result == null)
079: result = new UnicodeSet();
080: matchIterator.reset();
081: while (matchIterator.next()) {
082: String value = filter
083: .remap(getPropertyValue(matchIterator.codepoint));
084: if (equals(matchIterator.codepoint, value) == charEqualsValue) {
085: result.add(matchIterator.codepoint);
086: }
087: }
088: return result;
089: }
090:
091: public UnicodeSet getPropertySet(String propertyValue,
092: UnicodeSet result) {
093: if (result == null)
094: result = new UnicodeSet();
095: matchIterator.reset();
096: while (matchIterator.next()) {
097: String value = filter
098: .remap(getPropertyValue(matchIterator.codepoint));
099: if (propertyValue.equals(value)) {
100: result.add(matchIterator.codepoint);
101: }
102: }
103: return result;
104: }
105:
106: public UnicodeSet getPropertySet(Matcher matcher, UnicodeSet result) {
107: if (result == null)
108: result = new UnicodeSet();
109: matchIterator.reset();
110: while (matchIterator.next()) {
111: String value = filter
112: .remap(getPropertyValue(matchIterator.codepoint));
113: if (value == null)
114: continue;
115: if (matcher.matches(value)) {
116: result.add(matchIterator.codepoint);
117: }
118: }
119: return result;
120: }
121:
122: public interface Matcher {
123: public boolean matches(String value);
124: }
125:
126: public int getNameChoice() {
127: return nameChoice;
128: }
129:
130: public UnicodePropertySource setNameChoice(int choice) {
131: nameChoice = choice;
132: return this ;
133: }
134:
135: public static class StringFilter implements Cloneable {
136: public String remap(String original) {
137: return original;
138: }
139:
140: public Object clone() {
141: try {
142: return super .clone();
143: } catch (CloneNotSupportedException e) {
144: throw new IllegalStateException("Should never happen.");
145: }
146: }
147: }
148:
149: public static class MapFilter extends StringFilter {
150: Map valueMap;
151:
152: public String remap(String original) {
153: Object changed = valueMap.get(original);
154: return changed == null ? original : (String) changed;
155: }
156:
157: public Map getMap() {
158: return valueMap;
159: }
160:
161: public MapFilter setMap(Map map) {
162: valueMap = map;
163: return this ;
164: }
165: }
166:
167: static public class ICU extends UnicodePropertySource {
168: protected int propEnum = Integer.MIN_VALUE;
169: {
170: matchIterator = new UnicodeSetIterator(new UnicodeSet(
171: "[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
172: }
173:
174: public UnicodePropertySource setPropertyAlias(
175: String propertyAlias) {
176: super .setPropertyAlias(propertyAlias);
177: int extraPosition = Extras.indexOf(propertyAlias);
178: if (extraPosition >= 0) {
179: propEnum = EXTRA_START + extraPosition;
180: } else {
181: propEnum = UCharacter.getPropertyEnum(propertyAlias);
182: }
183: return this ;
184: }
185:
186: public String getPropertyValue(int codePoint) {
187: if (propEnum < UProperty.INT_LIMIT) {
188: int enumValue = UCharacter.getIntPropertyValue(
189: codePoint, propEnum);
190: return UCharacter.getPropertyValueName(propEnum,
191: enumValue, (int) nameChoice);
192: } else if (propEnum < UProperty.DOUBLE_LIMIT) {
193: return Double.toString(UCharacter
194: .getUnicodeNumericValue(codePoint));
195: // TODO: Fix HACK -- API deficient
196: } else
197: switch (propEnum) {
198: case UProperty.AGE:
199: return UCharacter.getAge(codePoint).toString();
200: case UProperty.BIDI_MIRRORING_GLYPH:
201: return UTF16.valueOf(UCharacter
202: .getMirror(codePoint));
203: case UProperty.CASE_FOLDING:
204: return UCharacter.foldCase(
205: UTF16.valueOf(codePoint), true);
206: case UProperty.ISO_COMMENT:
207: return UCharacter.getISOComment(codePoint);
208: case UProperty.LOWERCASE_MAPPING:
209: return UCharacter.toLowerCase(Locale.ENGLISH, UTF16
210: .valueOf(codePoint));
211: case UProperty.NAME:
212: return UCharacter.getName(codePoint);
213: case UProperty.SIMPLE_CASE_FOLDING:
214: return UTF16.valueOf(UCharacter.foldCase(codePoint,
215: true));
216: case UProperty.SIMPLE_LOWERCASE_MAPPING:
217: return UTF16.valueOf(UCharacter
218: .toLowerCase(codePoint));
219: case UProperty.SIMPLE_TITLECASE_MAPPING:
220: return UTF16.valueOf(UCharacter
221: .toTitleCase(codePoint));
222: case UProperty.SIMPLE_UPPERCASE_MAPPING:
223: return UTF16.valueOf(UCharacter
224: .toUpperCase(codePoint));
225: case UProperty.TITLECASE_MAPPING:
226: return UCharacter.toTitleCase(Locale.ENGLISH, UTF16
227: .valueOf(codePoint), null);
228: case UProperty.UNICODE_1_NAME:
229: return UCharacter.getName1_0(codePoint);
230: case UProperty.UPPERCASE_MAPPING:
231: return UCharacter.toUpperCase(Locale.ENGLISH, UTF16
232: .valueOf(codePoint));
233: case NFC:
234: return Normalizer.normalize(codePoint,
235: Normalizer.NFC);
236: case NFD:
237: return Normalizer.normalize(codePoint,
238: Normalizer.NFD);
239: case NFKC:
240: return Normalizer.normalize(codePoint,
241: Normalizer.NFKC);
242: case NFKD:
243: return Normalizer.normalize(codePoint,
244: Normalizer.NFKD);
245: }
246: return null;
247: }
248:
249: static final List Extras = Arrays.asList(new String[] { "NFC",
250: "NFD", "NFKC", "NKFD" });
251:
252: static final int NFC = 0x8000, NFD = 0x8001, NFKC = 0x8002,
253: NFKD = 0x8003, EXTRA_START = NFC,
254: EXTRA_LIMIT = NFKD + 1;
255:
256: static final int[][] ranges = {
257: { UProperty.BINARY_START, UProperty.BINARY_LIMIT },
258: { UProperty.INT_START, UProperty.INT_LIMIT },
259: { UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT },
260: { UProperty.STRING_START, UProperty.STRING_LIMIT }, };
261:
262: public Set getAvailablePropertyAliases(Set result) {
263: for (int i = 0; i < ranges.length; ++i) {
264: for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
265: result.add(UCharacter
266: .getPropertyName(j, nameChoice));
267: }
268: }
269: result.addAll(Extras);
270: return result;
271: }
272:
273: public Set getAvailablePropertyValueAliases(Set result) {
274: if (propEnum < UProperty.INT_LIMIT) {
275: int start = UCharacter.getIntPropertyMinValue(propEnum);
276: int end = UCharacter.getIntPropertyMaxValue(propEnum);
277: for (int i = start; i <= end; ++i) {
278: result.add(getFixedValueAlias(null, i, nameChoice));
279: }
280: } else {
281: result.add(getFixedValueAlias(null, -1, nameChoice));
282: }
283: return result;
284: }
285:
286: /**
287: * @param valueAlias null if unused.
288: * @param valueEnum -1 if unused
289: * @param nameChoice
290: * @return the alias
291: */
292: private String getFixedValueAlias(String valueAlias,
293: int valueEnum, int nameChoice) {
294: if (propEnum >= UProperty.STRING_START) {
295: return "<string>";
296: } else if (propEnum >= UProperty.DOUBLE_START) {
297: return "<double>";
298: }
299: if (valueAlias != null && !valueAlias.equals("<integer>")) {
300: valueEnum = UCharacter.getPropertyValueEnum(propEnum,
301: valueAlias);
302: }
303: String result = fixedGetPropertyValueName(propEnum,
304: valueEnum, nameChoice);
305: if (result != null)
306: return result;
307: // try other namechoice
308: result = fixedGetPropertyValueName(
309: propEnum,
310: valueEnum,
311: nameChoice == UProperty.NameChoice.LONG ? UProperty.NameChoice.SHORT
312: : UProperty.NameChoice.LONG);
313: if (result != null)
314: return result;
315: return "<integer>";
316: }
317:
318: private static String fixedGetPropertyValueName(int propEnum,
319: int valueEnum, int nameChoice) {
320: try {
321: return UCharacter.getPropertyValueName(propEnum,
322: valueEnum, nameChoice);
323: } catch (Exception e) {
324: return null;
325: }
326: }
327:
328: public String getPropertyAlias(int nameChoice) {
329: if (propEnum < EXTRA_START) {
330: return UCharacter.getPropertyName(propEnum, nameChoice);
331: }
332: return (String) Extras.get(propEnum - EXTRA_START);
333: }
334:
335: public String getPropertyValueAlias(String valueAlias,
336: int nameChoice) {
337: return getFixedValueAlias(valueAlias, -1, nameChoice);
338: }
339: }
340:
341: // TODO file bug on getPropertyValueName for Canonical_Combining_Class
342:
343: public StringFilter getFilter() {
344: return filter;
345: }
346:
347: public UnicodePropertySource setFilter(StringFilter filter) {
348: this .filter = filter;
349: return this ;
350: }
351:
352: /**
353: */
354: static public void addAll(UnicodeSetIterator source,
355: UnicodeSet result) {
356: while (source.nextRange()) {
357: if (source.codepoint == UnicodeSetIterator.IS_STRING) {
358: result.add(source.string);
359: } else {
360: result.add(source.codepoint, source.codepointEnd);
361: }
362: }
363: }
364:
365: public UnicodeSet getMatchSet(UnicodeSet result) {
366: if (result == null)
367: result = new UnicodeSet();
368: addAll(matchIterator, result);
369: return result;
370: }
371:
372: /**
373: * @param set
374: */
375: public void setMatchSet(UnicodeSet set) {
376: matchIterator = new UnicodeSetIterator(set);
377: }
378:
379: }
|