0001: //##header
0002: /*
0003: *******************************************************************************
0004: * Copyright (C) 1996-2006, International Business Machines Corporation and *
0005: * others. All Rights Reserved. *
0006: *******************************************************************************
0007: */
0008: //#ifndef FOUNDATION
0009: package com.ibm.icu.dev.test.util;
0010:
0011: import java.io.PrintWriter;
0012: import java.io.StringWriter;
0013: import java.text.ParsePosition;
0014: import java.util.ArrayList;
0015: import java.util.Collection;
0016: import java.util.Comparator;
0017: import java.util.HashMap;
0018: import java.util.Iterator;
0019: import java.util.List;
0020: import java.util.Map;
0021: import java.util.TreeMap;
0022: import java.util.regex.Pattern;
0023:
0024: import com.ibm.icu.impl.Utility;
0025: import com.ibm.icu.impl.CollectionUtilities.InverseMatcher;
0026: import com.ibm.icu.impl.CollectionUtilities.ObjectMatcher;
0027: import com.ibm.icu.text.SymbolTable;
0028: import com.ibm.icu.text.UTF16;
0029: import com.ibm.icu.text.UnicodeMatcher;
0030: import com.ibm.icu.text.UnicodeSet;
0031: import com.ibm.icu.text.UnicodeSetIterator;
0032:
0033: public abstract class UnicodeProperty extends UnicodeLabel {
0034:
0035: public static boolean DEBUG = false;
0036: public static String CHECK_NAME = "FC_NFKC_Closure";
0037: public static int CHECK_VALUE = 0x037A;
0038:
0039: private String name;
0040: private String firstNameAlias = null;
0041: private int type;
0042: private Map valueToFirstValueAlias = null;
0043:
0044: /*
0045: Name: Unicode_1_Name
0046: Name: ISO_Comment
0047: Name: Name
0048: Name: Unicode_1_Name
0049: *
0050: */
0051:
0052: public static final int UNKNOWN = 0, BINARY = 2,
0053: EXTENDED_BINARY = 3, ENUMERATED = 4,
0054: EXTENDED_ENUMERATED = 5, CATALOG = 6, EXTENDED_CATALOG = 7,
0055: MISC = 8, EXTENDED_MISC = 9, STRING = 10,
0056: EXTENDED_STRING = 11, NUMERIC = 12, EXTENDED_NUMERIC = 13,
0057: START_TYPE = 2, LIMIT_TYPE = 14, EXTENDED_MASK = 1,
0058: CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY)
0059: | (1 << EXTENDED_BINARY),
0060: STRING_MASK = (1 << STRING) | (1 << EXTENDED_STRING),
0061: STRING_OR_MISC_MASK = (1 << STRING)
0062: | (1 << EXTENDED_STRING) | (1 << MISC)
0063: | (1 << EXTENDED_MISC),
0064: ENUMERATED_OR_CATALOG_MASK = (1 << ENUMERATED)
0065: | (1 << EXTENDED_ENUMERATED) | (1 << CATALOG)
0066: | (1 << EXTENDED_CATALOG);
0067:
0068: private static final String[] TYPE_NAMES = { "Unknown", "Unknown",
0069: "Binary", "Extended Binary", "Enumerated",
0070: "Extended Enumerated", "Catalog", "Extended Catalog",
0071: "Miscellaneous", "Extended Miscellaneous", "String",
0072: "Extended String", "Numeric", "Extended Numeric", };
0073:
0074: public static String getTypeName(int propType) {
0075: return TYPE_NAMES[propType];
0076: }
0077:
0078: public final String getName() {
0079: return name;
0080: }
0081:
0082: public final int getType() {
0083: return type;
0084: }
0085:
0086: public final boolean isType(int mask) {
0087: return ((1 << type) & mask) != 0;
0088: }
0089:
0090: protected final void setName(String string) {
0091: if (string == null)
0092: throw new IllegalArgumentException("Name must not be null");
0093: name = string;
0094: }
0095:
0096: protected final void setType(int i) {
0097: type = i;
0098: }
0099:
0100: public String getVersion() {
0101: return _getVersion();
0102: }
0103:
0104: public String getValue(int codepoint) {
0105: if (DEBUG && CHECK_VALUE == codepoint
0106: && CHECK_NAME.equals(getName())) {
0107: String value = _getValue(codepoint);
0108: System.out
0109: .println(getName()
0110: + "("
0111: + Utility.hex(codepoint)
0112: + "):"
0113: + (getType() == STRING ? Utility.hex(value)
0114: : value));
0115: return value;
0116: }
0117: return _getValue(codepoint);
0118: }
0119:
0120: //public String getValue(int codepoint, boolean isShort) {
0121: // return getValue(codepoint);
0122: //}
0123:
0124: public List getNameAliases(List result) {
0125: if (result == null)
0126: result = new ArrayList(1);
0127: return _getNameAliases(result);
0128: }
0129:
0130: public List getValueAliases(String valueAlias, List result) {
0131: if (result == null)
0132: result = new ArrayList(1);
0133: result = _getValueAliases(valueAlias, result);
0134: if (!result.contains(valueAlias)) { // FIX && type < NUMERIC
0135: result = _getValueAliases(valueAlias, result); // for debugging
0136: throw new IllegalArgumentException("Internal error: "
0137: + getName() + " doesn't contain " + valueAlias
0138: + ": " + new BagFormatter().join(result));
0139: }
0140: return result;
0141: }
0142:
0143: public List getAvailableValues(List result) {
0144: if (result == null)
0145: result = new ArrayList(1);
0146: return _getAvailableValues(result);
0147: }
0148:
0149: protected abstract String _getVersion();
0150:
0151: protected abstract String _getValue(int codepoint);
0152:
0153: protected abstract List _getNameAliases(List result);
0154:
0155: protected abstract List _getValueAliases(String valueAlias,
0156: List result);
0157:
0158: protected abstract List _getAvailableValues(List result);
0159:
0160: // conveniences
0161: public final List getNameAliases() {
0162: return getNameAliases(null);
0163: }
0164:
0165: public final List getValueAliases(String valueAlias) {
0166: return getValueAliases(valueAlias, null);
0167: }
0168:
0169: public final List getAvailableValues() {
0170: return getAvailableValues(null);
0171: }
0172:
0173: public final String getValue(int codepoint, boolean getShortest) {
0174: String result = getValue(codepoint);
0175: if (type >= MISC || result == null || !getShortest)
0176: return result;
0177: return getFirstValueAlias(result);
0178: }
0179:
0180: public final String getFirstNameAlias() {
0181: if (firstNameAlias == null) {
0182: firstNameAlias = (String) getNameAliases().get(0);
0183: }
0184: return firstNameAlias;
0185: }
0186:
0187: public final String getFirstValueAlias(String value) {
0188: if (valueToFirstValueAlias == null)
0189: _getFirstValueAliasCache();
0190: return (String) valueToFirstValueAlias.get(value);
0191: }
0192:
0193: private void _getFirstValueAliasCache() {
0194: maxValueWidth = 0;
0195: maxFirstValueAliasWidth = 0;
0196: valueToFirstValueAlias = new HashMap(1);
0197: Iterator it = getAvailableValues().iterator();
0198: while (it.hasNext()) {
0199: String value = (String) it.next();
0200: String first = (String) getValueAliases(value).get(0);
0201: if (first == null) { // internal error
0202: throw new IllegalArgumentException(
0203: "Value not in value aliases: " + value);
0204: }
0205: if (DEBUG && CHECK_NAME.equals(getName())) {
0206: System.out.println("First Alias: "
0207: + getName()
0208: + ": "
0209: + value
0210: + " => "
0211: + first
0212: + new BagFormatter()
0213: .join(getValueAliases(value)));
0214: }
0215: valueToFirstValueAlias.put(value, first);
0216: if (value.length() > maxValueWidth) {
0217: maxValueWidth = value.length();
0218: }
0219: if (first.length() > maxFirstValueAliasWidth) {
0220: maxFirstValueAliasWidth = first.length();
0221: }
0222: }
0223: }
0224:
0225: private int maxValueWidth = -1;
0226: private int maxFirstValueAliasWidth = -1;
0227:
0228: public int getMaxWidth(boolean getShortest) {
0229: if (maxValueWidth < 0)
0230: _getFirstValueAliasCache();
0231: if (getShortest)
0232: return maxFirstValueAliasWidth;
0233: return maxValueWidth;
0234: }
0235:
0236: public final UnicodeSet getSet(String propertyValue) {
0237: return getSet(propertyValue, null);
0238: }
0239:
0240: public final UnicodeSet getSet(PatternMatcher matcher) {
0241: return getSet(matcher, null);
0242: }
0243:
0244: public final UnicodeSet getSet(String propertyValue,
0245: UnicodeSet result) {
0246: return getSet(new SimpleMatcher(propertyValue,
0247: isType(STRING_OR_MISC_MASK) ? null
0248: : PROPERTY_COMPARATOR), result);
0249: }
0250:
0251: private UnicodeMap unicodeMap = null;
0252:
0253: public static final String UNUSED = "??";
0254:
0255: public final UnicodeSet getSet(PatternMatcher matcher,
0256: UnicodeSet result) {
0257: if (result == null)
0258: result = new UnicodeSet();
0259: if (isType(STRING_OR_MISC_MASK)) {
0260: for (int i = 0; i <= 0x10FFFF; ++i) {
0261: String value = getValue(i);
0262: if (value != null && matcher.matches(value)) {
0263: result.add(i);
0264: }
0265: }
0266: return result;
0267: }
0268: List temp = new ArrayList(1); // to avoid reallocating...
0269: UnicodeMap um = getUnicodeMap_internal();
0270: Iterator it = um.getAvailableValues(null).iterator();
0271: main: while (it.hasNext()) {
0272: String value = (String) it.next();
0273: temp.clear();
0274: Iterator it2 = getValueAliases(value, temp).iterator();
0275: while (it2.hasNext()) {
0276: String value2 = (String) it2.next();
0277: //System.out.println("Values:" + value2);
0278: if (matcher.matches(value2)
0279: || matcher.matches(toSkeleton(value2))) {
0280: um.getSet(value, result);
0281: continue main;
0282: }
0283: }
0284: }
0285: return result;
0286: }
0287:
0288: /*
0289: public UnicodeSet getMatchSet(UnicodeSet result) {
0290: if (result == null) result = new UnicodeSet();
0291: addAll(matchIterator, result);
0292: return result;
0293: }
0294:
0295: public void setMatchSet(UnicodeSet set) {
0296: matchIterator = new UnicodeSetIterator(set);
0297: }
0298: */
0299:
0300: /**
0301: * Utility for debugging
0302: */
0303: public static String getStack() {
0304: Exception e = new Exception();
0305: StringWriter sw = new StringWriter();
0306: PrintWriter pw = new PrintWriter(sw);
0307: e.printStackTrace(pw);
0308: pw.flush();
0309: return "Showing Stack with fake " + sw.getBuffer().toString();
0310: }
0311:
0312: // TODO use this instead of plain strings
0313: public static class Name implements Comparable {
0314: private static Map skeletonCache;
0315: private String skeleton;
0316: private String pretty;
0317: public final int RAW = 0, TITLE = 1, NORMAL = 2;
0318:
0319: public Name(String name, int style) {
0320: if (name == null)
0321: name = "";
0322: if (style == RAW) {
0323: skeleton = pretty = name;
0324: } else {
0325: pretty = regularize(name, style == TITLE);
0326: skeleton = toSkeleton(pretty);
0327: }
0328: }
0329:
0330: public int compareTo(Object o) {
0331: return skeleton.compareTo(((Name) o).skeleton);
0332: }
0333:
0334: public boolean equals(Object o) {
0335: return skeleton.equals(((Name) o).skeleton);
0336: }
0337:
0338: public int hashCode() {
0339: return skeleton.hashCode();
0340: }
0341:
0342: public String toString() {
0343: return pretty;
0344: }
0345: }
0346:
0347: /**
0348: * @return the unicode map
0349: */
0350: public UnicodeMap getUnicodeMap() {
0351: return getUnicodeMap(false);
0352: }
0353:
0354: /**
0355: * @return the unicode map
0356: */
0357: public UnicodeMap getUnicodeMap(boolean getShortest) {
0358: if (!getShortest)
0359: return (UnicodeMap) getUnicodeMap_internal()
0360: .cloneAsThawed();
0361: UnicodeMap result = new UnicodeMap();
0362: for (int i = 0; i <= 0x10FFFF; ++i) {
0363: //if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
0364: String value = getValue(i, true);
0365: result.put(i, value);
0366: }
0367: return result;
0368: }
0369:
0370: /**
0371: * @return the unicode map
0372: */
0373: protected UnicodeMap getUnicodeMap_internal() {
0374: if (unicodeMap == null)
0375: unicodeMap = _getUnicodeMap();
0376: return unicodeMap;
0377: }
0378:
0379: protected UnicodeMap _getUnicodeMap() {
0380: UnicodeMap result = new UnicodeMap();
0381: HashMap myIntern = new HashMap();
0382: for (int i = 0; i <= 0x10FFFF; ++i) {
0383: //if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
0384: String value = getValue(i);
0385: String iValue = (String) myIntern.get(value);
0386: if (iValue == null)
0387: myIntern.put(value, iValue = value);
0388: result.put(i, iValue);
0389: }
0390: if (DEBUG) {
0391: for (int i = 0; i <= 0x10FFFF; ++i) {
0392: //if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
0393: String value = getValue(i);
0394: String resultValue = (String) result.getValue(i);
0395: if (!value.equals(resultValue)) {
0396: throw new RuntimeException("Value failure at: "
0397: + Utility.hex(i));
0398: }
0399: }
0400: }
0401: if (DEBUG && CHECK_NAME.equals(getName())) {
0402: System.out.println(getName() + ":\t" + getClass().getName()
0403: + "\t" + getVersion());
0404: System.out.println(getStack());
0405: System.out.println(result);
0406: }
0407: return result;
0408: }
0409:
0410: /**
0411: * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
0412: */
0413: public static Collection addUnique(Object obj, Collection result) {
0414: if (obj != null && !result.contains(obj))
0415: result.add(obj);
0416: return result;
0417: }
0418:
0419: /**
0420: * Utility for managing property & non-string value aliases
0421: */
0422: public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
0423: public int compare(Object o1, Object o2) {
0424: return compareNames((String) o1, (String) o2);
0425: }
0426: };
0427:
0428: /**
0429: * Utility for managing property & non-string value aliases
0430: *
0431: */
0432: // TODO optimize
0433: public static boolean equalNames(String a, String b) {
0434: if (a == b)
0435: return true;
0436: if (a == null)
0437: return false;
0438: return toSkeleton(a).equals(toSkeleton(b));
0439: }
0440:
0441: /**
0442: * Utility for managing property & non-string value aliases
0443: */
0444: // TODO optimize
0445: public static int compareNames(String a, String b) {
0446: if (a == b)
0447: return 0;
0448: if (a == null)
0449: return -1;
0450: if (b == null)
0451: return 1;
0452: return toSkeleton(a).compareTo(toSkeleton(b));
0453: }
0454:
0455: /**
0456: * Utility for managing property & non-string value aliases
0457: */
0458: // TODO account for special names, tibetan, hangul
0459: public static String toSkeleton(String source) {
0460: if (source == null)
0461: return null;
0462: StringBuffer skeletonBuffer = new StringBuffer();
0463: boolean gotOne = false;
0464: // remove spaces, '_', '-'
0465: // we can do this with char, since no surrogates are involved
0466: for (int i = 0; i < source.length(); ++i) {
0467: char ch = source.charAt(i);
0468: if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
0469: gotOne = true;
0470: } else {
0471: char ch2 = Character.toLowerCase(ch);
0472: if (ch2 != ch) {
0473: gotOne = true;
0474: skeletonBuffer.append(ch2);
0475: } else {
0476: skeletonBuffer.append(ch);
0477: }
0478: }
0479: }
0480: if (!gotOne)
0481: return source; // avoid string creation
0482: return skeletonBuffer.toString();
0483: }
0484:
0485: // get the name skeleton
0486: public static String toNameSkeleton(String source) {
0487: if (source == null)
0488: return null;
0489: StringBuffer result = new StringBuffer();
0490: // remove spaces, medial '-'
0491: // we can do this with char, since no surrogates are involved
0492: for (int i = 0; i < source.length(); ++i) {
0493: char ch = source.charAt(i);
0494: if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z')
0495: || ch == '<' || ch == '>') {
0496: result.append(ch);
0497: } else if (ch == ' ') {
0498: // don't copy ever
0499: } else if (ch == '-') {
0500: // only copy non-medials AND trailing O-E
0501: if (0 == i
0502: || i == source.length() - 1
0503: || source.charAt(i - 1) == ' '
0504: || source.charAt(i + 1) == ' '
0505: || (i == source.length() - 2
0506: && source.charAt(i - 1) == 'O' && source
0507: .charAt(i + 1) == 'E')) {
0508: System.out.println("****** EXCEPTION " + source);
0509: result.append(ch);
0510: }
0511: // otherwise don't copy
0512: } else {
0513: throw new IllegalArgumentException(
0514: "Illegal Name Char: U+" + Utility.hex(ch)
0515: + ", " + ch);
0516: }
0517: }
0518: return result.toString();
0519: }
0520:
0521: /**
0522: * These routines use the Java functions, because they only need to act on ASCII
0523: * Changes space, - into _, inserts _ between lower and UPPER.
0524: */
0525: public static String regularize(String source,
0526: boolean titlecaseStart) {
0527: if (source == null)
0528: return source;
0529: /*if (source.equals("noBreak")) { // HACK
0530: if (titlecaseStart) return "NoBreak";
0531: return source;
0532: }
0533: */
0534: StringBuffer result = new StringBuffer();
0535: int lastCat = -1;
0536: boolean haveFirstCased = true;
0537: for (int i = 0; i < source.length(); ++i) {
0538: char c = source.charAt(i);
0539: if (c == ' ' || c == '-' || c == '_') {
0540: c = '_';
0541: haveFirstCased = true;
0542: }
0543: if (c == '=')
0544: haveFirstCased = true;
0545: int cat = Character.getType(c);
0546: if (lastCat == Character.LOWERCASE_LETTER
0547: && cat == Character.UPPERCASE_LETTER) {
0548: result.append('_');
0549: }
0550: if (haveFirstCased
0551: && (cat == Character.LOWERCASE_LETTER
0552: || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
0553: if (titlecaseStart) {
0554: c = Character.toUpperCase(c);
0555: }
0556: haveFirstCased = false;
0557: }
0558: result.append(c);
0559: lastCat = cat;
0560: }
0561: return result.toString();
0562: }
0563:
0564: /**
0565: * Utility function for comparing codepoint to string without
0566: * generating new string.
0567: * @param codepoint
0568: * @param other
0569: * @return true if the codepoint equals the string
0570: */
0571: public static final boolean equals(int codepoint, String other) {
0572: if (other.length() == 1) {
0573: return codepoint == other.charAt(0);
0574: }
0575: if (other.length() == 2) {
0576: return other.equals(UTF16.valueOf(codepoint));
0577: }
0578: return false;
0579: }
0580:
0581: /**
0582: * Utility that should be on UnicodeSet
0583: * @param source
0584: * @param result
0585: */
0586: static public void addAll(UnicodeSetIterator source,
0587: UnicodeSet result) {
0588: while (source.nextRange()) {
0589: if (source.codepoint == UnicodeSetIterator.IS_STRING) {
0590: result.add(source.string);
0591: } else {
0592: result.add(source.codepoint, source.codepointEnd);
0593: }
0594: }
0595: }
0596:
0597: /**
0598: * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
0599: */
0600: public static Collection addAllUnique(Collection source,
0601: Collection result) {
0602: for (Iterator it = source.iterator(); it.hasNext();) {
0603: addUnique(it.next(), result);
0604: }
0605: return result;
0606: }
0607:
0608: /**
0609: * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
0610: */
0611: public static Collection addAllUnique(Object[] source,
0612: Collection result) {
0613: for (int i = 0; i < source.length; ++i) {
0614: addUnique(source[i], result);
0615: }
0616: return result;
0617: }
0618:
0619: static public class Factory {
0620: static boolean DEBUG = false;
0621:
0622: Map canonicalNames = new TreeMap();
0623: Map skeletonNames = new TreeMap();
0624: Map propertyCache = new HashMap(1);
0625:
0626: public final Factory add(UnicodeProperty sp) {
0627: canonicalNames.put(sp.getName(), sp);
0628: List c = sp.getNameAliases(new ArrayList(1));
0629: Iterator it = c.iterator();
0630: while (it.hasNext()) {
0631: skeletonNames.put(toSkeleton((String) it.next()), sp);
0632: }
0633: return this ;
0634: }
0635:
0636: public final UnicodeProperty getProperty(String propertyAlias) {
0637: return (UnicodeProperty) skeletonNames
0638: .get(toSkeleton(propertyAlias));
0639: }
0640:
0641: public final List getAvailableNames() {
0642: return getAvailableNames(null);
0643: }
0644:
0645: public final List getAvailableNames(List result) {
0646: if (result == null)
0647: result = new ArrayList(1);
0648: Iterator it = canonicalNames.keySet().iterator();
0649: while (it.hasNext()) {
0650: addUnique(it.next(), result);
0651: }
0652: return result;
0653: }
0654:
0655: public final List getAvailableNames(int propertyTypeMask) {
0656: return getAvailableNames(propertyTypeMask, null);
0657: }
0658:
0659: public final List getAvailableNames(int propertyTypeMask,
0660: List result) {
0661: if (result == null)
0662: result = new ArrayList(1);
0663: Iterator it = canonicalNames.keySet().iterator();
0664: while (it.hasNext()) {
0665: String item = (String) it.next();
0666: UnicodeProperty property = getProperty(item);
0667: if (DEBUG)
0668: System.out.println("Properties: " + item + ","
0669: + property.getType());
0670: if (!property.isType(propertyTypeMask)) {
0671: //System.out.println("Masking: " + property.getType() + "," + propertyTypeMask);
0672: continue;
0673: }
0674: addUnique(property.getName(), result);
0675: }
0676: return result;
0677: }
0678:
0679: InversePatternMatcher inverseMatcher = new InversePatternMatcher();
0680:
0681: /**
0682: * Format is:
0683: * propname ('=' | '!=') propvalue ( '|' propValue )*
0684: */
0685: public final UnicodeSet getSet(String propAndValue,
0686: PatternMatcher matcher, UnicodeSet result) {
0687: int equalPos = propAndValue.indexOf('=');
0688: String prop = propAndValue.substring(0, equalPos);
0689: String value = propAndValue.substring(equalPos + 1);
0690: boolean negative = false;
0691: if (prop.endsWith("!")) {
0692: prop = prop.substring(0, prop.length() - 1);
0693: negative = true;
0694: }
0695: prop = prop.trim();
0696: UnicodeProperty up = getProperty(prop);
0697: if (matcher == null) {
0698: matcher = new SimpleMatcher(value, up
0699: .isType(STRING_OR_MISC_MASK) ? null
0700: : PROPERTY_COMPARATOR);
0701: }
0702: if (negative) {
0703: inverseMatcher.set(matcher);
0704: matcher = inverseMatcher;
0705: }
0706: return up.getSet(matcher.set(value), result);
0707: }
0708:
0709: public final UnicodeSet getSet(String propAndValue,
0710: PatternMatcher matcher) {
0711: return getSet(propAndValue, matcher, null);
0712: }
0713:
0714: public final UnicodeSet getSet(String propAndValue) {
0715: return getSet(propAndValue, null, null);
0716: }
0717:
0718: public final SymbolTable getSymbolTable(String prefix) {
0719: return new PropertySymbolTable(prefix);
0720: }
0721:
0722: private class MyXSymbolTable extends UnicodeSet.XSymbolTable {
0723: public boolean applyPropertyAlias(String propertyName,
0724: String propertyValue, UnicodeSet result) {
0725: if (false)
0726: System.out.println(propertyName + "="
0727: + propertyValue);
0728: UnicodeProperty prop = getProperty(propertyName);
0729: if (prop == null)
0730: return false;
0731: result.clear();
0732: UnicodeSet x = prop.getSet(propertyValue, result);
0733: return x.size() != 0;
0734: }
0735: }
0736:
0737: public final UnicodeSet.XSymbolTable getXSymbolTable() {
0738: return new MyXSymbolTable();
0739: }
0740:
0741: private class PropertySymbolTable implements SymbolTable {
0742: static final boolean DEBUG = false;
0743: private String prefix;
0744: RegexMatcher regexMatcher = new RegexMatcher();
0745:
0746: PropertySymbolTable(String prefix) {
0747: this .prefix = prefix;
0748: }
0749:
0750: public char[] lookup(String s) {
0751: if (DEBUG)
0752: System.out.println("\t(" + prefix + ")Looking up "
0753: + s);
0754: // ensure, again, that prefix matches
0755: int start = prefix.length();
0756: if (!s.regionMatches(true, 0, prefix, 0, start))
0757: return null;
0758:
0759: int pos = s.indexOf(':', start);
0760: if (pos < 0) { // should never happen
0761: throw new IllegalArgumentException(
0762: "Internal Error: missing =: " + s + "\r\n");
0763: }
0764: UnicodeProperty prop = getProperty(s.substring(start,
0765: pos));
0766: if (prop == null) {
0767: throw new IllegalArgumentException(
0768: "Invalid Property in: " + s + "\r\nUse "
0769: + showSet(getAvailableNames()));
0770: }
0771: String value = s.substring(pos + 1);
0772: UnicodeSet set;
0773: if (value.startsWith("\u00AB")) { // regex!
0774: set = prop.getSet(regexMatcher.set(value.substring(
0775: 1, value.length() - 1)));
0776: } else {
0777: set = prop.getSet(value);
0778: }
0779: if (set.size() == 0) {
0780: throw new IllegalArgumentException(
0781: "Empty Property-Value in: "
0782: + s
0783: + "\r\nUse "
0784: + showSet(prop.getAvailableValues()));
0785: }
0786: if (DEBUG)
0787: System.out.println("\t(" + prefix + ")Returning "
0788: + set.toPattern(true));
0789: return set.toPattern(true).toCharArray(); // really ugly
0790: }
0791:
0792: private String showSet(List list) {
0793: StringBuffer result = new StringBuffer("[");
0794: boolean first = true;
0795: for (Iterator it = list.iterator(); it.hasNext();) {
0796: if (!first)
0797: result.append(", ");
0798: else
0799: first = false;
0800: result.append(it.next().toString());
0801: }
0802: result.append("]");
0803: return result.toString();
0804: }
0805:
0806: public UnicodeMatcher lookupMatcher(int ch) {
0807: return null;
0808: }
0809:
0810: public String parseReference(String text,
0811: ParsePosition pos, int limit) {
0812: if (DEBUG)
0813: System.out.println("\t(" + prefix + ")Parsing <"
0814: + text.substring(pos.getIndex(), limit)
0815: + ">");
0816: int start = pos.getIndex();
0817: int veryStart = start;
0818: // ensure that it starts with 'prefix'
0819: if (!text.regionMatches(true, start, prefix, 0, prefix
0820: .length()))
0821: return null;
0822: start += prefix.length();
0823: // now see if it is of the form identifier:identifier
0824: int i = getIdentifier(text, start, limit);
0825: if (i == start)
0826: return null;
0827: String prop = text.substring(start, i);
0828: String value = "true";
0829: if (i < limit) {
0830: if (text.charAt(i) == ':') {
0831: int j;
0832: if (text.charAt(i + 1) == '\u00AB') { // regular expression
0833: j = text.indexOf('\u00BB', i + 2) + 1; // include last character
0834: if (j <= 0)
0835: return null;
0836: } else {
0837: j = getIdentifier(text, i + 1, limit);
0838: }
0839: value = text.substring(i + 1, j);
0840: i = j;
0841: }
0842: }
0843: pos.setIndex(i);
0844: if (DEBUG)
0845: System.out.println("\t(" + prefix + ")Parsed <"
0846: + prop + ">=<" + value + ">");
0847: return prefix + prop + ":" + value;
0848: }
0849:
0850: private int getIdentifier(String text, int start, int limit) {
0851: if (DEBUG)
0852: System.out.println("\tGetID <"
0853: + text.substring(start, limit) + ">");
0854: int cp = 0;
0855: int i;
0856: for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
0857: cp = UTF16.charAt(text, i);
0858: if (!com.ibm.icu.lang.UCharacter
0859: .isUnicodeIdentifierPart(cp)
0860: && cp != '.') {
0861: break;
0862: }
0863: }
0864: if (DEBUG)
0865: System.out.println("\tGotID <"
0866: + text.substring(start, i) + ">");
0867: return i;
0868: }
0869: }
0870: }
0871:
0872: public static class FilteredProperty extends UnicodeProperty {
0873: private UnicodeProperty property;
0874: protected StringFilter filter;
0875: protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(
0876: new UnicodeSet(0, 0x10FFFF));
0877: protected HashMap backmap;
0878: boolean allowValueAliasCollisions = false;
0879:
0880: public FilteredProperty(UnicodeProperty property,
0881: StringFilter filter) {
0882: this .property = property;
0883: this .filter = filter;
0884: }
0885:
0886: public StringFilter getFilter() {
0887: return filter;
0888: }
0889:
0890: public UnicodeProperty setFilter(StringFilter filter) {
0891: this .filter = filter;
0892: return this ;
0893: }
0894:
0895: List temp = new ArrayList(1);
0896:
0897: public List _getAvailableValues(List result) {
0898: temp.clear();
0899: return filter.addUnique(property.getAvailableValues(temp),
0900: result);
0901: }
0902:
0903: public List _getNameAliases(List result) {
0904: temp.clear();
0905: return filter.addUnique(property.getNameAliases(temp),
0906: result);
0907: }
0908:
0909: public String _getValue(int codepoint) {
0910: return filter.remap(property.getValue(codepoint));
0911: }
0912:
0913: public List _getValueAliases(String valueAlias, List result) {
0914: if (backmap == null) {
0915: backmap = new HashMap(1);
0916: temp.clear();
0917: Iterator it = property.getAvailableValues(temp)
0918: .iterator();
0919: while (it.hasNext()) {
0920: String item = (String) it.next();
0921: String mappedItem = filter.remap(item);
0922: if (backmap.get(mappedItem) != null
0923: && !allowValueAliasCollisions) {
0924: throw new IllegalArgumentException(
0925: "Filter makes values collide! " + item
0926: + ", " + mappedItem);
0927: }
0928: backmap.put(mappedItem, item);
0929: }
0930: }
0931: valueAlias = (String) backmap.get(valueAlias);
0932: temp.clear();
0933: return filter.addUnique(property.getValueAliases(
0934: valueAlias, temp), result);
0935: }
0936:
0937: public String _getVersion() {
0938: return property.getVersion();
0939: }
0940:
0941: public boolean isAllowValueAliasCollisions() {
0942: return allowValueAliasCollisions;
0943: }
0944:
0945: public FilteredProperty setAllowValueAliasCollisions(boolean b) {
0946: allowValueAliasCollisions = b;
0947: return this ;
0948: }
0949:
0950: }
0951:
0952: public static abstract class StringFilter implements Cloneable {
0953: public abstract String remap(String original);
0954:
0955: public final List addUnique(Collection source, List result) {
0956: if (result == null)
0957: result = new ArrayList(1);
0958: Iterator it = source.iterator();
0959: while (it.hasNext()) {
0960: UnicodeProperty.addUnique(remap((String) it.next()),
0961: result);
0962: }
0963: return result;
0964: }
0965: /*
0966: public Object clone() {
0967: try {
0968: return super.clone();
0969: } catch (CloneNotSupportedException e) {
0970: throw new IllegalStateException("Should never happen.");
0971: }
0972: }
0973: */
0974: }
0975:
0976: public static class MapFilter extends StringFilter {
0977: private Map valueMap;
0978:
0979: public MapFilter(Map valueMap) {
0980: this .valueMap = valueMap;
0981: }
0982:
0983: public String remap(String original) {
0984: Object changed = valueMap.get(original);
0985: return changed == null ? original : (String) changed;
0986: }
0987:
0988: public Map getMap() {
0989: return valueMap;
0990: }
0991: }
0992:
0993: public interface PatternMatcher extends ObjectMatcher {
0994: public PatternMatcher set(String pattern);
0995: }
0996:
0997: public static class InversePatternMatcher extends InverseMatcher
0998: implements PatternMatcher {
0999: PatternMatcher other;
1000:
1001: public PatternMatcher set(PatternMatcher toInverse) {
1002: other = toInverse;
1003: return this ;
1004: }
1005:
1006: public boolean matches(Object value) {
1007: return !other.matches(value);
1008: }
1009:
1010: public PatternMatcher set(String pattern) {
1011: other.set(pattern);
1012: return this ;
1013: }
1014: }
1015:
1016: public static class SimpleMatcher implements PatternMatcher {
1017: Comparator comparator;
1018: String pattern;
1019:
1020: public SimpleMatcher(String pattern, Comparator comparator) {
1021: this .comparator = comparator;
1022: this .pattern = pattern;
1023: }
1024:
1025: public boolean matches(Object value) {
1026: if (comparator == null)
1027: return pattern.equals(value);
1028: return comparator.compare(pattern, value) == 0;
1029: }
1030:
1031: public PatternMatcher set(String pattern) {
1032: this .pattern = pattern;
1033: return this ;
1034: }
1035: }
1036:
1037: public static class RegexMatcher implements
1038: UnicodeProperty.PatternMatcher {
1039: private java.util.regex.Matcher matcher;
1040:
1041: public UnicodeProperty.PatternMatcher set(String pattern) {
1042: matcher = Pattern.compile(pattern).matcher("");
1043: return this ;
1044: }
1045:
1046: public boolean matches(Object value) {
1047: matcher.reset(value.toString());
1048: return matcher.matches();
1049: }
1050: }
1051:
1052: public static abstract class BaseProperty extends UnicodeProperty {
1053: protected List propertyAliases = new ArrayList(1);
1054: protected Map toValueAliases;
1055: protected String version;
1056:
1057: public BaseProperty setMain(String alias, String shortAlias,
1058: int propertyType, String version) {
1059: setName(alias);
1060: setType(propertyType);
1061: propertyAliases.add(shortAlias);
1062: propertyAliases.add(alias);
1063: this .version = version;
1064: return this ;
1065: }
1066:
1067: public String _getVersion() {
1068: return version;
1069: }
1070:
1071: public List _getNameAliases(List result) {
1072: addAllUnique(propertyAliases, result);
1073: return result;
1074: }
1075:
1076: public BaseProperty addValueAliases(
1077: String[][] valueAndAlternates, boolean errorIfCant) {
1078: if (toValueAliases == null)
1079: _fixValueAliases();
1080: for (int i = 0; i < valueAndAlternates.length; ++i) {
1081: for (int j = 1; j < valueAndAlternates[0].length; ++j) {
1082: addValueAlias(valueAndAlternates[i][0],
1083: valueAndAlternates[i][j], errorIfCant);
1084: }
1085: }
1086: return this ;
1087: }
1088:
1089: public void addValueAlias(String value, String valueAlias,
1090: boolean errorIfCant) {
1091: List result = (List) toValueAliases.get(value);
1092: if (result == null && !errorIfCant)
1093: return;
1094: addUnique(value, result);
1095: addUnique(valueAlias, result);
1096: }
1097:
1098: protected List _getValueAliases(String valueAlias, List result) {
1099: if (toValueAliases == null)
1100: _fixValueAliases();
1101: List a = (List) toValueAliases.get(valueAlias);
1102: if (a != null)
1103: addAllUnique(a, result);
1104: return result;
1105: }
1106:
1107: protected void _fixValueAliases() {
1108: if (toValueAliases == null)
1109: toValueAliases = new HashMap(1);
1110: for (Iterator it = getAvailableValues().iterator(); it
1111: .hasNext();) {
1112: Object value = it.next();
1113: List result;
1114: _ensureValueInAliases(value);
1115: }
1116: }
1117:
1118: protected void _ensureValueInAliases(Object value) {
1119: List result = (List) toValueAliases.get(value);
1120: if (result == null)
1121: toValueAliases.put(value, result = new ArrayList(1));
1122: addUnique(value, result);
1123: }
1124:
1125: public BaseProperty swapFirst2ValueAliases() {
1126: for (Iterator it = toValueAliases.keySet().iterator(); it
1127: .hasNext();) {
1128: List list = (List) toValueAliases.get(it.next());
1129: if (list.size() < 2)
1130: continue;
1131: Object first = list.get(0);
1132: list.set(0, list.get(1));
1133: list.set(1, first);
1134: }
1135: return this ;
1136: }
1137:
1138: }
1139:
1140: public static abstract class SimpleProperty extends BaseProperty {
1141: List values;
1142:
1143: public SimpleProperty addName(String alias) {
1144: propertyAliases.add(alias);
1145: return this ;
1146: }
1147:
1148: public SimpleProperty setValues(String valueAlias) {
1149: _addToValues(valueAlias, null);
1150: return this ;
1151: }
1152:
1153: public SimpleProperty setValues(String[] valueAliases,
1154: String[] alternateValueAliases) {
1155: for (int i = 0; i < valueAliases.length; ++i) {
1156: if (valueAliases[i].equals(UNUSED))
1157: continue;
1158: _addToValues(
1159: valueAliases[i],
1160: alternateValueAliases != null ? alternateValueAliases[i]
1161: : null);
1162: }
1163: return this ;
1164: }
1165:
1166: public SimpleProperty setValues(List valueAliases) {
1167: this .values = new ArrayList(valueAliases);
1168: for (Iterator it = this .values.iterator(); it.hasNext();) {
1169: _addToValues((String) it.next(), null);
1170: }
1171: return this ;
1172: }
1173:
1174: public List _getAvailableValues(List result) {
1175: if (values == null)
1176: _fillValues();
1177: result.addAll(values);
1178: return result;
1179: }
1180:
1181: protected void _fillValues() {
1182: List newvalues = (List) getUnicodeMap_internal()
1183: .getAvailableValues(new ArrayList());
1184: for (Iterator it = newvalues.iterator(); it.hasNext();) {
1185: _addToValues((String) it.next(), null);
1186: }
1187: }
1188:
1189: private void _addToValues(String item, String alias) {
1190: if (values == null)
1191: values = new ArrayList(1);
1192: if (toValueAliases == null)
1193: _fixValueAliases();
1194: addUnique(item, values);
1195: _ensureValueInAliases(item);
1196: addValueAlias(item, alias, true);
1197: }
1198: /* public String _getVersion() {
1199: return version;
1200: }
1201: */
1202: }
1203:
1204: public static class UnicodeMapProperty extends BaseProperty {
1205: /*
1206: * Example of usage:
1207: * new UnicodeProperty.UnicodeMapProperty() {
1208: {
1209: unicodeMap = new UnicodeMap();
1210: unicodeMap.setErrorOnReset(true);
1211: unicodeMap.put(0xD, "CR");
1212: unicodeMap.put(0xA, "LF");
1213: UnicodeProperty cat = getProperty("General_Category");
1214: UnicodeSet temp = cat.getSet("Line_Separator")
1215: .addAll(cat.getSet("Paragraph_Separator"))
1216: .addAll(cat.getSet("Control"))
1217: .addAll(cat.getSet("Format"))
1218: .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D);
1219: unicodeMap.putAll(temp, "Control");
1220: UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
1221: unicodeMap.putAll(graphemeExtend,"Extend");
1222: UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
1223: unicodeMap.putAll(hangul.getSet("L"),"L");
1224: unicodeMap.putAll(hangul.getSet("V"),"V");
1225: unicodeMap.putAll(hangul.getSet("T"),"T");
1226: unicodeMap.putAll(hangul.getSet("LV"),"LV");
1227: unicodeMap.putAll(hangul.getSet("LVT"),"LVT");
1228: unicodeMap.setMissing("Other");
1229: }
1230: }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version)
1231: */
1232: protected UnicodeMap unicodeMap;
1233:
1234: public UnicodeMapProperty set(UnicodeMap map) {
1235: unicodeMap = map;
1236: return this ;
1237: }
1238:
1239: protected String _getValue(int codepoint) {
1240: return (String) unicodeMap.getValue(codepoint);
1241: }
1242:
1243: /* protected List _getValueAliases(String valueAlias, List result) {
1244: if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result;
1245: result.add(valueAlias);
1246: return result; // no other aliases
1247: }
1248: */protected List _getAvailableValues(List result) {
1249: return (List) unicodeMap.getAvailableValues(result);
1250: }
1251: }
1252: }
1253: //#endif
|