0001: /**
0002: *******************************************************************************
0003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */package com.ibm.icu.text;
0007:
0008: import java.util.Comparator;
0009: import java.util.Locale;
0010: import java.util.MissingResourceException;
0011: import java.util.Set;
0012:
0013: import com.ibm.icu.impl.ICUDebug;
0014: import com.ibm.icu.impl.ICUResourceBundle;
0015: import com.ibm.icu.impl.LocaleUtility;
0016: import com.ibm.icu.util.ULocale;
0017: import com.ibm.icu.util.UResourceBundle;
0018: import com.ibm.icu.util.VersionInfo;
0019:
0020: /**
0021: * <p>Collator performs locale-sensitive string comparison. A concrete
0022: * subclass, RuleBasedCollator, allows customization of the collation
0023: * ordering by the use of rule sets.</p>
0024: *
 * <p>Following the <a href="http://www.unicode.org">Unicode
0026: * Consortium</a>'s specifications for the
0027: * <a href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
0028: * Algorithm (UCA)</a>, there are 5 different levels of strength used
0029: * in comparisons:
0030: *
0031: * <ul>
0032: * <li>PRIMARY strength: Typically, this is used to denote differences between
 * base characters (for example, "a" &lt; "b").
0034: * It is the strongest difference. For example, dictionaries are divided
0035: * into different sections by base character.
0036: * <li>SECONDARY strength: Accents in the characters are considered secondary
 * differences (for example, "as" &lt; "às" &lt; "at"). Other
0038: * differences
0039: * between letters can also be considered secondary differences, depending
0040: * on the language. A secondary difference is ignored when there is a
0041: * primary difference anywhere in the strings.
0042: * <li>TERTIARY strength: Upper and lower case differences in characters are
 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
0044: * "aò"). In addition, a variant of a letter differs from the base
0045: * form on the tertiary strength (such as "A" and "Ⓐ"). Another
0046: * example is the
0047: * difference between large and small Kana. A tertiary difference is ignored
0048: * when there is a primary or secondary difference anywhere in the strings.
0049: * <li>QUATERNARY strength: When punctuation is ignored
0050: * <a href="http://icu.sourceforge.net/userguide/Collate_Concepts.html#Ignoring_Punctuation">
0051: * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
0052: * strength, an additional strength level can
0053: * be used to distinguish words with and without punctuation (for example,
 * "ab" &lt; "a-b" &lt; "aB").
0055: * This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
0056: * difference. The QUATERNARY strength should only be used if ignoring
0057: * punctuation is required.
0058: * <li>IDENTICAL strength:
0059: * When all other strengths are equal, the IDENTICAL strength is used as a
 * tiebreaker. The Unicode code point values of the NFD form of each string
 * are compared, in case there is still no difference at the other levels.
 * For example, Hebrew cantillation marks are only distinguished at this
 * strength. This strength should be used sparingly, as a difference only in
 * code point values between two strings is an extremely rare occurrence.
0065: * Using this strength substantially decreases the performance for both
0066: * comparison and collation key generation APIs. This strength also
0067: * increases the size of the collation key.
0068: * </ul>
0069: *
 * <p>Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
0071: * the canonical decomposition mode and one that does not use any decomposition.
0072: * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
0073: * is not supported here. If the canonical
0074: * decomposition mode is set, the Collator handles un-normalized text properly,
0075: * producing the same results as if the text were normalized in NFD. If
0076: * canonical decomposition is turned off, it is the user's responsibility to
0077: * ensure that all text is already in the appropriate form before performing
0078: * a comparison or before getting a CollationKey.</p>
0079: *
0080: * <p>For more information about the collation service see the
0081: * <a href="http://icu.sourceforge.net/userguide/Collate_Intro.html">users
0082: * guide</a>.</p>
0083: *
0084: * <p>Examples of use
0085: * <pre>
0086: * // Get the Collator for US English and set its strength to PRIMARY
0087: * Collator usCollator = Collator.getInstance(Locale.US);
0088: * usCollator.setStrength(Collator.PRIMARY);
0089: * if (usCollator.compare("abc", "ABC") == 0) {
0090: * System.out.println("Strings are equivalent");
0091: * }
0092: *
0093: * The following example shows how to compare two strings using the
0094: * Collator for the default locale.
0095: *
0096: * // Compare two strings in the default locale
0097: * Collator myCollator = Collator.getInstance();
0098: * myCollator.setDecomposition(NO_DECOMPOSITION);
0099: * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
0100: * System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
0101: * myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
0102: * if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
0103: * System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
0104: * }
0105: * else {
0106: * System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
0107: * }
0108: * }
0109: * else {
0110: * System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
0111: * }
0112: * </pre>
0113: * </p>
0114: * @see RuleBasedCollator
0115: * @see CollationKey
0116: * @author Syn Wee Quek
0117: * @stable ICU 2.8
0118: */
0119: public abstract class Collator implements Comparator, Cloneable {
0120: // public data members ---------------------------------------------------
0121:
0122: /**
0123: * Strongest collator strength value. Typically used to denote differences
0124: * between base characters. See class documentation for more explanation.
0125: * @see #setStrength
0126: * @see #getStrength
0127: * @stable ICU 2.8
0128: */
0129: public final static int PRIMARY = 0;
0130:
0131: /**
0132: * Second level collator strength value.
0133: * Accents in the characters are considered secondary differences.
0134: * Other differences between letters can also be considered secondary
0135: * differences, depending on the language.
0136: * See class documentation for more explanation.
0137: * @see #setStrength
0138: * @see #getStrength
0139: * @stable ICU 2.8
0140: */
0141: public final static int SECONDARY = 1;
0142:
0143: /**
0144: * Third level collator strength value.
0145: * Upper and lower case differences in characters are distinguished at this
0146: * strength level. In addition, a variant of a letter differs from the base
0147: * form on the tertiary level.
0148: * See class documentation for more explanation.
0149: * @see #setStrength
0150: * @see #getStrength
0151: * @stable ICU 2.8
0152: */
0153: public final static int TERTIARY = 2;
0154:
0155: /**
0156: * Fourth level collator strength value.
0157: * When punctuation is ignored
0158: * <a href="http://icu.sourceforge.net/userguide/Collate_Concepts.html#Ignoring_Punctuation">
0159: * (see Ignoring Punctuations in the user guide)</a> at PRIMARY to TERTIARY
0160: * strength, an additional strength level can
0161: * be used to distinguish words with and without punctuation.
0162: * See class documentation for more explanation.
0163: * @see #setStrength
0164: * @see #getStrength
0165: * @stable ICU 2.8
0166: */
0167: public final static int QUATERNARY = 3;
0168:
0169: /**
0170: * <p>
0171: * Smallest Collator strength value. When all other strengths are equal,
0172: * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
0173: * values of the NFD form of each string are compared, just in case there
0174: * is no difference.
0175: * See class documentation for more explanation.
0176: * </p>
0177: * <p>
0178: * Note this value is different from JDK's
0179: * </p>
0180: * @stable ICU 2.8
0181: */
0182: public final static int IDENTICAL = 15;
0183:
0184: /**
0185: * This is for backwards compatibility with Java APIs only. It
0186: * should not be used, IDENTICAL should be used instead. ICU's
0187: * collation does not support Java's FULL_DECOMPOSITION mode.
0188: * @draft ICU 3.4
0189: * @provisional This API might change or be removed in a future release.
0190: */
0191: public final static int FULL_DECOMPOSITION = IDENTICAL;
0192:
0193: /**
0194: * <p>Decomposition mode value. With NO_DECOMPOSITION set, Strings
0195: * will not be decomposed for collation. This is the default
0196: * decomposition setting unless otherwise specified by the locale
0197: * used to create the Collator.</p>
0198: *
0199: * <p><strong>Note</strong> this value is different from the JDK's.</p>
0200: * @see #CANONICAL_DECOMPOSITION
0201: * @see #getDecomposition
0202: * @see #setDecomposition
0203: * @stable ICU 2.8
0204: */
0205: public final static int NO_DECOMPOSITION = 16;
0206:
0207: /**
0208: * <p>Decomposition mode value. With CANONICAL_DECOMPOSITION set,
0209: * characters that are canonical variants according to the Unicode standard
0210: * will be decomposed for collation.</p>
0211: *
0212: * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
0213: * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
0214: * Unicode Technical Report #15</a>.
0215: * </p>
0216: * @see #NO_DECOMPOSITION
0217: * @see #getDecomposition
0218: * @see #setDecomposition
0219: * @stable ICU 2.8
0220: */
0221: public final static int CANONICAL_DECOMPOSITION = 17;
0222:
0223: // public methods --------------------------------------------------------
0224:
0225: // public setters --------------------------------------------------------
0226:
0227: /**
0228: * <p>Sets this Collator's strength property. The strength property
0229: * determines the minimum level of difference considered significant
0230: * during comparison.</p>
0231: *
0232: * <p>The default strength for the Collator is TERTIARY, unless specified
0233: * otherwise by the locale used to create the Collator.</p>
0234: *
0235: * <p>See the Collator class description for an example of use.</p>
0236: * @param newStrength the new strength value.
0237: * @see #getStrength
0238: * @see #PRIMARY
0239: * @see #SECONDARY
0240: * @see #TERTIARY
0241: * @see #QUATERNARY
0242: * @see #IDENTICAL
0243: * @exception IllegalArgumentException if the new strength value is not one
0244: * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
0245: * @stable ICU 2.8
0246: */
0247: public void setStrength(int newStrength) {
0248: if ((newStrength != PRIMARY) && (newStrength != SECONDARY)
0249: && (newStrength != TERTIARY)
0250: && (newStrength != QUATERNARY)
0251: && (newStrength != IDENTICAL)) {
0252: throw new IllegalArgumentException(
0253: "Incorrect comparison level.");
0254: }
0255: m_strength_ = newStrength;
0256: }
0257:
0258: /**
0259: * <p>Set the decomposition mode of this Collator. Setting this
0260: * decomposition property with CANONICAL_DECOMPOSITION allows the
0261: * Collator to handle un-normalized text properly, producing the
0262: * same results as if the text were normalized. If
0263: * NO_DECOMPOSITION is set, it is the user's responsibility to
0264: * insure that all text is already in the appropriate form before
0265: * a comparison or before getting a CollationKey. Adjusting
0266: * decomposition mode allows the user to select between faster and
0267: * more complete collation behavior.</p>
0268: *
0269: * <p>Since a great many of the world's languages do not require
0270: * text normalization, most locales set NO_DECOMPOSITION as the
0271: * default decomposition mode.</p>
0272: *
0273: * The default decompositon mode for the Collator is
0274: * NO_DECOMPOSITON, unless specified otherwise by the locale used
0275: * to create the Collator.</p>
0276: *
0277: * <p>See getDecomposition for a description of decomposition
0278: * mode.</p>
0279: *
0280: * @param decomposition the new decomposition mode
0281: * @see #getDecomposition
0282: * @see #NO_DECOMPOSITION
0283: * @see #CANONICAL_DECOMPOSITION
0284: * @exception IllegalArgumentException If the given value is not a valid
0285: * decomposition mode.
0286: * @stable ICU 2.8
0287: */
0288: public void setDecomposition(int decomposition) {
0289: if ((decomposition != NO_DECOMPOSITION)
0290: && (decomposition != CANONICAL_DECOMPOSITION)) {
0291: throw new IllegalArgumentException(
0292: "Wrong decomposition mode.");
0293: }
0294: m_decomposition_ = decomposition;
0295: }
0296:
0297: // public getters --------------------------------------------------------
0298:
0299: /**
0300: * Gets the Collator for the current default locale.
0301: * The default locale is determined by java.util.Locale.getDefault().
0302: * @return the Collator for the default locale (for example, en_US) if it
0303: * is created successfully. Otherwise if there is no Collator
0304: * associated with the current locale, the default UCA collator
0305: * will be returned.
0306: * @see java.util.Locale#getDefault()
0307: * @see #getInstance(Locale)
0308: * @stable ICU 2.8
0309: */
0310: public static final Collator getInstance() {
0311: return getInstance(ULocale.getDefault());
0312: }
0313:
0314: /**
0315: * Clone the collator.
0316: * @stable ICU 2.6
0317: * @return a clone of this collator.
0318: */
0319: public Object clone() throws CloneNotSupportedException {
0320: return super .clone();
0321: }
0322:
0323: // begin registry stuff
0324:
0325: /**
0326: * A factory used with registerFactory to register multiple collators and provide
0327: * display names for them. If standard locale display names are sufficient,
0328: * Collator instances may be registered instead.
0329: * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
0330: * ULocale instead of Locale. Instead of overriding createCollator(Locale),
0331: * new implementations should override createCollator(ULocale). Note that
0332: * one of these two methods <b>MUST</b> be overridden or else an infinite
0333: * loop will occur.
0334: * @stable ICU 2.6
0335: */
0336: public static abstract class CollatorFactory {
0337: /**
0338: * Return true if this factory will be visible. Default is true.
0339: * If not visible, the locales supported by this factory will not
0340: * be listed by getAvailableLocales.
0341: *
0342: * @return true if this factory is visible
0343: * @stable ICU 2.6
0344: */
0345: public boolean visible() {
0346: return true;
0347: }
0348:
0349: /**
0350: * Return an instance of the appropriate collator. If the locale
0351: * is not supported, return null.
0352: * <b>Note:</b> as of ICU4J 3.2, implementations should override
0353: * this method instead of createCollator(Locale).
0354: * @param loc the locale for which this collator is to be created.
0355: * @return the newly created collator.
0356: * @draft ICU 3.2
0357: * @provisional This API might change or be removed in a future release.
0358: */
0359: public Collator createCollator(ULocale loc) {
0360: return createCollator(loc.toLocale());
0361: }
0362:
0363: /**
0364: * Return an instance of the appropriate collator. If the locale
0365: * is not supported, return null.
0366: * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
0367: * createCollator(ULocale) instead of this method, and inherit this
0368: * method's implementation. This method is no longer abstract
0369: * and instead delegates to createCollator(ULocale).
0370: * @param loc the locale for which this collator is to be created.
0371: * @return the newly created collator.
0372: * @stable ICU 2.6
0373: */
0374: public Collator createCollator(Locale loc) {
0375: return createCollator(ULocale.forLocale(loc));
0376: }
0377:
0378: /**
0379: * Return the name of the collator for the objectLocale, localized for the displayLocale.
0380: * If objectLocale is not visible or not defined by the factory, return null.
0381: * @param objectLocale the locale identifying the collator
0382: * @param displayLocale the locale for which the display name of the collator should be localized
0383: * @return the display name
0384: * @stable ICU 2.6
0385: */
0386: public String getDisplayName(Locale objectLocale,
0387: Locale displayLocale) {
0388: return getDisplayName(ULocale.forLocale(objectLocale),
0389: ULocale.forLocale(displayLocale));
0390: }
0391:
0392: /**
0393: * Return the name of the collator for the objectLocale, localized for the displayLocale.
0394: * If objectLocale is not visible or not defined by the factory, return null.
0395: * @param objectLocale the locale identifying the collator
0396: * @param displayLocale the locale for which the display name of the collator should be localized
0397: * @return the display name
0398: * @draft ICU 3.2
0399: * @provisional This API might change or be removed in a future release.
0400: */
0401: public String getDisplayName(ULocale objectLocale,
0402: ULocale displayLocale) {
0403: if (visible()) {
0404: Set supported = getSupportedLocaleIDs();
0405: String name = objectLocale.getBaseName();
0406: if (supported.contains(name)) {
0407: return objectLocale.getDisplayName(displayLocale);
0408: }
0409: }
0410: return null;
0411: }
0412:
0413: /**
0414: * Return an unmodifiable collection of the locale names directly
0415: * supported by this factory.
0416: *
0417: * @return the set of supported locale IDs.
0418: * @stable ICU 2.6
0419: */
0420: public abstract Set getSupportedLocaleIDs();
0421:
0422: /**
0423: * Empty default constructor.
0424: * @stable ICU 2.6
0425: */
0426: protected CollatorFactory() {
0427: }
0428: }
0429:
/**
 * Internal bridge between Collator's static service methods and the
 * implementation class that getShim() instantiates reflectively
 * (com.ibm.icu.text.CollatorServiceShim). Each method below backs the
 * static Collator method of the same name.
 */
static abstract class ServiceShim {
    /** Backs Collator.getInstance(ULocale). */
    abstract Collator getInstance(ULocale l);

    /** Backs Collator.registerInstance; the result is the key later passed to unregister. */
    abstract Object registerInstance(Collator c, ULocale l);

    /** Backs Collator.registerFactory; the result is the key later passed to unregister. */
    abstract Object registerFactory(CollatorFactory f);

    /** Backs Collator.unregister; k is a key returned by one of the register methods. */
    abstract boolean unregister(Object k);

    /** Backs Collator.getAvailableLocales once the shim has been created. */
    abstract Locale[] getAvailableLocales(); // TODO remove

    /** Backs Collator.getAvailableULocales once the shim has been created. */
    abstract ULocale[] getAvailableULocales();

    /** Backs the static Collator.getDisplayName overloads (ol = object locale, dl = display locale). */
    abstract String getDisplayName(ULocale ol, ULocale dl);
}
0445:
0446: private static ServiceShim shim;
0447:
0448: private static ServiceShim getShim() {
0449: // Note: this instantiation is safe on loose-memory-model configurations
0450: // despite lack of synchronization, since the shim instance has no state--
0451: // it's all in the class init. The worst problem is we might instantiate
0452: // two shim instances, but they'll share the same state so that's ok.
0453: if (shim == null) {
0454: try {
0455: Class cls = Class
0456: .forName("com.ibm.icu.text.CollatorServiceShim");
0457: shim = (ServiceShim) cls.newInstance();
0458: } catch (MissingResourceException e) {
0459: throw e;
0460: } catch (Exception e) {
0461: ///CLOVER:OFF
0462: if (DEBUG) {
0463: e.printStackTrace();
0464: }
0465: throw new RuntimeException(e.getMessage());
0466: ///CLOVER:ON
0467: }
0468: }
0469: return shim;
0470: }
0471:
0472: /**
0473: * Gets the Collator for the desired locale.
0474: * @param locale the desired locale.
0475: * @return Collator for the desired locale if it is created successfully.
0476: * Otherwise if there is no Collator
0477: * associated with the current locale, a default UCA collator will
0478: * be returned.
0479: * @see java.util.Locale
0480: * @see java.util.ResourceBundle
0481: * @see #getInstance(Locale)
0482: * @see #getInstance()
0483: * @stable ICU 3.0
0484: */
0485: public static final Collator getInstance(ULocale locale) {
0486: // fetching from service cache is faster than instantiation
0487: return getShim().getInstance(locale);
0488: }
0489:
0490: /**
0491: * Gets the Collator for the desired locale.
0492: * @param locale the desired locale.
0493: * @return Collator for the desired locale if it is created successfully.
0494: * Otherwise if there is no Collator
0495: * associated with the current locale, a default UCA collator will
0496: * be returned.
0497: * @see java.util.Locale
0498: * @see java.util.ResourceBundle
0499: * @see #getInstance(ULocale)
0500: * @see #getInstance()
0501: * @stable ICU 2.8
0502: */
0503: public static final Collator getInstance(Locale locale) {
0504: return getInstance(ULocale.forLocale(locale));
0505: }
0506:
0507: /**
0508: * Register a collator as the default collator for the provided locale. The
0509: * collator should not be modified after it is registered.
0510: *
0511: * @param collator the collator to register
0512: * @param locale the locale for which this is the default collator
0513: * @return an object that can be used to unregister the registered collator.
0514: *
0515: * @draft ICU 3.2
0516: * @provisional This API might change or be removed in a future release.
0517: */
0518: public static final Object registerInstance(Collator collator,
0519: ULocale locale) {
0520: return getShim().registerInstance(collator, locale);
0521: }
0522:
0523: /**
0524: * Register a collator factory.
0525: *
0526: * @param factory the factory to register
0527: * @return an object that can be used to unregister the registered factory.
0528: *
0529: * @stable ICU 2.6
0530: */
0531: public static final Object registerFactory(CollatorFactory factory) {
0532: return getShim().registerFactory(factory);
0533: }
0534:
0535: /**
0536: * Unregister a collator previously registered using registerInstance.
0537: * @param registryKey the object previously returned by registerInstance.
0538: * @return true if the collator was successfully unregistered.
0539: * @stable ICU 2.6
0540: */
0541: public static final boolean unregister(Object registryKey) {
0542: if (shim == null) {
0543: return false;
0544: }
0545: return shim.unregister(registryKey);
0546: }
0547:
0548: /**
0549: * Get the set of locales, as Locale objects, for which collators
0550: * are installed. Note that Locale objects do not support RFC 3066.
0551: * @return the list of locales in which collators are installed.
0552: * This list includes any that have been registered, in addition to
0553: * those that are installed with ICU4J.
0554: * @stable ICU 2.4
0555: */
0556: public static Locale[] getAvailableLocales() {
0557: // TODO make this wrap getAvailableULocales later
0558: if (shim == null) {
0559: return ICUResourceBundle
0560: .getAvailableLocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME);
0561: }
0562: return shim.getAvailableLocales();
0563: }
0564:
0565: /**
0566: * Get the set of locales, as ULocale objects, for which collators
0567: * are installed. ULocale objects support RFC 3066.
0568: * @return the list of locales in which collators are installed.
0569: * This list includes any that have been registered, in addition to
0570: * those that are installed with ICU4J.
0571: * @stable ICU 3.0
0572: */
0573: public static final ULocale[] getAvailableULocales() {
0574: if (shim == null) {
0575: return ICUResourceBundle
0576: .getAvailableULocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME);
0577: }
0578: return shim.getAvailableULocales();
0579: }
0580:
0581: /**
0582: * The list of keywords for this service. This must be kept in sync with
0583: * the resource data.
0584: * @since ICU 3.0
0585: */
0586: private static final String[] KEYWORDS = { "collation" };
0587:
0588: /**
0589: * The resource name for this service. Note that this is not the same as
0590: * the keyword for this service.
0591: * @since ICU 3.0
0592: */
0593: private static final String RESOURCE = "collations";
0594:
0595: /**
0596: * The resource bundle base name for this service.
0597: * *since ICU 3.0
0598: */
0599: private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
0600:
0601: /**
0602: * Return an array of all possible keywords that are relevant to
0603: * collation. At this point, the only recognized keyword for this
0604: * service is "collation".
0605: * @return an array of valid collation keywords.
0606: * @see #getKeywordValues
0607: * @stable ICU 3.0
0608: */
0609: public static final String[] getKeywords() {
0610: return KEYWORDS;
0611: }
0612:
0613: /**
0614: * Given a keyword, return an array of all values for
0615: * that keyword that are currently in use.
0616: * @param keyword one of the keywords returned by getKeywords.
0617: * @see #getKeywords
0618: * @stable ICU 3.0
0619: */
0620: public static final String[] getKeywordValues(String keyword) {
0621: if (!keyword.equals(KEYWORDS[0])) {
0622: throw new IllegalArgumentException("Invalid keyword: "
0623: + keyword);
0624: }
0625: return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
0626: }
0627:
0628: /**
0629: * Return the functionally equivalent locale for the given
0630: * requested locale, with respect to given keyword, for the
0631: * collation service. If two locales return the same result, then
0632: * collators instantiated for these locales will behave
0633: * equivalently. The converse is not always true; two collators
0634: * may in fact be equivalent, but return different results, due to
0635: * internal details. The return result has no other meaning than
0636: * that stated above, and implies nothing as to the relationship
0637: * between the two locales. This is intended for use by
0638: * applications who wish to cache collators, or otherwise reuse
0639: * collators when possible. The functional equivalent may change
0640: * over time. For more information, please see the <a
0641: * href="http://icu.sourceforge.net/userguide/locale.html#services">
0642: * Locales and Services</a> section of the ICU User Guide.
0643: * @param keyword a particular keyword as enumerated by
0644: * getKeywords.
0645: * @param locID The requested locale
0646: * @param isAvailable If non-null, isAvailable[0] will receive and
0647: * output boolean that indicates whether the requested locale was
0648: * 'available' to the collation service. The locale is defined as
0649: * 'available' if it physically exists within the collation locale
0650: * data. If non-null, isAvailable must have length >= 1.
0651: * @return the locale
0652: * @stable ICU 3.0
0653: */
0654: public static final ULocale getFunctionalEquivalent(String keyword,
0655: ULocale locID, boolean isAvailable[]) {
0656: return ICUResourceBundle.getFunctionalEquivalent(BASE,
0657: RESOURCE, keyword, locID, isAvailable);
0658: }
0659:
0660: /**
0661: * Return the functionally equivalent locale for the given
0662: * requested locale, with respect to given keyword, for the
0663: * collation service.
0664: * @param keyword a particular keyword as enumerated by
0665: * getKeywords.
0666: * @param locID The requested locale
0667: * @return the locale
0668: * @see #getFunctionalEquivalent(String,ULocale,boolean[])
0669: * @stable ICU 3.0
0670: */
0671: public static final ULocale getFunctionalEquivalent(String keyword,
0672: ULocale locID) {
0673: return getFunctionalEquivalent(keyword, locID, null);
0674: }
0675:
0676: /**
0677: * Get the name of the collator for the objectLocale, localized for the displayLocale.
0678: * @param objectLocale the locale of the collator
0679: * @param displayLocale the locale for the collator's display name
0680: * @return the display name
0681: * @stable ICU 2.6
0682: */
0683: static public String getDisplayName(Locale objectLocale,
0684: Locale displayLocale) {
0685: return getShim().getDisplayName(
0686: ULocale.forLocale(objectLocale),
0687: ULocale.forLocale(displayLocale));
0688: }
0689:
0690: /**
0691: * Get the name of the collator for the objectLocale, localized for the displayLocale.
0692: * @param objectLocale the locale of the collator
0693: * @param displayLocale the locale for the collator's display name
0694: * @return the display name
0695: * @draft ICU 3.2
0696: * @provisional This API might change or be removed in a future release.
0697: */
0698: static public String getDisplayName(ULocale objectLocale,
0699: ULocale displayLocale) {
0700: return getShim().getDisplayName(objectLocale, displayLocale);
0701: }
0702:
0703: /**
0704: * Get the name of the collator for the objectLocale, localized for the current locale.
0705: * @param objectLocale the locale of the collator
0706: * @return the display name
0707: * @stable ICU 2.6
0708: */
0709: static public String getDisplayName(Locale objectLocale) {
0710: return getShim().getDisplayName(
0711: ULocale.forLocale(objectLocale), ULocale.getDefault());
0712: }
0713:
0714: /**
0715: * Get the name of the collator for the objectLocale, localized for the current locale.
0716: * @param objectLocale the locale of the collator
0717: * @return the display name
0718: * @draft ICU 3.2
0719: * @provisional This API might change or be removed in a future release.
0720: */
0721: static public String getDisplayName(ULocale objectLocale) {
0722: return getShim().getDisplayName(objectLocale,
0723: ULocale.getDefault());
0724: }
0725:
0726: /**
0727: * <p>Returns this Collator's strength property. The strength property
0728: * determines the minimum level of difference considered significant.
0729: * </p>
0730: * <p>
0731: * See the Collator class description for more details.
0732: * </p>
0733: * @return this Collator's current strength property.
0734: * @see #setStrength
0735: * @see #PRIMARY
0736: * @see #SECONDARY
0737: * @see #TERTIARY
0738: * @see #QUATERNARY
0739: * @see #IDENTICAL
0740: * @stable ICU 2.8
0741: */
0742: public int getStrength() {
0743: return m_strength_;
0744: }
0745:
0746: /**
0747: * <p>
0748: * Get the decomposition mode of this Collator. Decomposition mode
0749: * determines how Unicode composed characters are handled.
0750: * </p>
0751: * <p>
0752: * See the Collator class description for more details.
0753: * </p>
0754: * @return the decomposition mode
0755: * @see #setDecomposition
0756: * @see #NO_DECOMPOSITION
0757: * @see #CANONICAL_DECOMPOSITION
0758: * @stable ICU 2.8
0759: */
0760: public int getDecomposition() {
0761: return m_decomposition_;
0762: }
0763:
0764: /**
0765: * <p>
0766: * Compares the source text String to the target text String according to
0767: * this Collator's rules, strength and decomposition mode.
0768: * Returns an integer less than,
0769: * equal to or greater than zero depending on whether the source String is
0770: * less than, equal to or greater than the target String. See the Collator
0771: * class description for an example of use.
0772: * </p>
0773: * @param source the source String.
0774: * @param target the target String.
0775: * @return Returns an integer value. Value is less than zero if source is
0776: * less than target, value is zero if source and target are equal,
0777: * value is greater than zero if source is greater than target.
0778: * @see CollationKey
0779: * @see #getCollationKey
0780: * @exception NullPointerException thrown if either arguments is null.
0781: * IllegalArgumentException thrown if either source or target is
0782: * not of the class String.
0783: * @stable ICU 2.8
0784: */
0785: public int compare(Object source, Object target) {
0786: if (!(source instanceof String) || !(target instanceof String)) {
0787: throw new IllegalArgumentException(
0788: "Arguments have to be of type String");
0789: }
0790: return compare((String) source, (String) target);
0791: }
0792:
0793: // public other methods -------------------------------------------------
0794:
0795: /**
0796: * Convenience method for comparing the equality of two text Strings using
0797: * this Collator's rules, strength and decomposition mode.
0798: * @param source the source string to be compared.
0799: * @param target the target string to be compared.
0800: * @return true if the strings are equal according to the collation
0801: * rules, otherwise false.
0802: * @see #compare
0803: * @exception NullPointerException thrown if either arguments is null.
0804: * @stable ICU 2.8
0805: */
0806: public boolean equals(String source, String target) {
0807: return (compare(source, target) == 0);
0808: }
0809:
0810: /**
0811: * Get an UnicodeSet that contains all the characters and sequences
0812: * tailored in this collator.
0813: * @return a pointer to a UnicodeSet object containing all the
0814: * code points and sequences that may sort differently than
0815: * in the UCA.
0816: * @stable ICU 2.4
0817: */
0818: public UnicodeSet getTailoredSet() {
0819: return new UnicodeSet(0, 0x10FFFF);
0820: }
0821:
0822: /**
0823: * <p>
0824: * Compares the source text String to the target text String according to
0825: * this Collator's rules, strength and decomposition mode.
0826: * Returns an integer less than,
0827: * equal to or greater than zero depending on whether the source String is
0828: * less than, equal to or greater than the target String. See the Collator
0829: * class description for an example of use.
0830: * </p>
0831: * @param source the source String.
0832: * @param target the target String.
0833: * @return Returns an integer value. Value is less than zero if source is
0834: * less than target, value is zero if source and target are equal,
0835: * value is greater than zero if source is greater than target.
0836: * @see CollationKey
0837: * @see #getCollationKey
0838: * @exception NullPointerException thrown if either arguments is null.
0839: * @stable ICU 2.8
0840: */
0841: public abstract int compare(String source, String target);
0842:
0843: /**
0844: * <p>
0845: * Transforms the String into a CollationKey suitable for efficient
0846: * repeated comparison. The resulting key depends on the collator's
0847: * rules, strength and decomposition mode.
0848: * </p>
0849: * <p>See the CollationKey class documentation for more information.</p>
0850: * @param source the string to be transformed into a CollationKey.
0851: * @return the CollationKey for the given String based on this Collator's
0852: * collation rules. If the source String is null, a null
0853: * CollationKey is returned.
0854: * @see CollationKey
0855: * @see #compare(String, String)
0856: * @see #getRawCollationKey
0857: * @stable ICU 2.8
0858: */
0859: public abstract CollationKey getCollationKey(String source);
0860:
0861: /**
0862: * Gets the simpler form of a CollationKey for the String source following
0863: * the rules of this Collator and stores the result into the user provided
0864: * argument key.
0865: * If key has a internal byte array of length that's too small for the
0866: * result, the internal byte array will be grown to the exact required
0867: * size.
0868: * @param source the text String to be transformed into a RawCollationKey
0869: * @return If key is null, a new instance of RawCollationKey will be
0870: * created and returned, otherwise the user provided key will be
0871: * returned.
0872: * @see #compare(String, String)
0873: * @see #getCollationKey
0874: * @see RawCollationKey
0875: * @stable ICU 2.8
0876: */
0877: public abstract RawCollationKey getRawCollationKey(String source,
0878: RawCollationKey key);
0879:
0880: /**
0881: * <p>
0882: * Variable top is a two byte primary value which causes all the codepoints
0883: * with primary values that are less or equal than the variable top to be
0884: * shifted when alternate handling is set to SHIFTED.
0885: * </p>
0886: * <p>
0887: * Sets the variable top to a collation element value of a string supplied.
0888: * </p>
0889: * @param varTop one or more (if contraction) characters to which the
0890: * variable top should be set
0891: * @return a int value containing the value of the variable top in upper 16
0892: * bits. Lower 16 bits are undefined.
0893: * @exception IllegalArgumentException is thrown if varTop argument is not
0894: * a valid variable top element. A variable top element is
0895: * invalid when it is a contraction that does not exist in the
0896: * Collation order or when the PRIMARY strength collation
0897: * element for the variable top has more than two bytes
0898: * @see #getVariableTop
0899: * @see RuleBasedCollator#setAlternateHandlingShifted
0900: * @stable ICU 2.6
0901: */
0902: public abstract int setVariableTop(String varTop);
0903:
0904: /**
0905: * Gets the variable top value of a Collator.
0906: * Lower 16 bits are undefined and should be ignored.
0907: * @return the variable top value of a Collator.
0908: * @see #setVariableTop
0909: * @stable ICU 2.6
0910: */
0911: public abstract int getVariableTop();
0912:
0913: /**
0914: * Sets the variable top to a collation element value supplied.
0915: * Variable top is set to the upper 16 bits.
0916: * Lower 16 bits are ignored.
0917: * @param varTop Collation element value, as returned by setVariableTop or
0918: * getVariableTop
0919: * @see #getVariableTop
0920: * @see #setVariableTop
0921: * @stable ICU 2.6
0922: */
0923: public abstract void setVariableTop(int varTop);
0924:
0925: /**
0926: * Get the version of this collator object.
0927: * @return the version object associated with this collator
0928: * @stable ICU 2.8
0929: */
0930: public abstract VersionInfo getVersion();
0931:
0932: /**
0933: * Get the UCA version of this collator object.
0934: * @return the version object associated with this collator
0935: * @stable ICU 2.8
0936: */
0937: public abstract VersionInfo getUCAVersion();
0938:
0939: // protected constructor -------------------------------------------------
0940:
/**
 * Default constructor. It performs no work and is declared explicitly
 * only to make javadocs happy.
 * @stable ICU 2.4
 */
protected Collator() {
    // Intentionally empty.
}
0947:
0948: // package private methods -----------------------------------------------
0949:
0950: // private data members --------------------------------------------------
0951:
0952: /**
0953: * Collation strength
0954: */
0955: private int m_strength_ = TERTIARY;
0956:
0957: /**
0958: * Decomposition mode
0959: */
0960: private int m_decomposition_ = CANONICAL_DECOMPOSITION;
0961:
0962: private static final boolean DEBUG = ICUDebug.enabled("collator");
0963:
0964: // private methods -------------------------------------------------------
0965:
0966: // end registry stuff
0967:
0968: // -------- BEGIN ULocale boilerplate --------
0969:
0970: /**
0971: * Return the locale that was used to create this object, or null.
0972: * This may may differ from the locale requested at the time of
0973: * this object's creation. For example, if an object is created
0974: * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
0975: * drawn from <tt>en</tt> (the <i>actual</i> locale), and
0976: * <tt>en_US</tt> may be the most specific locale that exists (the
0977: * <i>valid</i> locale).
0978: *
0979: * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
0980: * contains a partial preview implementation. The * <i>actual</i>
0981: * locale is returned correctly, but the <i>valid</i> locale is
0982: * not, in most cases.
0983: * @param type type of information requested, either {@link
0984: * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
0985: * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
0986: * @return the information specified by <i>type</i>, or null if
0987: * this object was not constructed from locale data.
0988: * @see com.ibm.icu.util.ULocale
0989: * @see com.ibm.icu.util.ULocale#VALID_LOCALE
0990: * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
0991: * @draft ICU 2.8 (retain)
0992: * @provisional This API might change or be removed in a future release.
0993: */
0994: public final ULocale getLocale(ULocale.Type type) {
0995: return type == ULocale.ACTUAL_LOCALE ? this .actualLocale
0996: : this .validLocale;
0997: }
0998:
0999: /**
1000: * Set information about the locales that were used to create this
1001: * object. If the object was not constructed from locale data,
1002: * both arguments should be set to null. Otherwise, neither
1003: * should be null. The actual locale must be at the same level or
1004: * less specific than the valid locale. This method is intended
1005: * for use by factories or other entities that create objects of
1006: * this class.
1007: * @param valid the most specific locale containing any resource
1008: * data, or null
1009: * @param actual the locale containing data used to construct this
1010: * object, or null
1011: * @see com.ibm.icu.util.ULocale
1012: * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1013: * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1014: * @internal
1015: */
1016: final void setLocale(ULocale valid, ULocale actual) {
1017: // Change the following to an assertion later
1018: if ((valid == null) != (actual == null)) {
1019: ///CLOVER:OFF
1020: throw new IllegalArgumentException();
1021: ///CLOVER:ON
1022: }
1023: // Another check we could do is that the actual locale is at
1024: // the same level or less specific than the valid locale.
1025: this .validLocale = valid;
1026: this .actualLocale = actual;
1027: }
1028:
1029: /**
1030: * The most specific locale containing any resource data, or null.
1031: * @see com.ibm.icu.util.ULocale
1032: * @internal
1033: */
1034: private ULocale validLocale;
1035:
1036: /**
1037: * The locale containing data used to construct this object, or
1038: * null.
1039: * @see com.ibm.icu.util.ULocale
1040: * @internal
1041: */
1042: private ULocale actualLocale;
1043:
1044: // -------- END ULocale boilerplate --------
1045: }
|