001: /*
002: *
003: * @(#)Collator.java 1.37 06/10/10
004: *
005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
006: * Reserved. Use is subject to license terms.
007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU General Public License version
011: * 2 only, as published by the Free Software Foundation.
012: *
013: * This program is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * General Public License version 2 for more details (a copy is
017: * included at /legal/license.txt).
018: *
019: * You should have received a copy of the GNU General Public License
020: * version 2 along with this work; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022: * 02110-1301 USA
023: *
024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
025: * Clara, CA 95054 or visit www.sun.com if you need additional
026: * information or have any questions.
027: */
028:
029: /*
030: * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
031: * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
032: *
033: * The original version of this source code and documentation is copyrighted
034: * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
035: * materials are provided under terms of a License Agreement between Taligent
036: * and Sun. This technology is protected by multiple US and International
037: * patents. This notice and attribution to Taligent may not be removed.
038: * Taligent is a registered trademark of Taligent, Inc.
039: *
040: */
041:
042: package java.text;
043:
044: import java.util.Locale;
045: import java.util.MissingResourceException;
046: import java.util.ResourceBundle;
047: import sun.misc.SoftCache;
048: import sun.text.resources.LocaleData;
049:
050: /**
051: * The <code>Collator</code> class performs locale-sensitive
052: * <code>String</code> comparison. You use this class to build
053: * searching and sorting routines for natural language text.
054: *
055: * <p>
056: * <code>Collator</code> is an abstract base class. Subclasses
057: * implement specific collation strategies. One subclass,
058: * <code>RuleBasedCollator</code>, is currently provided with
059: * the Java 2 platform and is applicable to a wide set of languages. Other
060: * subclasses may be created to handle more specialized needs.
061: *
062: * <p>
063: * Like other locale-sensitive classes, you can use the static
064: * factory method, <code>getInstance</code>, to obtain the appropriate
065: * <code>Collator</code> object for a given locale. You will only need
066: * to look at the subclasses of <code>Collator</code> if you need
067: * to understand the details of a particular collation strategy or
068: * if you need to modify that strategy.
069: *
070: * <p>
071: * The following example shows how to compare two strings using
072: * the <code>Collator</code> for the default locale.
073: * <blockquote>
074: * <pre>
075: * // Compare two strings in the default locale
076: * Collator myCollator = Collator.getInstance();
077: * if( myCollator.compare("abc", "ABC") &lt; 0 )
078: * System.out.println("abc is less than ABC");
079: * else
080: * System.out.println("abc is greater than or equal to ABC");
081: * </pre>
082: * </blockquote>
083: *
084: * <p>
085: * You can set a <code>Collator</code>'s <em>strength</em> property
086: * to determine the level of difference considered significant in
087: * comparisons. Four strengths are provided: <code>PRIMARY</code>,
088: * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
089: * The exact assignment of strengths to language features is
090: * locale dependent. For example, in Czech, "e" and "f" are considered
091: * primary differences, while "e" and "\u00EA" are secondary differences,
092: * "e" and "E" are tertiary differences and "e" and "e" are identical.
093: * The following shows how both case and accents could be ignored for
094: * US English.
095: * <blockquote>
096: * <pre>
097: * //Get the Collator for US English and set its strength to PRIMARY
098: * Collator usCollator = Collator.getInstance(Locale.US);
099: * usCollator.setStrength(Collator.PRIMARY);
100: * if( usCollator.compare("abc", "ABC") == 0 ) {
101: * System.out.println("Strings are equivalent");
102: * }
103: * </pre>
104: * </blockquote>
105: * <p>
106: * For comparing <code>String</code>s exactly once, the <code>compare</code>
107: * method provides the best performance. When sorting a list of
108: * <code>String</code>s however, it is generally necessary to compare each
109: * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
110: * provide better performance. The <code>CollationKey</code> class converts
111: * a <code>String</code> to a series of bits that can be compared bitwise
112: * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
113: * created by a <code>Collator</code> object for a given <code>String</code>.
114: * <br>
115: * <strong>Note:</strong> <code>CollationKey</code>s from different
116: * <code>Collator</code>s can not be compared. See the class description
117: * for {@link CollationKey}
118: * for an example using <code>CollationKey</code>s.
119: *
120: * @see RuleBasedCollator
121: * @see CollationKey
122: * @see CollationElementIterator
123: * @see Locale
124: * @version 1.34, 01/27/03
125: * @author Helena Shih, Laura Werner, Richard Gillam
126: */
127:
128: public abstract class Collator implements java.util.Comparator,
129: Cloneable {
130: /**
131: * Collator strength value. When set, only PRIMARY differences are
132: * considered significant during comparison. The assignment of strengths
133: * to language features is locale dependant. A common example is for
134: * different base letters ("a" vs "b") to be considered a PRIMARY difference.
135: * @see java.text.Collator#setStrength
136: * @see java.text.Collator#getStrength
137: */
138: public final static int PRIMARY = 0;
139: /**
140: * Collator strength value. When set, only SECONDARY and above differences are
141: * considered significant during comparison. The assignment of strengths
142: * to language features is locale dependant. A common example is for
143: * different accented forms of the same base letter ("a" vs "\u00E4") to be
144: * considered a SECONDARY difference.
145: * @see java.text.Collator#setStrength
146: * @see java.text.Collator#getStrength
147: */
148: public final static int SECONDARY = 1;
149: /**
150: * Collator strength value. When set, only TERTIARY and above differences are
151: * considered significant during comparison. The assignment of strengths
152: * to language features is locale dependant. A common example is for
153: * case differences ("a" vs "A") to be considered a TERTIARY difference.
154: * @see java.text.Collator#setStrength
155: * @see java.text.Collator#getStrength
156: */
157: public final static int TERTIARY = 2;
158:
159: /**
160: * Collator strength value. When set, all differences are
161: * considered significant during comparison. The assignment of strengths
162: * to language features is locale dependant. A common example is for control
163: * characters ("\u0001" vs "\u0002") to be considered equal at the
164: * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
165: * level. Additionally, differences between pre-composed accents such as
166: * "\u00C0" (A-grave) and combining accents such as "A\u0300"
167: * (A, combining-grave) will be considered significant at the tertiary
168: * level if decomposition is set to NO_DECOMPOSITION.
169: */
170: public final static int IDENTICAL = 3;
171:
172: /**
173: * Decomposition mode value. With NO_DECOMPOSITION
174: * set, accented characters will not be decomposed for collation. This
175: * is the default setting and provides the fastest collation but
176: * will only produce correct results for languages that do not use accents.
177: * @see java.text.Collator#getDecomposition
178: * @see java.text.Collator#setDecomposition
179: */
180: public final static int NO_DECOMPOSITION = 0;
181:
182: /**
183: * Decomposition mode value. With CANONICAL_DECOMPOSITION
184: * set, characters that are canonical variants according to Unicode 2.0
185: * will be decomposed for collation. This
186: * should be used to get correct collation of accented characters.
187: * <p>
188: * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
189: * described in
190: * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
191: * Technical Report #15</a>.
192: * @see java.text.Collator#getDecomposition
193: * @see java.text.Collator#setDecomposition
194: */
195: public final static int CANONICAL_DECOMPOSITION = 1;
196:
197: /**
198: * Decomposition mode value. With FULL_DECOMPOSITION
199: * set, both Unicode canonical variants and Unicode compatibility variants
200: * will be decomposed for collation. This causes not only accented
201: * characters to be collated, but also characters that have special formats
202: * to be collated with their norminal form. For example, the half-width and
203: * full-width ASCII and Katakana characters are then collated together.
204: * FULL_DECOMPOSITION is the most complete and therefore the slowest
205: * decomposition mode.
206: * <p>
207: * FULL_DECOMPOSITION corresponds to Normalization Form KD as
208: * described in
209: * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
210: * Technical Report #15</a>.
211: * @see java.text.Collator#getDecomposition
212: * @see java.text.Collator#setDecomposition
213: */
214: public final static int FULL_DECOMPOSITION = 2;
215:
216: /**
217: * Gets the Collator for the current default locale.
218: * The default locale is determined by java.util.Locale.getDefault.
219: * @return the Collator for the default locale.(for example, en_US)
220: * @see java.util.Locale#getDefault
221: */
222: public static synchronized Collator getInstance() {
223: return getInstance(Locale.getDefault());
224: }
225:
226: /**
227: * Gets the Collator for the desired locale.
228: * @param desiredLocale the desired locale.
229: * @return the Collator for the desired locale.
230: * @see java.util.Locale
231: * @see java.util.ResourceBundle
232: */
233: public static synchronized Collator getInstance(Locale desiredLocale) {
234: RuleBasedCollator result = null;
235: result = (RuleBasedCollator) cache.get(desiredLocale);
236: if (result != null) {
237: return (Collator) result.clone(); // make the world safe
238: }
239:
240: // Load the resource of the desired locale from resource
241: // manager.
242: String colString = "";
243: int decomp = CANONICAL_DECOMPOSITION;
244:
245: try {
246: ResourceBundle resource = LocaleData
247: .getLocaleElements(desiredLocale);
248:
249: colString = resource.getString("CollationElements");
250: decomp = ((Integer) resource.getObject("CollationDecomp"))
251: .intValue();
252: } catch (MissingResourceException e) {
253: // Use default values
254: }
255: try {
256: result = new RuleBasedCollator(CollationRules.DEFAULTRULES
257: + colString, decomp);
258: } catch (ParseException foo) {
259: // predefined tables should contain correct grammar
260: try {
261: result = new RuleBasedCollator(
262: CollationRules.DEFAULTRULES);
263: } catch (ParseException bar) {
264: // do nothing
265: }
266: }
267: // Now that RuleBasedCollator adds expansions for pre-composed characters
268: // into their decomposed equivalents, the default collators don't need
269: // to have decomposition turned on. Laura, 5/5/98, bug 4114077
270: result.setDecomposition(NO_DECOMPOSITION);
271:
272: cache.put(desiredLocale, result);
273: return (Collator) result.clone();
274: }
275:
276: /**
277: * Compares the source string to the target string according to the
278: * collation rules for this Collator. Returns an integer less than,
279: * equal to or greater than zero depending on whether the source String is
280: * less than, equal to or greater than the target string. See the Collator
281: * class description for an example of use.
282: * <p>
283: * For a one time comparison, this method has the best performance. If a
284: * given String will be involved in multiple comparisons, CollationKey.compareTo
285: * has the best performance. See the Collator class description for an example
286: * using CollationKeys.
287: * @param source the source string.
288: * @param target the target string.
289: * @return Returns an integer value. Value is less than zero if source is less than
290: * target, value is zero if source and target are equal, value is greater than zero
291: * if source is greater than target.
292: * @see java.text.CollationKey
293: * @see java.text.Collator#getCollationKey
294: */
295: public abstract int compare(String source, String target);
296:
297: /**
298: * Compares its two arguments for order. Returns a negative integer,
299: * zero, or a positive integer as the first argument is less than, equal
300: * to, or greater than the second.
301: * <p>
302: * This implementation merely returns
303: * <code> compare((String)o1, (String)o2) </code>.
304: *
305: * @return a negative integer, zero, or a positive integer as the
306: * first argument is less than, equal to, or greater than the
307: * second.
308: * @exception ClassCastException the arguments cannot be cast to Strings.
309: * @see java.util.Comparator
310: * @since 1.2
311: */
312: public int compare(Object o1, Object o2) {
313: return compare((String) o1, (String) o2);
314: }
315:
316: /**
317: * Transforms the String into a series of bits that can be compared bitwise
318: * to other CollationKeys. CollationKeys provide better performance than
319: * Collator.compare when Strings are involved in multiple comparisons.
320: * See the Collator class description for an example using CollationKeys.
321: * @param source the string to be transformed into a collation key.
322: * @return the CollationKey for the given String based on this Collator's collation
323: * rules. If the source String is null, a null CollationKey is returned.
324: * @see java.text.CollationKey
325: * @see java.text.Collator#compare
326: */
327: public abstract CollationKey getCollationKey(String source);
328:
329: /**
330: * Convenience method for comparing the equality of two strings based on
331: * this Collator's collation rules.
332: * @param source the source string to be compared with.
333: * @param target the target string to be compared with.
334: * @return true if the strings are equal according to the collation
335: * rules. false, otherwise.
336: * @see java.text.Collator#compare
337: */
338: public boolean equals(String source, String target) {
339: return (compare(source, target) == Collator.EQUAL);
340: }
341:
342: /**
343: * Returns this Collator's strength property. The strength property determines
344: * the minimum level of difference considered significant during comparison.
345: * See the Collator class description for an example of use.
346: * @return this Collator's current strength property.
347: * @see java.text.Collator#setStrength
348: * @see java.text.Collator#PRIMARY
349: * @see java.text.Collator#SECONDARY
350: * @see java.text.Collator#TERTIARY
351: * @see java.text.Collator#IDENTICAL
352: */
353: public synchronized int getStrength() {
354: return strength;
355: }
356:
357: /**
358: * Sets this Collator's strength property. The strength property determines
359: * the minimum level of difference considered significant during comparison.
360: * See the Collator class description for an example of use.
361: * @param newStrength the new strength value.
362: * @see java.text.Collator#getStrength
363: * @see java.text.Collator#PRIMARY
364: * @see java.text.Collator#SECONDARY
365: * @see java.text.Collator#TERTIARY
366: * @see java.text.Collator#IDENTICAL
367: * @exception IllegalArgumentException If the new strength value is not one of
368: * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
369: */
370: public synchronized void setStrength(int newStrength) {
371: if ((newStrength != PRIMARY) && (newStrength != SECONDARY)
372: && (newStrength != TERTIARY)
373: && (newStrength != IDENTICAL))
374: throw new IllegalArgumentException(
375: "Incorrect comparison level.");
376: strength = newStrength;
377: }
378:
379: /**
380: * Get the decomposition mode of this Collator. Decomposition mode
381: * determines how Unicode composed characters are handled. Adjusting
382: * decomposition mode allows the user to select between faster and more
383: * complete collation behavior.
384: * <p>The three values for decomposition mode are:
385: * <UL>
386: * <LI>NO_DECOMPOSITION,
387: * <LI>CANONICAL_DECOMPOSITION
388: * <LI>FULL_DECOMPOSITION.
389: * </UL>
390: * See the documentation for these three constants for a description
391: * of their meaning.
392: * @return the decomposition mode
393: * @see java.text.Collator#setDecomposition
394: * @see java.text.Collator#NO_DECOMPOSITION
395: * @see java.text.Collator#CANONICAL_DECOMPOSITION
396: * @see java.text.Collator#FULL_DECOMPOSITION
397: */
398: public synchronized int getDecomposition() {
399: return decmp;
400: }
401:
402: /**
403: * Set the decomposition mode of this Collator. See getDecomposition
404: * for a description of decomposition mode.
405: * @param decompositionMode the new decomposition mode.
406: * @see java.text.Collator#getDecomposition
407: * @see java.text.Collator#NO_DECOMPOSITION
408: * @see java.text.Collator#CANONICAL_DECOMPOSITION
409: * @see java.text.Collator#FULL_DECOMPOSITION
410: * @exception IllegalArgumentException If the given value is not a valid decomposition
411: * mode.
412: */
413: public synchronized void setDecomposition(int decompositionMode) {
414: if ((decompositionMode != NO_DECOMPOSITION)
415: && (decompositionMode != CANONICAL_DECOMPOSITION)
416: && (decompositionMode != FULL_DECOMPOSITION))
417: throw new IllegalArgumentException(
418: "Wrong decomposition mode.");
419: decmp = decompositionMode;
420: }
421:
422: /**
423: * Get the set of Locales for which Collators are installed.
424: * @return the list of available locales which collators are installed.
425: */
426: public static synchronized Locale[] getAvailableLocales() {
427: return LocaleData.getAvailableLocales("CollationElements");
428: }
429:
430: /**
431: * Overrides Cloneable
432: */
433: public Object clone() {
434: try {
435: return (Collator) super .clone();
436: } catch (CloneNotSupportedException e) {
437: throw new InternalError();
438: }
439: }
440:
441: /**
442: * Compares the equality of two Collators.
443: * @param that the Collator to be compared with this.
444: * @return true if this Collator is the same as that Collator;
445: * false otherwise.
446: */
447: public boolean equals(Object that) {
448: if (this == that)
449: return true;
450: if (that == null)
451: return false;
452: if (getClass() != that.getClass())
453: return false;
454: Collator other = (Collator) that;
455: return ((strength == other.strength) && (decmp == other.decmp));
456: }
457:
458: /**
459: * Generates the hash code for this Collator.
460: */
461: abstract public int hashCode();
462:
463: /**
464: * Default constructor. This constructor is
465: * protected so subclasses can get access to it. Users typically create
466: * a Collator sub-class by calling the factory method getInstance.
467: * @see java.text.Collator#getInstance
468: */
469: protected Collator() {
470: strength = TERTIARY;
471: decmp = CANONICAL_DECOMPOSITION;
472: }
473:
474: private int strength = 0;
475: private int decmp = 0;
476: private static SoftCache cache = new SoftCache();
477:
478: //
479: // These three constants may be removed in CDC. However, they are kept
480: // for field-for-field compatibility with J2SE.
481: //
482: /**
483: * LESS is returned if source string is compared to be less than target
484: * string in the compare() method.
485: * @see java.text.Collator#compare
486: */
487: final static int LESS = -1;
488: /**
489: * EQUAL is returned if source string is compared to be equal to target
490: * string in the compare() method.
491: * @see java.text.Collator#compare
492: */
493: final static int EQUAL = 0;
494: /**
495: * GREATER is returned if source string is compared to be greater than
496: * target string in the compare() method.
497: * @see java.text.Collator#compare
498: */
499: final static int GREATER = 1;
500: }
|