001: /**
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.text;
007:
008: /**
009: * <p>A <code>CollationKey</code> represents a <code>String</code>
010: * under the rules of a specific <code>Collator</code>
011: * object. Comparing two <code>CollationKey</code>s returns the
012: * relative order of the <code>String</code>s they represent.</p>
013: *
014: * <p>Since the rule set of <code>Collator</code>s can differ, the
015: * sort orders of the same string under two different
016: * <code>Collator</code>s might differ. Hence comparing
017: * <code>CollationKey</code>s generated from different
018: * <code>Collator</code>s can give incorrect results.</p>
019:
020: * <p>Both the method
021: * <code>CollationKey.compareTo(CollationKey)</code> and the method
022: * <code>Collator.compare(String, String)</code> compare two strings
023: * and returns their relative order. The performance characterictics
024: * of these two approaches can differ.</p>
025: *
026: * <p>During the construction of a <code>CollationKey</code>, the
027: * entire source string is examined and processed into a series of
028: * bits terminated by a null, that are stored in the <code>CollationKey</code>.
029: * When <code>CollationKey.compareTo(CollationKey)</code> executes, it
030: * performs bitwise comparison on the bit sequences. This can incurs
031: * startup cost when creating the <code>CollationKey</code>, but once
032: * the key is created, binary comparisons are fast. This approach is
033: * recommended when the same strings are to be compared over and over
034: * again.</p>
035: *
036: * <p>On the other hand, implementations of
037: * <code>Collator.compare(String, String)</code> can examine and
038: * process the strings only until the first characters differing in
039: * order. This approach is recommended if the strings are to be
040: * compared only once.</p>
041: *
042: * <p>More information about the composition of the bit sequence can
043: * be found in the
044: * <a href="http://icu.sourceforge.net/userguide/Collate_ServiceArchitecture.html">
045: * user guide</a>.</p>
046: *
047: * <p>The following example shows how <code>CollationKey</code>s can be used
048: * to sort a list of <code>String</code>s.</p>
049: * <blockquote>
050: * <pre>
051: * // Create an array of CollationKeys for the Strings to be sorted.
052: * Collator myCollator = Collator.getInstance();
053: * CollationKey[] keys = new CollationKey[3];
054: * keys[0] = myCollator.getCollationKey("Tom");
055: * keys[1] = myCollator.getCollationKey("Dick");
056: * keys[2] = myCollator.getCollationKey("Harry");
057: * sort( keys );
058: * <br>
059: * //...
060: * <br>
061: * // Inside body of sort routine, compare keys this way
062: * if( keys[i].compareTo( keys[j] ) > 0 )
063: * // swap keys[i] and keys[j]
064: * <br>
065: * //...
066: * <br>
067: * // Finally, when we've returned from sort.
068: * System.out.println( keys[0].getSourceString() );
069: * System.out.println( keys[1].getSourceString() );
070: * System.out.println( keys[2].getSourceString() );
071: * </pre>
072: * </blockquote>
073: * </p>
074: * <p>
075: * This class is not subclassable
076: * </p>
077: * @see Collator
078: * @see RuleBasedCollator
079: * @author Syn Wee Quek
080: * @stable ICU 2.8
081: */
082: public final class CollationKey implements Comparable {
083: // public inner classes -------------------------------------------------
084:
085: /**
086: * Options that used in the API CollationKey.getBound() for getting a
087: * CollationKey based on the bound mode requested.
088: * @stable ICU 2.6
089: */
090: public static final class BoundMode {
091: /*
092: * do not change the values assigned to the members of this enum.
093: * Underlying code depends on them having these numbers
094: */
095:
096: /**
097: * Lower bound
098: * @stable ICU 2.6
099: */
100: public static final int LOWER = 0;
101:
102: /**
103: * Upper bound that will match strings of exact size
104: * @stable ICU 2.6
105: */
106: public static final int UPPER = 1;
107:
108: /**
109: * Upper bound that will match all the strings that have the same
110: * initial substring as the given string
111: * @stable ICU 2.6
112: */
113: public static final int UPPER_LONG = 2;
114:
115: /**
116: * Number of bound mode
117: * @stable ICU 2.6
118: */
119: public static final int COUNT = 3;
120:
121: /**
122: * Private Constructor
123: */
124: ///CLOVER:OFF
125: private BoundMode() {
126: }
127: ///CLOVER:ON
128: }
129:
130: // public constructor ---------------------------------------------------
131:
132: /**
133: * CollationKey constructor.
134: * This constructor is given public access, unlike the JDK version, to
135: * allow access to users extending the Collator class. See
136: * {@link Collator#getCollationKey(String)}.
137: * @param source string this CollationKey is to represent
138: * @param key array of bytes that represent the collation order of argument
139: * source terminated by a null
140: * @see Collator
141: * @stable ICU 2.8
142: */
143: public CollationKey(String source, byte key[]) {
144: m_source_ = source;
145: m_key_ = key;
146: m_hashCode_ = 0;
147: m_length_ = -1;
148: }
149:
150: /**
151: * CollationKey constructor that forces key to release its internal byte
152: * array for adoption. key will have a null byte array after this
153: * construction.
154: * @param source string this CollationKey is to represent
155: * @param key RawCollationKey object that represents the collation order of
156: * argument source.
157: * @see Collator
158: * @see RawCollationKey
159: * @stable ICU 2.8
160: */
161: public CollationKey(String source, RawCollationKey key) {
162: m_source_ = source;
163: m_key_ = key.releaseBytes();
164: m_hashCode_ = 0;
165: m_length_ = -1;
166: }
167:
168: // public getters -------------------------------------------------------
169:
170: /**
171: * Return the source string that this CollationKey represents.
172: * @return source string that this CollationKey represents
173: * @stable ICU 2.8
174: */
175: public String getSourceString() {
176: return m_source_;
177: }
178:
179: /**
180: * <p>Duplicates and returns the value of this CollationKey as a sequence
181: * of big-endian bytes terminated by a null.</p>
182: *
183: * <p>If two CollationKeys can be legitimately compared, then one can
184: * compare the byte arrays of each to obtain the same result, e.g.
185: * <pre>
186: * byte key1[] = collationkey1.toByteArray();
187: * byte key2[] = collationkey2.toByteArray();
188: * int key, targetkey;
189: * int i = 0;
190: * do {
191: * key = key1[i] & 0xFF;
192: * targetkey = key2[i] & 0xFF;
193: * if (key < targetkey) {
194: * System.out.println("String 1 is less than string 2");
195: * return;
196: * }
197: * if (targetkey < key) {
198: * System.out.println("String 1 is more than string 2");
199: * }
200: * i ++;
201: * } while (key != 0 && targetKey != 0);
202: *
203: * System.out.println("Strings are equal.");
204: * </pre>
205: * </p>
206: * @return CollationKey value in a sequence of big-endian byte bytes
207: * terminated by a null.
208: * @stable ICU 2.8
209: */
210: public byte[] toByteArray() {
211: int length = 0;
212: while (true) {
213: if (m_key_[length] == 0) {
214: break;
215: }
216: length++;
217: }
218: length++;
219: byte result[] = new byte[length];
220: System.arraycopy(m_key_, 0, result, 0, length);
221: return result;
222: }
223:
224: // public other methods -------------------------------------------------
225:
226: /**
227: * <p>Compare this CollationKey to another CollationKey. The
228: * collation rules of the Collator that created this key are
229: * applied.</p>
230: *
231: * <p><strong>Note:</strong> Comparison between CollationKeys
232: * created by different Collators might return incorrect
233: * results. See class documentation.</p>
234: *
235: * @param target target CollationKey
236: * @return an integer value. If the value is less than zero this CollationKey
237: * is less than than target, if the value is zero they are equal, and
238: * if the value is greater than zero this CollationKey is greater
239: * than target.
240: * @exception NullPointerException is thrown if argument is null.
241: * @see Collator#compare(String, String)
242: * @stable ICU 2.8
243: */
244: public int compareTo(CollationKey target) {
245: for (int i = 0;; ++i) {
246: int l = m_key_[i] & 0xff;
247: int r = target.m_key_[i] & 0xff;
248: if (l < r) {
249: return -1;
250: } else if (l > r) {
251: return 1;
252: } else if (l == 0) {
253: return 0;
254: }
255: }
256: }
257:
258: /**
259: * <p>Compare this CollationKey with the specified Object. The
260: * collation rules of the Collator that created this key are
261: * applied.</p>
262: *
263: * <p>See note in compareTo(CollationKey) for warnings about possible
264: * incorrect results.</p>
265: *
266: * @param obj the Object to be compared to.
267: * @return Returns a negative integer, zero, or a positive integer
268: * respectively if this CollationKey is less than, equal to, or
269: * greater than the given Object.
270: * @exception ClassCastException is thrown when the argument is not
271: * a CollationKey. NullPointerException is thrown when the argument
272: * is null.
273: * @see #compareTo(CollationKey)
274: * @stable ICU 2.8
275: */
276: public int compareTo(Object obj) {
277: return compareTo((CollationKey) obj);
278: }
279:
280: /**
281: * <p>Compare this CollationKey and the specified Object for
282: * equality. The collation rules of the Collator that created
283: * this key are applied.</p>
284: *
285: * <p>See note in compareTo(CollationKey) for warnings about
286: * possible incorrect results.</p>
287: *
288: * @param target the object to compare to.
289: * @return true if the two keys compare as equal, false otherwise.
290: * @see #compareTo(CollationKey)
291: * @exception ClassCastException is thrown when the argument is not
292: * a CollationKey. NullPointerException is thrown when the argument
293: * is null.
294: * @stable ICU 2.8
295: */
296: public boolean equals(Object target) {
297: if (!(target instanceof CollationKey)) {
298: return false;
299: }
300:
301: return equals((CollationKey) target);
302: }
303:
304: /**
305: * <p>
306: * Compare this CollationKey and the argument target CollationKey for
307: * equality.
308: * The collation
309: * rules of the Collator object which created these objects are applied.
310: * </p>
311: * <p>
312: * See note in compareTo(CollationKey) for warnings of incorrect results
313: * </p>
314: * @param target the CollationKey to compare to.
315: * @return true if two objects are equal, false otherwise.
316: * @exception NullPointerException is thrown when the argument is null.
317: * @stable ICU 2.8
318: */
319: public boolean equals(CollationKey target) {
320: if (this == target) {
321: return true;
322: }
323: if (target == null) {
324: return false;
325: }
326: CollationKey other = (CollationKey) target;
327: int i = 0;
328: while (true) {
329: if (m_key_[i] != other.m_key_[i]) {
330: return false;
331: }
332: if (m_key_[i] == 0) {
333: break;
334: }
335: i++;
336: }
337: return true;
338: }
339:
340: /**
341: * <p>Returns a hash code for this CollationKey. The hash value is calculated
342: * on the key itself, not the String from which the key was created. Thus
343: * if x and y are CollationKeys, then x.hashCode(x) == y.hashCode()
344: * if x.equals(y) is true. This allows language-sensitive comparison in a
345: * hash table.
346: * </p>
347: * @return the hash value.
348: * @stable ICU 2.8
349: */
350: public int hashCode() {
351: if (m_hashCode_ == 0) {
352: if (m_key_ == null) {
353: m_hashCode_ = 1;
354: } else {
355: int size = m_key_.length >> 1;
356: StringBuffer key = new StringBuffer(size);
357: int i = 0;
358: while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
359: key
360: .append((char) ((m_key_[i] << 8) | m_key_[i + 1]));
361: i += 2;
362: }
363: if (m_key_[i] != 0) {
364: key.append((char) (m_key_[i] << 8));
365: }
366: m_hashCode_ = key.toString().hashCode();
367: }
368: }
369: return m_hashCode_;
370: }
371:
372: /**
373: * <p>
374: * Produce a bound for the sort order of a given collation key and a
375: * strength level. This API does not attempt to find a bound for the
376: * CollationKey String representation, hence null will be returned in its
377: * place.
378: * </p>
379: * <p>
380: * Resulting bounds can be used to produce a range of strings that are
381: * between upper and lower bounds. For example, if bounds are produced
382: * for a sortkey of string "smith", strings between upper and lower
383: * bounds with primary strength would include "Smith", "SMITH", "sMiTh".
384: * </p>
385: * <p>
386: * There are two upper bounds that can be produced. If BoundMode.UPPER
387: * is produced, strings matched would be as above. However, if a bound
388: * is produced using BoundMode.UPPER_LONG is used, the above example will
389: * also match "Smithsonian" and similar.
390: * </p>
391: * <p>
392: * For more on usage, see example in test procedure
393: * <a href="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icu4j/src/com/ibm/icu/dev/test/collator/CollationAPITest.java">
394: * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
395: * </a>
396: * </p>
397: * <p>
398: * Collation keys produced may be compared using the <TT>compare</TT> API.
399: * </p>
400: * @param boundType Mode of bound required. It can be BoundMode.LOWER, which
401: * produces a lower inclusive bound, BoundMode.UPPER, that
402: * produces upper bound that matches strings of the same
403: * length or BoundMode.UPPER_LONG that matches strings that
404: * have the same starting substring as the source string.
405: * @param noOfLevels Strength levels required in the resulting bound
406: * (for most uses, the recommended value is PRIMARY). This
407: * strength should be less than the maximum strength of
408: * this CollationKey.
409: * See users guide for explanation on the strength levels a
410: * collation key can have.
411: * @return the result bounded CollationKey with a valid sort order but
412: * a null String representation.
413: * @exception IllegalArgumentException thrown when the strength level
414: * requested is higher than or equal to the strength in this
415: * CollationKey.
416: * In the case of an Exception, information
417: * about the maximum strength to use will be returned in the
418: * Exception. The user can then call getBound() again with the
419: * appropriate strength.
420: * @see CollationKey
421: * @see CollationKey.BoundMode
422: * @see Collator#PRIMARY
423: * @see Collator#SECONDARY
424: * @see Collator#TERTIARY
425: * @see Collator#QUATERNARY
426: * @see Collator#IDENTICAL
427: * @stable ICU 2.6
428: */
429: public CollationKey getBound(int boundType, int noOfLevels) {
430: // Scan the string until we skip enough of the key OR reach the end of
431: // the key
432: int offset = 0;
433: int keystrength = Collator.PRIMARY;
434:
435: if (noOfLevels > Collator.PRIMARY) {
436: while (offset < m_key_.length && m_key_[offset] != 0) {
437: if (m_key_[offset++] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
438: keystrength++;
439: noOfLevels--;
440: if (noOfLevels == Collator.PRIMARY
441: || offset == m_key_.length
442: || m_key_[offset] == 0) {
443: offset--;
444: break;
445: }
446: }
447: }
448: }
449:
450: if (noOfLevels > 0) {
451: throw new IllegalArgumentException(
452: "Source collation key has only " + keystrength
453: + " strength level. Call getBound() again "
454: + " with noOfLevels < " + keystrength);
455: }
456:
457: // READ ME: this code assumes that the values for BoundMode variables
458: // will not changes. They are set so that the enum value corresponds to
459: // the number of extra bytes each bound type needs.
460: byte resultkey[] = new byte[offset + boundType + 1];
461: System.arraycopy(m_key_, 0, resultkey, 0, offset);
462: switch (boundType) {
463: case BoundMode.LOWER: // = 0
464: // Lower bound just gets terminated. No extra bytes
465: break;
466: case BoundMode.UPPER: // = 1
467: // Upper bound needs one extra byte
468: resultkey[offset++] = 2;
469: break;
470: case BoundMode.UPPER_LONG: // = 2
471: // Upper long bound needs two extra bytes
472: resultkey[offset++] = (byte) 0xFF;
473: resultkey[offset++] = (byte) 0xFF;
474: break;
475: default:
476: throw new IllegalArgumentException(
477: "Illegal boundType argument");
478: }
479: resultkey[offset++] = 0;
480: return new CollationKey(null, resultkey);
481: }
482:
483: /**
484: * <p>
485: * Merges this CollationKey with another. Only the sorting order of the
486: * CollationKeys will be merged. This API does not attempt to merge the
487: * String representations of the CollationKeys, hence null will be returned
488: * as the String representation.
489: * </p>
490: * <p>
491: * The strength levels are merged with their corresponding counterparts
492: * (PRIMARIES with PRIMARIES, SECONDARIES with SECONDARIES etc.).
493: * </p>
494: * <p>
495: * The merged String representation of the result CollationKey will be a
496: * concatenation of the String representations of the 2 source
497: * CollationKeys.
498: * </p>
499: * <p>
500: * Between the values from the same level a separator is inserted.
501: * example (uncompressed):
502: * <pre>
503: * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
504: * will be merged as
505: * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
506: * </pre>
507: * </p>
508: * <p>
509: * This allows for concatenating of first and last names for sorting, among
510: * other things.
511: * </p>
512: * </p>
513: * @param source CollationKey to merge with
514: * @return a CollationKey that contains the valid merged sorting order
515: * with a null String representation,
516: * i.e. <tt>new CollationKey(null, merge_sort_order)</tt>
517: * @exception IllegalArgumentException thrown if source CollationKey
518: * argument is null or of 0 length.
519: * @stable ICU 2.6
520: */
521: public CollationKey merge(CollationKey source) {
522: // check arguments
523: if (source == null || source.getLength() == 0) {
524: throw new IllegalArgumentException(
525: "CollationKey argument can not be null or of 0 length");
526: }
527:
528: getLength(); // gets the length of this sort key
529: int sourcelength = source.getLength();
530: // 1 extra for the last strength that has no seperators
531: byte result[] = new byte[m_length_ + sourcelength + 2];
532:
533: // merge the sort keys with the same number of levels
534: int rindex = 0;
535: int index = 0;
536: int sourceindex = 0;
537: while (true) {
538: // while both have another level
539: // copy level from src1 not including 00 or 01
540: // unsigned issues
541: while (m_key_[index] < 0
542: || m_key_[index] >= MERGE_SEPERATOR_) {
543: result[rindex++] = m_key_[index++];
544: }
545:
546: // add a 02 merge separator
547: result[rindex++] = MERGE_SEPERATOR_;
548:
549: // copy level from src2 not including 00 or 01
550: while (source.m_key_[sourceindex] < 0
551: || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) {
552: result[rindex++] = source.m_key_[sourceindex++];
553: }
554:
555: // if both sort keys have another level, then add a 01 level
556: // separator and continue
557: if (m_key_[index] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_
558: && source.m_key_[sourceindex] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
559: ++index;
560: ++sourceindex;
561: result[rindex++] = RuleBasedCollator.SORT_LEVEL_TERMINATOR_;
562: } else {
563: break;
564: }
565: }
566:
567: // here, at least one sort key is finished now, but the other one
568: // might have some contents left from containing more levels;
569: // that contents is just appended to the result
570: if (m_key_[index] != 0) {
571: System.arraycopy(m_key_, index, result, rindex, m_length_
572: - index);
573: } else if (source.m_key_[sourceindex] != 0) {
574: System.arraycopy(source.m_key_, sourceindex, result,
575: rindex, source.m_length_ - sourceindex);
576: }
577: result[result.length - 1] = 0;
578:
579: // trust that neither sort key contained illegally embedded zero bytes
580: return new CollationKey(null, result);
581: }
582:
583: // private data members -------------------------------------------------
584:
585: /**
586: * Sequence of bytes that represents the sort key
587: */
588: private byte m_key_[];
589:
590: /**
591: * Source string this CollationKey represents
592: */
593: private String m_source_;
594:
595: /**
596: * Hash code for the key
597: */
598: private int m_hashCode_;
599: /**
600: * Gets the length of this CollationKey
601: */
602: private int m_length_;
603: /**
604: * Collation key merge seperator
605: */
606: private static final int MERGE_SEPERATOR_ = 2;
607:
608: // private methods ------------------------------------------------------
609:
610: /**
611: * Gets the length of the CollationKey
612: * @return length of the CollationKey
613: */
614: private int getLength() {
615: if (m_length_ >= 0) {
616: return m_length_;
617: }
618: int length = m_key_.length;
619: for (int index = 0; index < length; index++) {
620: if (m_key_[index] == 0) {
621: length = index;
622: break;
623: }
624: }
625: m_length_ = length;
626: return m_length_;
627: }
628: }
|