001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: StringComparable.java,v 1.2 2004/02/17 04:21:14 minchau Exp $
018: */
019:
020: package org.apache.xml.utils;
021:
022: import java.util.Vector;
023: import java.text.Collator;
024: import java.text.RuleBasedCollator;
025: import java.text.CollationElementIterator;
026: import java.util.Locale;
027: import java.text.CollationKey;
028:
/**
 * International friendly string comparison with case-order.
 *
 * <p>An instance wraps a single string together with the collator, locale and
 * case-order ("upper-first" or anything else, which is treated as
 * lower-first) used to compare it against another {@code StringComparable}.
 *
 * <p>Instances temporarily change the strength and decomposition mode of the
 * collator they were given while comparing, and restore them afterwards. The
 * class performs no synchronization of its own around those changes.
 *
 * @author Igor Hersht, igorh@ca.ibm.com
 */
public class StringComparable implements Comparable {

    /** The case of a text fragment could not be classified. */
    public final static int UNKNOWN_CASE = -1;
    /** The text fragment collates equal to its upper-cased form. */
    public final static int UPPER_CASE = 1;
    /** The text fragment collates equal to its lower-cased form. */
    public final static int LOWER_CASE = 2;

    /** The wrapped string. */
    private final String m_text;
    /** Locale used for the upper/lower case conversions. */
    private final Locale m_locale;
    /** Collator used for every comparison; its settings are changed temporarily. */
    private final RuleBasedCollator m_collator;
    /** Requested case order; "upper-first" or anything else (lower-first). */
    private final String m_caseOrder;
    /** Bit mask applied to raw collation elements, derived from the collator strength. */
    private final int m_mask;

    /**
     * Creates a comparable wrapper around {@code text}.
     *
     * @param text the string to compare
     * @param locale the locale used when converting fragments to upper/lower case
     * @param collator the collator to compare with; must be a {@link RuleBasedCollator}
     * @param caseOrder "upper-first" to sort upper case before lower case; any
     *        other value sorts lower case first
     * @throws ClassCastException if {@code collator} is not a {@link RuleBasedCollator}
     */
    public StringComparable(final String text, final Locale locale,
            final Collator collator, final String caseOrder) {
        m_text = text;
        m_locale = locale;
        // Collation element iterators are only available on RuleBasedCollator.
        m_collator = (RuleBasedCollator) collator;
        m_caseOrder = caseOrder;
        m_mask = getMask(m_collator.getStrength());
    }

    /**
     * Returns an object that can be used to compare {@code text} with other
     * objects obtained from this method with the same arguments.
     *
     * <p>When no case-order is given (null or empty) the collator's own
     * collation key is returned; any {@link Collator} works in that case.
     * Otherwise a {@code StringComparable} is returned, which requires a
     * {@link RuleBasedCollator}.
     *
     * @param text the string to compare
     * @param locale the locale used for case conversions
     * @param collator the collator to compare with
     * @param caseOrder the case order, or null/empty for none
     * @return a collation key or a {@code StringComparable}
     */
    public final static Comparable getComparator(final String text,
            final Locale locale, final Collator collator,
            final String caseOrder) {
        if ((caseOrder == null) || (caseOrder.length() == 0)) {
            // No case-order specified: getCollationKey is declared on
            // Collator itself, so no RuleBasedCollator cast is needed here.
            return collator.getCollationKey(text);
        }
        return new StringComparable(text, locale, collator, caseOrder);
    }

    /**
     * Returns the wrapped string.
     *
     * @return the text this instance was created with
     */
    public final String toString() {
        return m_text;
    }

    /**
     * Compares the wrapped string with the one wrapped by {@code o}.
     *
     * <p>Order of decisions: code-point equality; then any difference at
     * SECONDARY strength or weaker (whichever the collator is configured
     * for); then the first pure case difference, resolved according to the
     * case order; finally the collator's verdict at its configured strength.
     *
     * @param o the other {@code StringComparable}
     * @return a negative, zero or positive value
     * @throws ClassCastException if {@code o} is not a {@code StringComparable}
     */
    public int compareTo(Object o) {
        final String pattern = ((StringComparable) o).toString();
        if (m_text.equals(pattern)) { // Code-point equals
            return 0;
        }

        final int savedStrength = m_collator.getStrength();
        int comp;
        // Is there a difference more significant than case-order?
        if ((savedStrength == Collator.PRIMARY)
                || (savedStrength == Collator.SECONDARY)) {
            comp = m_collator.compare(m_text, pattern);
        } else { // stronger than SECONDARY: ignore case for this first pass
            m_collator.setStrength(Collator.SECONDARY);
            try {
                comp = m_collator.compare(m_text, pattern);
            } finally {
                // Always hand the collator back in its original configuration.
                m_collator.setStrength(savedStrength);
            }
        }
        if (comp != 0) { // Difference more significant than case-order
            return comp;
        }

        // No difference more significant than case-order: look for a case difference.
        comp = getCaseDiff(m_text, pattern);
        if (comp != 0) {
            return comp;
        }
        // No case differences. A less significant difference could still exist.
        return m_collator.compare(m_text, pattern);
    }

    /**
     * Resolves the first pure case difference between the two strings
     * according to the case order.
     *
     * @param text the wrapped string
     * @param pattern the string it is compared with
     * @return -1 or 1 when a case difference was found, 0 otherwise
     */
    private final int getCaseDiff(final String text,
            final String pattern) {
        final int savedStrength = m_collator.getStrength();
        final int savedDecomposition = m_collator.getDecomposition();
        final int[] diff;
        m_collator.setStrength(Collator.TERTIARY); // do not ignore case
        try {
            m_collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); // corresponds to NFD
            diff = getFirstCaseDiff(text, pattern, m_locale);
        } finally {
            // Restore even when the scan fails, so the caller's collator is untouched.
            m_collator.setStrength(savedStrength);
            m_collator.setDecomposition(savedDecomposition);
        }

        if (diff == null) { // No case differences
            return 0;
        }
        // Constant-first equals: a null case order is treated as lower-first.
        final int preferredCase = "upper-first".equals(m_caseOrder)
                ? UPPER_CASE
                : LOWER_CASE;
        return (diff[0] == preferredCase) ? -1 : 1;
    }

    /**
     * Walks both strings collation element by collation element and reports
     * the first position where they differ only by case.
     *
     * @param text the wrapped string
     * @param pattern the string it is compared with
     * @param locale the locale used for case conversions
     * @return a two-element array holding the case of the differing fragment
     *         in {@code text} and in {@code pattern} (one {@link #UPPER_CASE},
     *         the other {@link #LOWER_CASE}), or null if either string is
     *         exhausted before such a difference is found
     */
    private final int[] getFirstCaseDiff(final String text,
            final String pattern, final Locale locale) {

        final CollationElementIterator targIter = m_collator
                .getCollationElementIterator(text);
        final CollationElementIterator patIter = m_collator
                .getCollationElementIterator(pattern);
        int startTarg = -1;
        int endTarg = -1;
        int startPatt = -1;
        int endPatt = -1;
        final int done = getElement(CollationElementIterator.NULLORDER);
        int patternElement = 0;
        int targetElement = 0;
        boolean getPattern = true;
        boolean getTarget = true;

        while (true) {
            if (getPattern) {
                startPatt = patIter.getOffset();
                patternElement = getElement(patIter.next());
                endPatt = patIter.getOffset();
            }
            if (getTarget) {
                startTarg = targIter.getOffset();
                targetElement = getElement(targIter.next());
                endTarg = targIter.getOffset();
            }
            getTarget = true;
            getPattern = true;

            if ((patternElement == done) || (targetElement == done)) {
                return null;
            }
            if (targetElement == 0) {
                // Ignorable element in the target: advance the target only.
                getPattern = false;
                continue;
            }
            if (patternElement == 0) {
                // Ignorable element in the pattern: advance the pattern only.
                getTarget = false;
                continue;
            }
            if (targetElement == patternElement) {
                continue;
            }

            // Mismatch. Both elements must map to a non-empty source fragment.
            if ((startPatt >= endPatt) || (startTarg >= endTarg)) {
                continue;
            }
            final String subText = text.substring(startTarg, endTarg);
            final String subPatt = pattern.substring(startPatt, endPatt);
            final String subTextUp = subText.toUpperCase(locale);
            final String subPattUp = subPatt.toUpperCase(locale);
            if (m_collator.compare(subTextUp, subPattUp) != 0) {
                continue; // not a case difference
            }

            final int[] diff = {
                classifyCase(subText, subTextUp, locale),
                classifyCase(subPatt, subPattUp, locale)
            };
            if (((diff[0] == UPPER_CASE) && (diff[1] == LOWER_CASE))
                    || ((diff[0] == LOWER_CASE) && (diff[1] == UPPER_CASE))) {
                return diff;
            }
            // Otherwise not a case difference: keep scanning.
        }
    }

    /**
     * Classifies a fragment as upper case, lower case or unknown by comparing
     * it with its case-converted forms using the collator.
     *
     * @param fragment the fragment to classify
     * @param upperFragment {@code fragment} already converted to upper case
     * @param locale the locale used for the lower case conversion
     * @return {@link #UPPER_CASE}, {@link #LOWER_CASE} or {@link #UNKNOWN_CASE}
     */
    private final int classifyCase(final String fragment,
            final String upperFragment, final Locale locale) {
        if (m_collator.compare(fragment, upperFragment) == 0) {
            return UPPER_CASE;
        }
        if (m_collator.compare(fragment, fragment.toLowerCase(locale)) == 0) {
            return LOWER_CASE;
        }
        return UNKNOWN_CASE;
    }

    /**
     * Returns a mask for the part of a collation element we are interested in
     * at the given strength.
     *
     * @param strength a {@link Collator} strength constant
     * @return 0xFFFF0000 for PRIMARY, 0xFFFFFF00 for SECONDARY, otherwise 0xFFFFFFFF
     */
    private static final int getMask(final int strength) {
        switch (strength) {
            case Collator.PRIMARY:
                return 0xFFFF0000;
            case Collator.SECONDARY:
                return 0xFFFFFF00;
            default:
                return 0xFFFFFFFF;
        }
    }

    /**
     * Reduces a full-strength collation element to the strength this instance
     * was created with by applying the mask.
     *
     * @param maxStrengthElement the raw element returned by the iterator
     * @return the masked element
     */
    private final int getElement(int maxStrengthElement) {
        return (maxStrengthElement & m_mask);
    }

}//StringComparable
|