Source Code Cross Referenced for RuleBasedCollator.java in  » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        //##header
0002:        /**
0003:         *******************************************************************************
0004:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
0005:         * others. All Rights Reserved.                                                *
0006:         *******************************************************************************
0007:         */package com.ibm.icu.text;
0008:
0009:        import java.io.IOException;
0010:        import java.text.CharacterIterator;
0011:        import java.text.ParseException;
0012:        import java.util.Arrays;
0013:        import java.util.MissingResourceException;
0014:
0015:        //#ifndef FOUNDATION
0016:        import java.nio.ByteBuffer; //#else
0017:        //##import com.ibm.icu.impl.ByteBuffer;
0018:        //#endif
0019:
0020:        import com.ibm.icu.impl.BOCU;
0021:        import com.ibm.icu.impl.ICUDebug;
0022:        import com.ibm.icu.impl.ICUResourceBundle;
0023:        import com.ibm.icu.impl.ImplicitCEGenerator;
0024:        import com.ibm.icu.impl.IntTrie;
0025:        import com.ibm.icu.impl.StringUCharacterIterator;
0026:        import com.ibm.icu.impl.Trie;
0027:        import com.ibm.icu.impl.TrieIterator;
0028:        import com.ibm.icu.impl.Utility;
0029:        import com.ibm.icu.lang.UCharacter;
0030:        import com.ibm.icu.util.RangeValueIterator;
0031:        import com.ibm.icu.util.ULocale;
0032:        import com.ibm.icu.util.UResourceBundle;
0033:        import com.ibm.icu.util.VersionInfo;
0034:
0035:        /**
0036:         * <p>RuleBasedCollator is a concrete subclass of Collator. It allows
0037:         * customization of the Collator via user-specified rule sets.
0038:         * RuleBasedCollator is designed to be fully compliant to the <a
0039:         * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode
0040:         * Collation Algorithm (UCA)</a> and conforms to ISO 14651.</p>
0041:         *
0042:         * <p>Users are strongly encouraged to read <a
0043:         * href="http://icu.sourceforge.net/userguide/Collate_Intro.html">
0044:         * the users guide</a> for more information about the collation
0045:         * service before using this class.</p>
0046:         *
0047:         * <p>Create a RuleBasedCollator from a locale by calling the
0048:         * getInstance(Locale) factory method in the base class Collator.
0049:         * Collator.getInstance(Locale) creates a RuleBasedCollator object
0050:         * based on the collation rules defined by the argument locale.  If a
0051:         * customized collation ordering or attributes is required, use the
0052:         * RuleBasedCollator(String) constructor with the appropriate
0053:         * rules. The customized RuleBasedCollator will base its ordering on
0054:         * UCA, while re-adjusting the attributes and orders of the characters
0055:         * in the specified rule accordingly.</p>
0056:         *
0057:         * <p>RuleBasedCollator provides correct collation orders for most
0058:         * locales supported in ICU. If specific data for a locale is not
0059:         * available, the order eventually falls back to the <a
0060:         * href="http://www.unicode.org/unicode/reports/tr10/">UCA collation
0061:         * order </a>.</p>
0062:         *
0063:         * <p>For information about the collation rule syntax and details
0064:         * about customization, please refer to the
0065:         * <a href="http://icu.sourceforge.net/userguide/Collate_Customization.html">
0066:         * Collation customization</a> section of the user's guide.</p>
0067:         *
0068:         * <p><strong>Note</strong> that there are some differences between
0069:         * the Collation rule syntax used in Java and ICU4J:
0070:         *
0071:         * <ul>
0072:         * <li>According to the JDK documentation:
0073:         * <i>
0074:         * <p>
0075:         * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule
0076:         * is in force when a Thai vowel of the range &#92;U0E40-&#92;U0E44 precedes a
0077:         * Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the
0078:         * range &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range
0079:         * &#92;U0E81-&#92;U0EAE then the
0080:         * vowel is placed after the consonant for collation purposes.
0081:         * </p>
0082:         * <p>
0083:         * If a rule is without the modifier '!', the Thai/Lao vowel-consonant
0084:         * swapping is not turned on.
0085:         * </p>
0086:         * </i>
0087:         * <p>
0088:         * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao
0089:         * vowel-consonant swapping, since the UCA clearly states that it has to be
0090:         * supported to ensure a correct sorting order. If a '!' is encountered, it is
0091:         * ignored.
0092:         * </p>
0093:         * <li>As mentioned in the documentation of the base class Collator,
0094:         *     compatibility decomposition mode is not supported.
0095:         * </ul>
0096:         * <p>
0097:         * <strong>Examples</strong>
0098:         * </p>
0099:         * <p>
0100:         * Creating Customized RuleBasedCollators:
0101:         * <blockquote>
0102:         * <pre>
0103:         * String simple = "&amp; a &lt; b &lt; c &lt; d";
0104:         * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
0105:         *
0106:         * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "
0107:         *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "
0108:         *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "
0109:         *                    + "o , O &lt; p , P &lt; q , Q &lt; r , R &lt; s , S &lt; "
0110:         *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "
0111:         *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "
0112:         *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "
0113:         *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";
0114:         * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
0115:         * </pre>
0116:         * </blockquote>
0117:         *
0118:         * Concatenating rules to combine <code>Collator</code>s:
0119:         * <blockquote>
0120:         * <pre>
0121:         * // Create an en_US Collator object
0122:         * RuleBasedCollator en_USCollator = (RuleBasedCollator)
0123:         *     Collator.getInstance(new Locale("en", "US", ""));
0124:         * // Create a da_DK Collator object
0125:         * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
0126:         *     Collator.getInstance(new Locale("da", "DK", ""));
0127:         * // Combine the two
0128:         * // First, get the collation rules from en_USCollator
0129:         * String en_USRules = en_USCollator.getRules();
0130:         * // Second, get the collation rules from da_DKCollator
0131:         * String da_DKRules = da_DKCollator.getRules();
0132:         * RuleBasedCollator newCollator =
0133:         *                             new RuleBasedCollator(en_USRules + da_DKRules);
0134:         * // newCollator has the combined rules
0135:         * </pre>
0136:         * </blockquote>
0137:         *
0138:         * Making changes to an existing RuleBasedCollator to create a new
0139:         * <code>Collator</code> object, by appending changes to the existing rule:
0140:         * <blockquote>
0141:         * <pre>
0142:         * // Create a new Collator object with additional rules
0143:         * String addRules = "&amp; C &lt; ch, cH, Ch, CH";
0144:         * RuleBasedCollator myCollator =
0145:         *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
0146:         * // myCollator contains the new rules
0147:         * </pre>
0148:         * </blockquote>
0149:         *
0150:         * How to change the order of non-spacing accents:
0151:         * <blockquote>
0152:         * <pre>
0153:         * // old rule with main accents
0154:         * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "
0155:         *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "
0156:         *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "
0157:         *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "
0158:         *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "
0159:         *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "
0160:         *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";
0161:         * // change the order of accent characters
0162:         * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";
0163:         * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
0164:         * </pre>
0165:         * </blockquote>
0166:         *
0167:         * Putting in a new primary ordering before the default setting,
0168:         * e.g. sort English characters before or after Japanese characters in the Japanese
0169:         * <code>Collator</code>:
0170:         * <blockquote>
0171:         * <pre>
0172:         * // get en_US Collator rules
0173:         * RuleBasedCollator en_USCollator
0174:         *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
0175:         * // add a few Japanese characters to sort before English characters
0176:         * // suppose the last character before the first base letter 'a' in
0177:         * // the English collation rule is &#92;u2212
0178:         * String jaString = "&amp; &#92;u2212 &lt; &#92;u3041, &#92;u3042 &lt; &#92;u3043, "
0179:         *                   + "&#92;u3044";
0180:         * RuleBasedCollator myJapaneseCollator
0181:         *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
0182:         * </pre>
0183:         * </blockquote>
0184:         * </p>
0185:         * <p>
0186:         * This class is not subclassable
0187:         * </p>
0188:         * @author Syn Wee Quek
0189:         * @stable ICU 2.8
0190:         */
0191:        public final class RuleBasedCollator extends Collator {
0192:            // public constructors ---------------------------------------------------
0193:
0194:            /**
0195:             * <p>
0196:             * Constructor that takes the argument rules for
0197:             * customization. The collator will be based on UCA,
0198:             * with the attributes and re-ordering of the characters specified in the
0199:             * argument rules.
0200:             * </p>
0201:             * <p>See the user guide's section on
0202:             * <a href="http://icu.sourceforge.net/userguide/Collate_Customization.html">
0203:             * Collation Customization</a> for details on the rule syntax.
0204:             * </p>
0205:             * @param rules the collation rules to build the collation table from.
0206:             * @exception Exception a ParseException is thrown when the argument
0207:             *            rules have an invalid syntax; an IOException is
0208:             *            thrown when an error occurred while reading internal data.
0209:             * @stable ICU 2.8
0210:             */
0211:            public RuleBasedCollator(String rules) throws Exception { // builds a collator from a rule string; null rules are rejected
0212:                checkUCA(); // runs before the null check, so it executes even when rules is null; defined outside this view
0213:                if (rules == null) { // fail fast with an unchecked exception instead of passing null to init
0214:                    throw new IllegalArgumentException(
0215:                            "Collation rules can not be null");
0216:                }
0217:                init(rules); // defined outside this view; presumably parses the rules and builds the collation data -- TODO confirm
0218:            }
0219:
0220:            // public methods --------------------------------------------------------
0221:
0222:            /**
0223:             * Clones the RuleBasedCollator
0224:             * @return a new instance of this RuleBasedCollator object
0225:             * @stable ICU 2.8
0226:             */
0227:            public Object clone() throws CloneNotSupportedException { // copies via super.clone(), then re-initialises the copy's utility state
0228:                RuleBasedCollator result = (RuleBasedCollator) super .clone();
0229:                if (latinOneCEs_ != null) {
0230:                    result.m_reallocLatinOneCEs_ = true; // set only when this collator has a latinOneCEs_ table; presumably makes the copy allocate its own -- TODO confirm
0231:                }
0232:                // the collation data in a RuleBasedCollator does not change, so the
0233:                // fields copied by super.clone() can safely be shared with this collator
0234:                result.initUtility(false); // give the new clone its own utility
0235:                // iterators
0236:                return result;
0237:            }
0238:
0239:            /**
0240:             * Return a CollationElementIterator for the given String.
0241:             * @see CollationElementIterator
0242:             * @stable ICU 2.8
0243:             */
0244:            public CollationElementIterator getCollationElementIterator(
0245:                    String source) { // returns a new iterator over the given string, bound to this collator
0246:                return new CollationElementIterator(source, this );
0247:            }
0248:
0249:            /**
0250:             * Return a CollationElementIterator for the given CharacterIterator.
0251:             * The source iterator's integrity will be preserved since a new copy
0252:             * will be created for use.
0253:             * @see CollationElementIterator
0254:             * @stable ICU 2.8
0255:             */
0256:            public CollationElementIterator getCollationElementIterator(
0257:                    CharacterIterator source) { // returns a new iterator over a clone of source, bound to this collator
0258:                CharacterIterator newsource = (CharacterIterator) source
0259:                        .clone(); // clone first so the caller's iterator is not handed to the new iterator
0260:                return new CollationElementIterator(newsource, this );
0261:            }
0262:
0263:            /**
0264:             * Return a CollationElementIterator for the given UCharacterIterator.
0265:             * The source iterator's integrity will be preserved since a new copy
0266:             * will be created for use.
0267:             * @see CollationElementIterator
0268:             * @stable ICU 2.8
0269:             */
0270:            public CollationElementIterator getCollationElementIterator(
0271:                    UCharacterIterator source) { // returns a new iterator over source, bound to this collator
0272:                return new CollationElementIterator(source, this ); // NOTE(review): source is passed through uncloned here; the copy promised by the Javadoc would have to be made inside CollationElementIterator -- confirm
0273:            }
0274:
0275:            // public setters --------------------------------------------------------
0276:
0277:            /**
0278:             * Sets the Hiragana Quaternary mode to be on or off.
0279:             * When the Hiragana Quaternary mode is turned on, the collator
0280:             * positions Hiragana characters before all non-ignorable characters in
0281:             * QUATERNARY strength. This is to produce a correct JIS collation order,
0282:             * distinguishing between Katakana  and Hiragana characters.
0283:             * @param flag true if Hiragana Quaternary mode is to be on, false
0284:             *        otherwise
0285:             * @see #setHiraganaQuaternaryDefault
0286:             * @see #isHiraganaQuaternary
0287:             * @stable ICU 2.8
0288:             */
0289:            public void setHiraganaQuaternary(boolean flag) { // stores the requested Hiragana Quaternary mode
0290:                m_isHiragana4_ = flag;
0291:                updateInternalState(); // must follow the assignment; defined outside this view, presumably refreshes attribute-derived state -- TODO confirm
0292:            }
0293:
0294:            /**
0295:             * Sets the Hiragana Quaternary mode to the initial mode set during
0296:             * construction of the RuleBasedCollator.
0297:             * See setHiraganaQuaternary(boolean) for more details.
0298:             * @see #setHiraganaQuaternary(boolean)
0299:             * @see #isHiraganaQuaternary
0300:             * @stable ICU 2.8
0301:             */
0302:            public void setHiraganaQuaternaryDefault() { // restores the Hiragana Quaternary mode from its stored default
0303:                m_isHiragana4_ = m_defaultIsHiragana4_;
0304:                updateInternalState(); // must follow the assignment; defined outside this view, presumably refreshes attribute-derived state -- TODO confirm
0305:            }
0306:
0307:            /**
0308:             * Sets whether uppercase characters sort before lowercase
0309:             * characters or vice versa, in strength TERTIARY. The default
0310:             * mode is false, and so lowercase characters sort before uppercase
0311:             * characters.
0312:             * If true, sort upper case characters first.
0313:             * @param upperfirst true to sort uppercase characters before
0314:             *                   lowercase characters, false to sort lowercase
0315:             *                   characters before uppercase characters
0316:             * @see #isLowerCaseFirst
0317:             * @see #isUpperCaseFirst
0318:             * @see #setLowerCaseFirst
0319:             * @see #setCaseFirstDefault
0320:             * @stable ICU 2.8
0321:             */
0322:            public void setUpperCaseFirst(boolean upperfirst) { // sets case-first to UPPER_FIRST_ (true) or OFF_ (false)
0323:                if (upperfirst) {
0324:                    if (m_caseFirst_ != AttributeValue.UPPER_FIRST_) {
0325:                        latinOneRegenTable_ = true; // set only when the value actually changes; presumably requests a rebuild of the Latin-1 table -- TODO confirm
0326:                    }
0327:                    m_caseFirst_ = AttributeValue.UPPER_FIRST_;
0328:                } else {
0329:                    if (m_caseFirst_ != AttributeValue.OFF_) { // false maps to OFF_, not LOWER_FIRST_
0330:                        latinOneRegenTable_ = true;
0331:                    }
0332:                    m_caseFirst_ = AttributeValue.OFF_;
0333:                }
0334:                updateInternalState(); // called on every path, after the new value is stored; defined outside this view
0335:            }
0336:
0337:            /**
0338:             * Sets the orders of lower cased characters to sort before upper cased
0339:             * characters, in strength TERTIARY. The default
0340:             * mode is false.
0341:             * If true is set, the RuleBasedCollator will sort lower cased characters
0342:             * before the upper cased ones.
0343:             * Otherwise, if false is set, the RuleBasedCollator will ignore case
0344:             * preferences.
0345:             * @param lowerfirst true for sorting lower cased characters before
0346:             *                   upper cased characters, false to ignore case
0347:             *                   preferences.
0348:             * @see #isLowerCaseFirst
0349:             * @see #isUpperCaseFirst
0350:             * @see #setUpperCaseFirst
0351:             * @see #setCaseFirstDefault
0352:             * @stable ICU 2.8
0353:             */
0354:            public void setLowerCaseFirst(boolean lowerfirst) { // sets case-first to LOWER_FIRST_ (true) or OFF_ (false)
0355:                if (lowerfirst) {
0356:                    if (m_caseFirst_ != AttributeValue.LOWER_FIRST_) {
0357:                        latinOneRegenTable_ = true; // set only when the value actually changes; presumably requests a rebuild of the Latin-1 table -- TODO confirm
0358:                    }
0359:                    m_caseFirst_ = AttributeValue.LOWER_FIRST_;
0360:                } else {
0361:                    if (m_caseFirst_ != AttributeValue.OFF_) { // false maps to OFF_, not UPPER_FIRST_
0362:                        latinOneRegenTable_ = true;
0363:                    }
0364:                    m_caseFirst_ = AttributeValue.OFF_;
0365:                }
0366:                updateInternalState(); // called on every path, after the new value is stored; defined outside this view
0367:            }
0368:
0369:            /**
0370:             * Sets the case first mode to the initial mode set during
0371:             * construction of the RuleBasedCollator.
0372:             * See setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more
0373:             * details.
0374:             * @see #isLowerCaseFirst
0375:             * @see #isUpperCaseFirst
0376:             * @see #setLowerCaseFirst(boolean)
0377:             * @see #setUpperCaseFirst(boolean)
0378:             * @stable ICU 2.8
0379:             */
0380:            public final void setCaseFirstDefault() { // restores the case-first mode from its stored default
0381:                if (m_caseFirst_ != m_defaultCaseFirst_) {
0382:                    latinOneRegenTable_ = true; // set only when the value actually changes; presumably requests a rebuild of the Latin-1 table -- TODO confirm
0383:                }
0384:                m_caseFirst_ = m_defaultCaseFirst_;
0385:                updateInternalState(); // must follow the assignment; defined outside this view
0386:            }
0387:
0388:            /**
0389:             * Sets the alternate handling mode to the initial mode set during
0390:             * construction of the RuleBasedCollator.
0391:             * See setAlternateHandlingShifted(boolean) for more details.
0392:             * @see #setAlternateHandlingShifted(boolean)
0393:             * @see #isAlternateHandlingShifted()
0394:             * @stable ICU 2.8
0395:             */
0396:            public void setAlternateHandlingDefault() { // restores the alternate-handling (shifted) flag from its stored default
0397:                m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
0398:                updateInternalState(); // must follow the assignment; defined outside this view, presumably refreshes attribute-derived state -- TODO confirm
0399:            }
0400:
0401:            /**
0402:             * Sets the case level mode to the initial mode set during
0403:             * construction of the RuleBasedCollator.
0404:             * See setCaseLevel(boolean) for more details.
0405:             * @see #setCaseLevel(boolean)
0406:             * @see #isCaseLevel
0407:             * @stable ICU 2.8
0408:             */
0409:            public void setCaseLevelDefault() { // restores the case-level flag from its stored default
0410:                m_isCaseLevel_ = m_defaultIsCaseLevel_;
0411:                updateInternalState(); // must follow the assignment; defined outside this view, presumably refreshes attribute-derived state -- TODO confirm
0412:            }
0413:
0414:            /**
0415:             * Sets the decomposition mode to the initial mode set during construction
0416:             * of the RuleBasedCollator.
0417:             * See setDecomposition(int) for more details.
0418:             * @see #getDecomposition
0419:             * @see #setDecomposition(int)
0420:             * @stable ICU 2.8
0421:             */
0422:            public void setDecompositionDefault() { // re-applies the stored default decomposition mode through the regular setter
0423:                setDecomposition(m_defaultDecomposition_); // setDecomposition is not visible in this view
0424:                updateInternalState(); // defined outside this view; NOTE(review): may be redundant if setDecomposition already calls it -- confirm
0425:            }
0426:
0427:            /**
0428:             * Sets the French collation mode to the initial mode set during
0429:             * construction of the RuleBasedCollator.
0430:             * See setFrenchCollation(boolean) for more details.
0431:             * @see #isFrenchCollation
0432:             * @see #setFrenchCollation(boolean)
0433:             * @stable ICU 2.8
0434:             */
0435:            public void setFrenchCollationDefault() { // restores the French collation flag from its stored default
0436:                if (m_isFrenchCollation_ != m_defaultIsFrenchCollation_) {
0437:                    latinOneRegenTable_ = true; // set only when the value actually changes; presumably requests a rebuild of the Latin-1 table -- TODO confirm
0438:                }
0439:                m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
0440:                updateInternalState(); // must follow the assignment; defined outside this view
0441:            }
0442:
0443:            /**
0444:             * Sets the collation strength to the initial mode set during the
0445:             * construction of the RuleBasedCollator.
0446:             * See setStrength(int) for more details.
0447:             * @see #setStrength(int)
0448:             * @see #getStrength
0449:             * @stable ICU 2.8
0450:             */
0451:            public void setStrengthDefault() { // re-applies the stored default strength through the regular setter
0452:                setStrength(m_defaultStrength_); // this class's setStrength(int) already calls updateInternalState()
0453:                updateInternalState(); // NOTE(review): second call after the one made inside setStrength(int); looks redundant -- confirm before removing
0454:            }
0455:
0456:            /**
0457:             * Method to set numeric collation to its default value.
0458:             * When numeric collation is turned on, this Collator generates a collation 
0459:             * key for the numeric value of substrings of digits. This is a way to get 
0460:             * '100' to sort AFTER '2'
0461:             * @see #getNumericCollation
0462:             * @see #setNumericCollation
0463:             * @stable ICU 2.8
0464:             */
0465:            public void setNumericCollationDefault() { // re-applies the stored default numeric-collation flag through the regular setter
0466:                setNumericCollation(m_defaultIsNumericCollation_); // setNumericCollation is not visible in this view
0467:                updateInternalState(); // defined outside this view; NOTE(review): may be redundant if setNumericCollation already calls it -- confirm
0468:            }
0469:
0470:            /**
0471:             * Sets the mode for the direction of SECONDARY weights to be used in
0472:             * French collation.
0473:             * The default value is false, which treats SECONDARY weights in the order
0474:             * they appear.
0475:             * If set to true, the SECONDARY weights will be sorted backwards.
0476:             * See the section on
0477:             * <a href="http://icu.sourceforge.net/userguide/Collate_ServiceArchitecture.html">
0478:             * French collation</a> for more information.
0479:             * @param flag true to set the French collation on, false to set it off
0480:             * @stable ICU 2.8
0481:             * @see #isFrenchCollation
0482:             * @see #setFrenchCollationDefault
0483:             */
0484:            public void setFrenchCollation(boolean flag) { // stores the requested French collation flag
0485:                if (m_isFrenchCollation_ != flag) {
0486:                    latinOneRegenTable_ = true; // set only when the value actually changes; presumably requests a rebuild of the Latin-1 table -- TODO confirm
0487:                }
0488:                m_isFrenchCollation_ = flag;
0489:                updateInternalState(); // must follow the assignment; defined outside this view
0490:            }
0491:
0492:            /**
0493:             * Sets the alternate handling for QUATERNARY strength to be either
0494:             * shifted or non-ignorable.
0495:             * See the UCA definition on
0496:             * <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">
0497:             * Alternate Weighting</a>.
0498:             * This attribute will only be effective when QUATERNARY strength is set.
0499:             * The default value for this mode is false, corresponding to the
0500:             * NON_IGNORABLE mode in UCA. In the NON-IGNORABLE mode, the
0501:             * RuleBasedCollator will treat all the codepoints with non-ignorable
0502:             * primary weights in the same way.
0503:             * If the mode is set to true, the behaviour corresponds to SHIFTED defined
0504:             * in UCA, this causes codepoints with PRIMARY orders that are equal or
0505:             * below the variable top value to be ignored in PRIMARY order and
0506:             * moved to the QUATERNARY order.
0507:             * @param shifted true if SHIFTED behaviour for alternate handling is
0508:             *        desired, false for the NON_IGNORABLE behaviour.
0509:             * @see #isAlternateHandlingShifted
0510:             * @see #setAlternateHandlingDefault
0511:             * @stable ICU 2.8
0512:             */
0513:            public void setAlternateHandlingShifted(boolean shifted) { // stores the requested alternate-handling mode (true = shifted)
0514:                m_isAlternateHandlingShifted_ = shifted;
0515:                updateInternalState(); // must follow the assignment; defined outside this view, presumably refreshes attribute-derived state -- TODO confirm
0516:            }
0517:
0518:            /**
0519:             * <p>
0520:             * When case level is set to true, an additional weight is formed
0521:             * between the SECONDARY and TERTIARY weight, known as the case level.
0522:             * The case level is used to distinguish large and small Japanese Kana
0523:             * characters. Case level could also be used in other situations.
0524:             * For example to distinguish certain Pinyin characters.
0525:             * The default value is false, which means the case level is not generated.
0526:             * The contents of the case level are affected by the case first
0527:             * mode. A simple way to ignore accent differences in a string is to set
0528:             * the strength to PRIMARY and enable case level.
0529:             * </p>
0530:             * <p>
0531:             * See the section on
0532:             * <a href="http://icu.sourceforge.net/userguide/Collate_ServiceArchitecture.html">
0533:             * case level</a> for more information.
0534:             * </p>
0535:             * @param flag true if case level sorting is required, false otherwise
0536:             * @stable ICU 2.8
0537:             * @see #setCaseLevelDefault
0538:             * @see #isCaseLevel
0539:             */
0540:            public void setCaseLevel(boolean flag) { // stores the requested case-level flag
0541:                m_isCaseLevel_ = flag;
0542:                updateInternalState(); // must follow the assignment; defined outside this view, presumably refreshes attribute-derived state -- TODO confirm
0543:            }
0544:
0545:            /**
0546:             * <p>
0547:             * Sets this Collator's strength property. The strength property
0548:             * determines the minimum level of difference considered significant
0549:             * during comparison.
0550:             * </p>
0551:             * <p>See the Collator class description for an example of use.</p>
0552:             * @param newStrength the new strength value.
0553:             * @see #getStrength
0554:             * @see #setStrengthDefault
0555:             * @see #PRIMARY
0556:             * @see #SECONDARY
0557:             * @see #TERTIARY
0558:             * @see #QUATERNARY
0559:             * @see #IDENTICAL
0560:             * @exception IllegalArgumentException If the new strength value is not one
0561:             *              of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
0562:             * @stable ICU 2.8
0563:             */
0564:            public void setStrength(int newStrength) { // delegates to the superclass setter, then refreshes this collator's state
0565:                super .setStrength(newStrength); // per the Javadoc above, an invalid value raises IllegalArgumentException; the check itself is not in this method
0566:                updateInternalState(); // runs only after the superclass call returns; defined outside this view
0567:            }
0568:
0569:            /** 
0570:             * <p>
0571:             * Variable top is a two byte primary value which causes all the codepoints 
0572:             * with primary values that are less than or equal to the variable top to be 
0573:             * shifted when alternate handling is set to SHIFTED.
0574:             * </p>
0575:             * <p>
0576:             * Sets the variable top to a collation element value of a string supplied.
0577:             * </p> 
0578:             * @param varTop one or more (if contraction) characters to which the 
0579:             *               variable top should be set
0580:             * @return a int value containing the value of the variable top in upper 16
0581:             *         bits. Lower 16 bits are undefined.
0582:             * @exception IllegalArgumentException is thrown if varTop argument is not 
0583:             *            a valid variable top element. A variable top element is 
0584:             *            invalid when 
0585:             *            <ul>
0586:             *            <li>it is a contraction that does not exist in the
0587:             *                Collation order
0588:             *            <li>when the PRIMARY strength collation element for the 
0589:             *                variable top has more than two bytes
0590:             *            <li>when the varTop argument is null or zero in length.
0591:             *            </ul>
0592:             * @see #getVariableTop
0593:             * @see RuleBasedCollator#setAlternateHandlingShifted
0594:             * @stable ICU 2.6
0595:             */
0596:            public int setVariableTop(String varTop) {
0597:                if (varTop == null || varTop.length() == 0) {
0598:                    throw new IllegalArgumentException(
0599:                            "Variable top argument string can not be null or zero in length.");
0600:                }
0601:                if (m_srcUtilIter_ == null) {
0602:                    initUtility(true);
0603:                }
0604:
0605:                m_srcUtilColEIter_.setText(varTop);
0606:                int ce = m_srcUtilColEIter_.next();
0607:
0608:                // here we check if we have consumed all characters 
0609:                // you can put in either one character or a contraction
0610:                // you shouldn't put more... 
0611:                if (m_srcUtilColEIter_.getOffset() != varTop.length()
0612:                        || ce == CollationElementIterator.NULLORDER) {
0613:                    throw new IllegalArgumentException(
0614:                            "Variable top argument string is a contraction that does not exist "
0615:                                    + "in the Collation order");
0616:                }
0617:
0618:                int nextCE = m_srcUtilColEIter_.next();
0619:
0620:                if ((nextCE != CollationElementIterator.NULLORDER)
0621:                        && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) {
0622:                    throw new IllegalArgumentException(
0623:                            "Variable top argument string can only have a single collation "
0624:                                    + "element that has less than or equal to two PRIMARY strength "
0625:                                    + "bytes");
0626:                }
0627:
0628:                m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16;
0629:
0630:                return ce & CE_PRIMARY_MASK_;
0631:            }
0632:
0633:            /** 
0634:             * Sets the variable top to a collation element value supplied.
0635:             * Variable top is set to the upper 16 bits. 
0636:             * Lower 16 bits are ignored.
0637:             * @param varTop Collation element value, as returned by setVariableTop or 
0638:             *               getVariableTop
0639:             * @see #getVariableTop
0640:             * @see #setVariableTop(String)
0641:             * @stable ICU 2.6
0642:             */
0643:            public void setVariableTop(int varTop) {
0644:                m_variableTopValue_ = (varTop & CE_PRIMARY_MASK_) >> 16;
0645:            }
0646:
0647:            /**
0648:             * When numeric collation is turned on, this Collator generates a collation 
0649:             * key for the numeric value of substrings of digits. This is a way to get 
0650:             * '100' to sort AFTER '2'
0651:             * @param flag true to turn numeric collation on and false to turn it off
0652:             * @see #getNumericCollation
0653:             * @see #setNumericCollationDefault
0654:             * @stable ICU 2.8
0655:             */
0656:            public void setNumericCollation(boolean flag) {
0657:                // sort substrings of digits as numbers
0658:                m_isNumericCollation_ = flag;
0659:                updateInternalState();
0660:            }
0661:
0662:            // public getters --------------------------------------------------------
0663:
0664:            /**
0665:             * Gets the collation rules for this RuleBasedCollator.
0666:             * Equivalent to String getRules(RuleOption.FULL_RULES).
0667:             * @return returns the collation rules
0668:             * @see #getRules(boolean)
0669:             * @stable ICU 2.8
0670:             */
0671:            public String getRules() {
0672:                return m_rules_;
0673:            }
0674:
0675:            /**
0676:             * Returns current rules. The argument defines whether full rules 
0677:             * (UCA + tailored) rules are returned or just the tailoring. 
0678:             * @param fullrules true if the rules that defines the full set of 
0679:             *        collation order is required, otherwise false for returning only 
0680:             *        the tailored rules
0681:             * @return the current rules that defines this Collator.
0682:             * @see #getRules()
0683:             * @stable ICU 2.6
0684:             */
0685:            public String getRules(boolean fullrules) {
0686:                if (!fullrules) {
0687:                    return m_rules_;
0688:                }
0689:                // take the UCA rules and append real rules at the end 
0690:                return UCA_.m_rules_.concat(m_rules_);
0691:            }
0692:
0693:            /**
0694:             * Get an UnicodeSet that contains all the characters and sequences
0695:             * tailored in this collator.
0696:             * @return a pointer to a UnicodeSet object containing all the
0697:             *         code points and sequences that may sort differently than
0698:             *         in the UCA.
0699:             * @exception ParseException thrown when argument rules have an
0700:             *            invalid syntax. IOException
0701:             * @stable ICU 2.4
0702:             */
0703:            public UnicodeSet getTailoredSet() {
0704:                try {
0705:                    CollationRuleParser src = new CollationRuleParser(
0706:                            getRules());
0707:                    return src.getTailoredSet();
0708:                } catch (Exception e) {
0709:                    throw new IllegalStateException(
0710:                            "A tailoring rule should not "
0711:                                    + "have errors. Something is quite wrong!");
0712:                }
0713:            }
0714:
0715:            private class contContext {
0716:                RuleBasedCollator coll;
0717:                UnicodeSet contractions;
0718:                UnicodeSet expansions;
0719:                UnicodeSet removedContractions;
0720:                boolean addPrefixes;
0721:
0722:                contContext(RuleBasedCollator coll, UnicodeSet contractions,
0723:                        UnicodeSet expansions, UnicodeSet removedContractions,
0724:                        boolean addPrefixes) {
0725:                    this .coll = coll;
0726:                    this .contractions = contractions;
0727:                    this .expansions = expansions;
0728:                    this .removedContractions = removedContractions;
0729:                    this .addPrefixes = addPrefixes;
0730:                }
0731:            }
0732:
0733:            private void addSpecial(contContext c, StringBuffer buffer, int CE) {
0734:                StringBuffer b = new StringBuffer();
0735:                int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_;
0736:                int newCE = c.coll.m_contractionCE_[offset];
0737:                // we might have a contraction that ends from previous level
0738:                if (newCE != CollationElementIterator.CE_NOT_FOUND_) {
0739:                    if (isSpecial(CE)
0740:                            && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_
0741:                            && isSpecial(newCE)
0742:                            && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_
0743:                            && c.addPrefixes) {
0744:                        addSpecial(c, buffer, newCE);
0745:                    }
0746:                    if (buffer.length() > 1) {
0747:                        if (c.contractions != null) {
0748:                            c.contractions.add(buffer.toString());
0749:                        }
0750:                        if (c.expansions != null
0751:                                && isSpecial(CE)
0752:                                && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
0753:                            c.expansions.add(buffer.toString());
0754:                        }
0755:                    }
0756:                }
0757:
0758:                offset++;
0759:                // check whether we're doing contraction or prefix
0760:                if (getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_
0761:                        && c.addPrefixes) {
0762:                    while (c.coll.m_contractionIndex_[offset] != 0xFFFF) {
0763:                        b.delete(0, b.length());
0764:                        b.append(buffer);
0765:                        newCE = c.coll.m_contractionCE_[offset];
0766:                        b.insert(0, c.coll.m_contractionIndex_[offset]);
0767:                        if (isSpecial(newCE)
0768:                                && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
0769:                            addSpecial(c, b, newCE);
0770:                        } else {
0771:                            if (c.contractions != null) {
0772:                                c.contractions.add(b.toString());
0773:                            }
0774:                            if (c.expansions != null
0775:                                    && isSpecial(newCE)
0776:                                    && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
0777:                                c.expansions.add(b.toString());
0778:                            }
0779:                        }
0780:                        offset++;
0781:                    }
0782:                } else if (getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) {
0783:                    while (c.coll.m_contractionIndex_[offset] != 0xFFFF) {
0784:                        b.delete(0, b.length());
0785:                        b.append(buffer);
0786:                        newCE = c.coll.m_contractionCE_[offset];
0787:                        b.append(c.coll.m_contractionIndex_[offset]);
0788:                        if (isSpecial(newCE)
0789:                                && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
0790:                            addSpecial(c, b, newCE);
0791:                        } else {
0792:                            if (c.contractions != null) {
0793:                                c.contractions.add(b.toString());
0794:                            }
0795:                            if (c.expansions != null
0796:                                    && isSpecial(newCE)
0797:                                    && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
0798:                                c.expansions.add(b.toString());
0799:                            }
0800:                        }
0801:                        offset++;
0802:                    }
0803:                }
0804:            }
0805:
0806:            private void processSpecials(contContext c) {
0807:                int internalBufferSize = 512;
0808:                TrieIterator trieiterator = new TrieIterator(c.coll.m_trie_);
0809:                RangeValueIterator.Element element = new RangeValueIterator.Element();
0810:                while (trieiterator.next(element)) {
0811:                    int start = element.start;
0812:                    int limit = element.limit;
0813:                    int CE = element.value;
0814:                    StringBuffer contraction = new StringBuffer(
0815:                            internalBufferSize);
0816:
0817:                    if (isSpecial(CE)) {
0818:                        if (((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) {
0819:                            while (start < limit) {
0820:                                // if there are suppressed contractions, we don't 
0821:                                // want to add them.
0822:                                if (c.removedContractions != null
0823:                                        && c.removedContractions
0824:                                                .contains(start)) {
0825:                                    start++;
0826:                                    continue;
0827:                                }
0828:                                // we start our contraction from middle, since we don't know if it
0829:                                // will grow toward right or left
0830:                                contraction.append((char) start);
0831:                                addSpecial(c, contraction, CE);
0832:                                start++;
0833:                            }
0834:                        } else if (c.expansions != null
0835:                                && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
0836:                            while (start < limit) {
0837:                                c.expansions.add(start++);
0838:                            }
0839:                        }
0840:                    }
0841:                }
0842:            }
0843:
0844:            /**
0845:             * Gets unicode sets containing contractions and/or expansions of a collator
0846:             * @param contractions if not null, set to contain contractions
0847:             * @param expansions if not null, set to contain expansions
0848:             * @param addPrefixes add the prefix contextual elements to contractions
0849:             * @throws Exception 
0850:             * @draft ICU 3.4
0851:             * @provisional This API might change or be removed in a future release.
0852:             */
0853:            public void getContractionsAndExpansions(UnicodeSet contractions,
0854:                    UnicodeSet expansions, boolean addPrefixes)
0855:                    throws Exception {
0856:                if (contractions != null) {
0857:                    contractions.clear();
0858:                }
0859:                if (expansions != null) {
0860:                    expansions.clear();
0861:                }
0862:                int rulesLen = 0;
0863:                String rules = getRules();
0864:                try {
0865:                    CollationRuleParser src = new CollationRuleParser(rules);
0866:                    contContext c = new contContext(RuleBasedCollator.UCA_,
0867:                            contractions, expansions, src.m_removeSet_,
0868:                            addPrefixes);
0869:
0870:                    // Add the UCA contractions
0871:                    processSpecials(c);
0872:                    // This is collator specific. Add contractions from a collator
0873:                    c.coll = this ;
0874:                    c.removedContractions = null;
0875:                    processSpecials(c);
0876:                } catch (Exception e) {
0877:                    throw e;
0878:                }
0879:            }
0880:
0881:            /**
0882:             * <p>
0883:             * Get a Collation key for the argument String source from this
0884:             * RuleBasedCollator.
0885:             * </p>
0886:             * <p>
0887:             * General recommendation: <br>
0888:             * If comparison are to be done to the same String multiple times, it would
0889:             * be more efficient to generate CollationKeys for the Strings and use
0890:             * CollationKey.compareTo(CollationKey) for the comparisons.
0891:             * If the each Strings are compared to only once, using the method
0892:             * RuleBasedCollator.compare(String, String) will have a better performance.
0893:             * </p>
0894:             * <p>
0895:             * See the class documentation for an explanation about CollationKeys.
0896:             * </p>
0897:             * @param source the text String to be transformed into a collation key.
0898:             * @return the CollationKey for the given String based on this
0899:             *         RuleBasedCollator's collation rules. If the source String is
0900:             *         null, a null CollationKey is returned.
0901:             * @see CollationKey
0902:             * @see #compare(String, String)
0903:             * @see #getRawCollationKey
0904:             * @stable ICU 2.8
0905:             */
0906:            public CollationKey getCollationKey(String source) {
0907:                if (source == null) {
0908:                    return null;
0909:                }
0910:                m_utilRawCollationKey_ = getRawCollationKey(source,
0911:                        m_utilRawCollationKey_);
0912:                return new CollationKey(source, m_utilRawCollationKey_);
0913:            }
0914:
0915:            /**
0916:             * Gets the simpler form of a CollationKey for the String source following
0917:             * the rules of this Collator and stores the result into the user provided 
0918:             * argument key. 
0919:             * If key has a internal byte array of length that's too small for the 
0920:             * result, the internal byte array will be grown to the exact required 
0921:             * size.
0922:             * @param source the text String to be transformed into a RawCollationKey  
0923:             * @param key output RawCollationKey to store results
0924:             * @return If key is null, a new instance of RawCollationKey will be 
0925:             *         created and returned, otherwise the user provided key will be 
0926:             *         returned.
0927:             * @see #getCollationKey 
0928:             * @see #compare(String, String)
0929:             * @see RawCollationKey
0930:             * @stable ICU 2.8
0931:             */
0932:            public RawCollationKey getRawCollationKey(String source,
0933:                    RawCollationKey key) {
0934:                if (source == null) {
0935:                    return null;
0936:                }
0937:                int strength = getStrength();
0938:                m_utilCompare0_ = m_isCaseLevel_;
0939:                m_utilCompare1_ = true;
0940:                m_utilCompare2_ = strength >= SECONDARY;
0941:                m_utilCompare3_ = strength >= TERTIARY;
0942:                m_utilCompare4_ = strength >= QUATERNARY;
0943:                m_utilCompare5_ = strength == IDENTICAL;
0944:
0945:                m_utilBytesCount0_ = 0;
0946:                m_utilBytesCount1_ = 0;
0947:                m_utilBytesCount2_ = 0;
0948:                m_utilBytesCount3_ = 0;
0949:                m_utilBytesCount4_ = 0;
0950:                m_utilBytesCount5_ = 0;
0951:                m_utilCount0_ = 0;
0952:                m_utilCount1_ = 0;
0953:                m_utilCount2_ = 0;
0954:                m_utilCount3_ = 0;
0955:                m_utilCount4_ = 0;
0956:                m_utilCount5_ = 0;
0957:                boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_;
0958:                // TODO: UCOL_COMMON_BOT4 should be a function of qShifted.
0959:                // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so
0960:                // high.
0961:                int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1)
0962:                        & LAST_BYTE_MASK_;
0963:                byte hiragana4 = 0;
0964:                if (m_isHiragana4_ && m_utilCompare4_) {
0965:                    // allocate one more space for hiragana, value for hiragana
0966:                    hiragana4 = (byte) commonBottom4;
0967:                    commonBottom4++;
0968:                }
0969:
0970:                int bottomCount4 = 0xFF - commonBottom4;
0971:                // If we need to normalize, we'll do it all at once at the beginning!
0972:                if (m_utilCompare5_
0973:                        && Normalizer.quickCheck(source, Normalizer.NFD, 0) != Normalizer.YES) {
0974:                    // if it is identical strength, we have to normalize the string to
0975:                    // NFD so that it will be appended correctly to the end of the sort
0976:                    // key
0977:                    source = Normalizer.decompose(source, false);
0978:                } else if (getDecomposition() != NO_DECOMPOSITION
0979:                        && Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.YES) {
0980:                    // for the rest of the strength, if decomposition is on, FCD is
0981:                    // enough for us to work on.
0982:                    source = Normalizer.normalize(source, Normalizer.FCD);
0983:                }
0984:                getSortKeyBytes(source, doFrench, hiragana4, commonBottom4,
0985:                        bottomCount4);
0986:                if (key == null) {
0987:                    key = new RawCollationKey();
0988:                }
0989:                getSortKey(source, doFrench, commonBottom4, bottomCount4, key);
0990:                return key;
0991:            }
0992:
0993:            /**
0994:             * Return true if an uppercase character is sorted before the corresponding lowercase character.
0995:             * See setCaseFirst(boolean) for details.
0996:             * @see #setUpperCaseFirst
0997:             * @see #setLowerCaseFirst
0998:             * @see #isLowerCaseFirst
0999:             * @see #setCaseFirstDefault
1000:             * @return true if upper cased characters are sorted before lower cased
1001:             *         characters, false otherwise
1002:             * @stable ICU 2.8
1003:             */
1004:            public boolean isUpperCaseFirst() {
1005:                return (m_caseFirst_ == AttributeValue.UPPER_FIRST_);
1006:            }
1007:
1008:            /**
1009:             * Return true if a lowercase character is sorted before the corresponding uppercase character.
1010:             * See setCaseFirst(boolean) for details.
1011:             * @see #setUpperCaseFirst
1012:             * @see #setLowerCaseFirst
1013:             * @see #isUpperCaseFirst
1014:             * @see #setCaseFirstDefault
1015:             * @return true lower cased characters are sorted before upper cased
1016:             *         characters, false otherwise
1017:             * @stable ICU 2.8
1018:             */
1019:            public boolean isLowerCaseFirst() {
1020:                return (m_caseFirst_ == AttributeValue.LOWER_FIRST_);
1021:            }
1022:
1023:            /**
1024:             * Checks if the alternate handling behaviour is the UCA defined SHIFTED or
1025:             * NON_IGNORABLE.
1026:             * If return value is true, then the alternate handling attribute for the
1027:             * Collator is SHIFTED. Otherwise if return value is false, then the
1028:             * alternate handling attribute for the Collator is NON_IGNORABLE
1029:             * See setAlternateHandlingShifted(boolean) for more details.
1030:             * @return true or false
1031:             * @see #setAlternateHandlingShifted(boolean)
1032:             * @see #setAlternateHandlingDefault
1033:             * @stable ICU 2.8
1034:             */
1035:            public boolean isAlternateHandlingShifted() {
1036:                return m_isAlternateHandlingShifted_;
1037:            }
1038:
1039:            /**
1040:             * Checks if case level is set to true.
1041:             * See setCaseLevel(boolean) for details.
1042:             * @return the case level mode
1043:             * @see #setCaseLevelDefault
1044:             * @see #isCaseLevel
1045:             * @see #setCaseLevel(boolean)
1046:             * @stable ICU 2.8
1047:             */
1048:            public boolean isCaseLevel() {
1049:                return m_isCaseLevel_;
1050:            }
1051:
1052:            /**
1053:             * Checks if French Collation is set to true.
1054:             * See setFrenchCollation(boolean) for details.
1055:             * @return true if French Collation is set to true, false otherwise
1056:             * @see #setFrenchCollation(boolean)
1057:             * @see #setFrenchCollationDefault
1058:             * @stable ICU 2.8
1059:             */
1060:            public boolean isFrenchCollation() {
1061:                return m_isFrenchCollation_;
1062:            }
1063:
1064:            /**
1065:             * Checks if the Hiragana Quaternary mode is set on.
1066:             * See setHiraganaQuaternary(boolean) for more details.
1067:             * @return flag true if Hiragana Quaternary mode is on, false otherwise
1068:             * @see #setHiraganaQuaternaryDefault
1069:             * @see #setHiraganaQuaternary(boolean)
1070:             * @stable ICU 2.8
1071:             */
1072:            public boolean isHiraganaQuaternary() {
1073:                return m_isHiragana4_;
1074:            }
1075:
1076:            /** 
1077:             * Gets the variable top value of a Collator. 
1078:             * Lower 16 bits are undefined and should be ignored.
1079:             * @return the variable top value of a Collator.
1080:             * @see #setVariableTop
1081:             * @stable ICU 2.6
1082:             */
1083:            public int getVariableTop() {
1084:                return m_variableTopValue_ << 16;
1085:            }
1086:
1087:            /** 
1088:             * Method to retrieve the numeric collation value.
1089:             * When numeric collation is turned on, this Collator generates a collation 
1090:             * key for the numeric value of substrings of digits. This is a way to get 
1091:             * '100' to sort AFTER '2'
1092:             * @see #setNumericCollation
1093:             * @see #setNumericCollationDefault
1094:             * @return true if numeric collation is turned on, false otherwise
1095:             * @stable ICU 2.8
1096:             */
1097:            public boolean getNumericCollation() {
1098:                return m_isNumericCollation_;
1099:            }
1100:
1101:            // public other methods -------------------------------------------------
1102:
1103:            /**
1104:             * Compares the equality of two RuleBasedCollator objects.
1105:             * RuleBasedCollator objects are equal if they have the same collation
1106:             * rules and the same attributes.
1107:             * @param obj the RuleBasedCollator to be compared to.
1108:             * @return true if this RuleBasedCollator has exactly the same
1109:             *         collation behaviour as obj, false otherwise.
1110:             * @stable ICU 2.8
1111:             */
1112:            public boolean equals(Object obj) {
1113:                if (obj == null) {
1114:                    return false; // super does class check
1115:                }
1116:                if (this  == obj) {
1117:                    return true;
1118:                }
1119:                if (getClass() != obj.getClass()) {
1120:                    return false;
1121:                }
1122:                RuleBasedCollator other = (RuleBasedCollator) obj;
1123:                // all other non-transient information is also contained in rules.
1124:                if (getStrength() != other.getStrength()
1125:                        || getDecomposition() != other.getDecomposition()
1126:                        || other.m_caseFirst_ != m_caseFirst_
1127:                        || other.m_caseSwitch_ != m_caseSwitch_
1128:                        || other.m_isAlternateHandlingShifted_ != m_isAlternateHandlingShifted_
1129:                        || other.m_isCaseLevel_ != m_isCaseLevel_
1130:                        || other.m_isFrenchCollation_ != m_isFrenchCollation_
1131:                        || other.m_isHiragana4_ != m_isHiragana4_) {
1132:                    return false;
1133:                }
1134:                boolean rules = m_rules_ == other.m_rules_;
1135:                if (!rules && (m_rules_ != null && other.m_rules_ != null)) {
1136:                    rules = m_rules_.equals(other.m_rules_);
1137:                }
1138:                if (!rules || !ICUDebug.enabled("collation")) {
1139:                    return rules;
1140:                }
1141:                if (m_addition3_ != other.m_addition3_
1142:                        || m_bottom3_ != other.m_bottom3_
1143:                        || m_bottomCount3_ != other.m_bottomCount3_
1144:                        || m_common3_ != other.m_common3_
1145:                        || m_isSimple3_ != other.m_isSimple3_
1146:                        || m_mask3_ != other.m_mask3_
1147:                        || m_minContractionEnd_ != other.m_minContractionEnd_
1148:                        || m_minUnsafe_ != other.m_minUnsafe_
1149:                        || m_top3_ != other.m_top3_
1150:                        || m_topCount3_ != other.m_topCount3_
1151:                        || !Arrays.equals(m_unsafe_, other.m_unsafe_)) {
1152:                    return false;
1153:                }
1154:                if (!m_trie_.equals(other.m_trie_)) {
1155:                    // we should use the trie iterator here, but then this part is
1156:                    // only used in the test.
1157:                    for (int i = UCharacter.MAX_VALUE; i >= UCharacter.MIN_VALUE; i--) {
1158:                        int v = m_trie_.getCodePointValue(i);
1159:                        int otherv = other.m_trie_.getCodePointValue(i);
1160:                        if (v != otherv) {
1161:                            int mask = v & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_);
1162:                            if (mask == (otherv & 0xff000000)) {
1163:                                v &= 0xffffff;
1164:                                otherv &= 0xffffff;
1165:                                if (mask == 0xf1000000) {
1166:                                    v -= (m_expansionOffset_ << 4);
1167:                                    otherv -= (other.m_expansionOffset_ << 4);
1168:                                } else if (mask == 0xf2000000) {
1169:                                    v -= m_contractionOffset_;
1170:                                    otherv -= other.m_contractionOffset_;
1171:                                }
1172:                                if (v == otherv) {
1173:                                    continue;
1174:                                }
1175:                            }
1176:                            return false;
1177:                        }
1178:                    }
1179:                }
1180:                if (Arrays.equals(m_contractionCE_, other.m_contractionCE_)
1181:                        && Arrays.equals(m_contractionEnd_,
1182:                                other.m_contractionEnd_)
1183:                        && Arrays.equals(m_contractionIndex_,
1184:                                other.m_contractionIndex_)
1185:                        && Arrays.equals(m_expansion_, other.m_expansion_)
1186:                        && Arrays.equals(m_expansionEndCE_,
1187:                                other.m_expansionEndCE_)) {
1188:                    // not comparing paddings
1189:                    for (int i = 0; i < m_expansionEndCE_.length; i++) {
1190:                        if (m_expansionEndCEMaxSize_[i] != other.m_expansionEndCEMaxSize_[i]) {
1191:                            return false;
1192:                        }
1193:                        return true;
1194:                    }
1195:                }
1196:                return false;
1197:            }
1198:
1199:            /**
1200:             * Returns a hash code computed from the rule string that
1201:             * {@link #getRules()} reports. A collator whose rules are
1202:             * <code>null</code> hashes like one with empty rules.
1203:             * @return hash code of this collator's rule string
1204:             * @stable ICU 2.8
1205:             */
1206:            public int hashCode() {
1207:                final String ruleText = getRules();
1208:                // a missing rule string is folded onto the empty rule string
1209:                return (ruleText == null ? "" : ruleText).hashCode();
1210:            }
1211:
1212:            /**
1213:             * Compares the source text String to the target text String according to
1214:             * the collation rules, strength and decomposition mode for this
1215:             * RuleBasedCollator.
1216:             * Returns an integer less than,
1217:             * equal to or greater than zero depending on whether the source String is
1218:             * less than, equal to or greater than the target String. See the Collator
1219:             * class description for an example of use.
1220:             * </p>
1221:             * <p>
1222:             * General recommendation: <br>
1223:             * If comparison are to be done to the same String multiple times, it would
1224:             * be more efficient to generate CollationKeys for the Strings and use
1225:             * CollationKey.compareTo(CollationKey) for the comparisons.
1226:             * If speed performance is critical and object instantiation is to be 
1227:             * reduced, further optimization may be achieved by generating a simpler 
1228:             * key of the form RawCollationKey and reusing this RawCollationKey 
1229:             * object with the method RuleBasedCollator.getRawCollationKey. Internal 
1230:             * byte representation can be directly accessed via RawCollationKey and
1231:             * stored for future use. Like CollationKey, RawCollationKey provides a
1232:             * method RawCollationKey.compareTo for key comparisons.
1233:             * If the each Strings are compared to only once, using the method
1234:             * RuleBasedCollator.compare(String, String) will have a better performance.
1235:             * </p>
1236:             * @param source the source text String.
1237:             * @param target the target text String.
1238:             * @return Returns an integer value. Value is less than zero if source is
1239:             *         less than target, value is zero if source and target are equal,
1240:             *         value is greater than zero if source is greater than target.
1241:             * @see CollationKey
1242:             * @see #getCollationKey
1243:             * @stable ICU 2.8
1244:             */
1245:            public int compare(String source, String target) {
1246:                if (source == target) {
1247:                    return 0;
1248:                }
1249:
1250:                // Find the length of any leading portion that is equal
1251:                int offset = getFirstUnmatchedOffset(source, target);
1252:                //return compareRegular(source, target, offset);
1253:                if (latinOneUse_) {
1254:                    if ((offset < source.length() && source.charAt(offset) > ENDOFLATINONERANGE_)
1255:                            || (offset < target.length() && target
1256:                                    .charAt(offset) > ENDOFLATINONERANGE_)) {
1257:                        // source or target start with non-latin-1
1258:                        return compareRegular(source, target, offset);
1259:                    } else {
1260:                        return compareUseLatin1(source, target, offset);
1261:                    }
1262:                } else {
1263:                    return compareRegular(source, target, offset);
1264:                }
1265:            }
1266:
1267:            // package private inner interfaces --------------------------------------
1268:
1269:            /**
1270:             * Values that can be assigned to the collator attributes
1271:             */
1272:            static interface AttributeValue {
1273:                /**
1274:                 * Selects the default value of an attribute. Each attribute
1275:                 * documents what its own default is.
1276:                 */
1277:                int DEFAULT_ = -1;
1278:                /**
1279:                 * Strength value: primary
1280:                 */
1281:                int PRIMARY_ = Collator.PRIMARY;
1282:                /**
1283:                 * Strength value: secondary
1284:                 */
1285:                int SECONDARY_ = Collator.SECONDARY;
1286:                /**
1287:                 * Strength value: tertiary
1288:                 */
1289:                int TERTIARY_ = Collator.TERTIARY;
1290:                /**
1291:                 * Strength used by default, namely tertiary
1292:                 */
1293:                int DEFAULT_STRENGTH_ = Collator.TERTIARY;
1294:                /**
1295:                 * One past tertiary; for strength checks on collation elements
1296:                 */
1297:                int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1;
1298:                /**
1299:                 * Strength value: quaternary
1300:                 */
1301:                int QUATERNARY_ = 3;
1302:                /**
1303:                 * Strength value: identical
1304:                 */
1305:                int IDENTICAL_ = Collator.IDENTICAL;
1306:                /**
1307:                 * One past identical; for internal strength checks
1308:                 */
1309:                int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1;
1310:                /**
1311:                 * Switches a feature off. Applies to FRENCH_COLLATION, CASE_LEVEL,
1312:                 * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE
1313:                 */
1314:                int OFF_ = 16;
1315:                /**
1316:                 * Switches a feature on. Applies to FRENCH_COLLATION, CASE_LEVEL,
1317:                 * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE
1318:                 */
1319:                int ON_ = 17;
1320:                /**
1321:                 * ALTERNATE_HANDLING value: alternate handling is shifted
1322:                 */
1323:                int SHIFTED_ = 20;
1324:                /**
1325:                 * ALTERNATE_HANDLING value: alternate handling is non
1326:                 * ignorable
1327:                 */
1328:                int NON_IGNORABLE_ = 21;
1329:                /**
1330:                 * CASE_FIRST value: lower case sorts before upper case
1331:                 */
1332:                int LOWER_FIRST_ = 24;
1333:                /**
1334:                 * CASE_FIRST value: upper case sorts before lower case
1335:                 */
1336:                int UPPER_FIRST_ = 25;
1337:                /**
1338:                 * Number of attribute values
1339:                 */
1340:                int LIMIT_ = 29;
1341:            }
1342:
1343:            /**
1344:             * Attributes known to the collation service. Every attribute accepts
1345:             * the DEFAULT value in addition to the values listed for it.
1346:             */
1347:            static interface Attribute {
1348:                /**
1349:                 * Direction of the secondary weights, as used in French. ON makes
1350:                 * the secondary weights be considered backwards, OFF treats them
1351:                 * in the order in which they appear.
1352:                 * Acceptable values are ON and OFF.
1353:                 */
1354:                int FRENCH_COLLATION_ = 0;
1355:                /**
1356:                 * Handling of variable elements. NON_IGNORABLE (default) treats
1357:                 * all the codepoints with non-ignorable primary weights in the
1358:                 * same way. SHIFTED causes codepoints with primary weights that
1359:                 * are equal to or below the variable top value to be ignored on
1360:                 * the primary level and moved to the quaternary level.
1361:                 * Acceptable values are NON_IGNORABLE and SHIFTED.
1362:                 */
1363:                int ALTERNATE_HANDLING_ = 1;
1364:                /**
1365:                 * Ordering of upper and lower case letters. OFF (default) orders
1366:                 * them in accordance to their tertiary weights, UPPER_FIRST
1367:                 * forces upper case letters to sort before lower case letters,
1368:                 * and LOWER_FIRST does the opposite.
1369:                 * Acceptable values are OFF, UPPER_FIRST and LOWER_FIRST.
1370:                 */
1371:                int CASE_FIRST_ = 2;
1372:                /**
1373:                 * Generation of an extra case level, positioned before the third
1374:                 * level. With OFF (default) no case level is generated, with ON
1375:                 * it is generated. The contents of the case level are affected
1376:                 * by the value of the CASE_FIRST attribute. A simple way to
1377:                 * ignore accent differences in a string is to set the strength
1378:                 * to PRIMARY and enable the case level.
1379:                 * Acceptable values are OFF and ON.
1380:                 */
1381:                int CASE_LEVEL_ = 3;
1382:                /**
1383:                 * Normalization check and normalization. With OFF (default) no
1384:                 * normalization check is performed, and the result is guaranteed
1385:                 * to be correct only if the input data is in so-called FCD form
1386:                 * (see the users manual for more info). With ON an incremental
1387:                 * check is performed to see whether the input data is in the
1388:                 * FCD form; if it is not, incremental NFD normalization is
1389:                 * performed.
1390:                 * Acceptable values are OFF and ON.
1391:                 */
1392:                int NORMALIZATION_MODE_ = 4;
1393:                /**
1394:                 * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY,
1395:                 * QUATERNARY or IDENTICAL. The usual strength for most locales
1396:                 * (except Japanese) is tertiary. Quaternary strength is useful when
1397:                 * combined with the shifted setting of the alternate handling
1398:                 * attribute, and for JIS x 4061 collation, where it distinguishes
1399:                 * between Katakana and Hiragana (this is achieved by setting the
1400:                 * HIRAGANA_QUATERNARY mode to on). Otherwise, the quaternary level
1401:                 * is affected only by the number of non ignorable code points in
1402:                 * the string. Identical strength is rarely useful, as it amounts
1403:                 * to codepoints of the NFD form of the string.
1404:                 */
1405:                int STRENGTH_ = 5;
1406:                /**
1407:                 * When turned on, positions Hiragana before all non-ignorables
1408:                 * on the quaternary level. This is a sneaky way to produce JIS
1409:                 * sort order.
1410:                 */
1411:                int HIRAGANA_QUATERNARY_MODE_ = 6;
1412:                /**
1413:                 * Attribute count
1414:                 */
1415:                int LIMIT_ = 7;
1416:            }
1417:
1418:            /**
1419:             * Stateless Trie.DataManipulate singleton; obtain it via getInstance().
1420:             */
1421:            static class DataManipulate implements  Trie.DataManipulate {
1422:                // public methods ----------------------------------------------------
1423:
1424:                /**
1425:                 * Internal method called to parse a lead surrogate's ce for the offset
1426:                 * to the next trail surrogate data.
1427:                 * @param ce collation element of the lead surrogate
1428:                 * @return the low 24 bits of ce when ce is a special ce carrying
1429:                 *         CE_SURROGATE_TAG_, otherwise 0
1430:                 * @stable ICU 2.8
1431:                 */
1432:                public final int getFoldingOffset(int ce) {
1433:                    if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) {
1434:                        return (ce & 0xFFFFFF);
1435:                    }
1436:                    return 0;
1437:                }
1438:
1439:                /**
1440:                 * Gets the singleton object. It is created when this nested class
1441:                 * is initialized, so concurrent first callers cannot race.
1442:                 * @return the shared DataManipulate instance
1443:                 */
1444:                public static final DataManipulate getInstance() {
1445:                    return m_instance_;
1446:                }
1447:
1448:                // private data member ----------------------------------------------
1449:
1450:                /**
1451:                 * Singleton instance, created eagerly during class initialization
1452:                 */
1453:                private static final DataManipulate m_instance_ = new DataManipulate();
1454:
1455:                // private constructor ----------------------------------------------
1456:
1457:                /**
1458:                 * private to prevent initialization
1459:                 */
1460:                private DataManipulate() {
1461:                }
1462:            }
1463:
1464:            /**
1465:             * UCA constants; the enclosing static initializer passes one to CollatorReader.read
1466:             */
1467:            static final class UCAConstants {
1468:                int[] FIRST_TERTIARY_IGNORABLE_ = new int[2]; // 0x00000000
1469:                int[] LAST_TERTIARY_IGNORABLE_ = new int[2]; // 0x00000000
1470:                int[] FIRST_PRIMARY_IGNORABLE_ = new int[2]; // 0x00008705
1471:                int[] FIRST_SECONDARY_IGNORABLE_ = new int[2]; // 0x00000000
1472:                int[] LAST_SECONDARY_IGNORABLE_ = new int[2]; // 0x00000500
1473:                int[] LAST_PRIMARY_IGNORABLE_ = new int[2]; // 0x0000DD05
1474:                int[] FIRST_VARIABLE_ = new int[2]; // 0x05070505
1475:                int[] LAST_VARIABLE_ = new int[2]; // 0x13CF0505
1476:                int[] FIRST_NON_VARIABLE_ = new int[2]; // 0x16200505
1477:                int[] LAST_NON_VARIABLE_ = new int[2]; // 0x767C0505
1478:                int[] RESET_TOP_VALUE_ = new int[2]; // 0x9F000303
1479:                int[] FIRST_IMPLICIT_ = new int[2];
1480:                int[] LAST_IMPLICIT_ = new int[2];
1481:                int[] FIRST_TRAILING_ = new int[2];
1482:                int[] LAST_TRAILING_ = new int[2];
1483:                int PRIMARY_TOP_MIN_;
1484:                int PRIMARY_IMPLICIT_MIN_; // 0xE8000000
1485:                int PRIMARY_IMPLICIT_MAX_; // 0xF0000000
1486:                int PRIMARY_TRAILING_MIN_; // 0xE8000000
1487:                int PRIMARY_TRAILING_MAX_; // 0xF0000000
1488:                int PRIMARY_SPECIAL_MIN_; // 0xE8000000
1489:                int PRIMARY_SPECIAL_MAX_; // 0xF0000000
1490:            }
1491:
1492:            // package private data member -------------------------------------------
1493:
1494:            static final byte BYTE_FIRST_TAILORED_ = (byte) 0x04;
1495:            static final byte BYTE_COMMON_ = (byte) 0x05;
1496:            static final int COMMON_TOP_2_ = 0x86; // int, not byte: 0x86 is outside the signed byte range
1497:            static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;
1498:            /**
1499:             * Case strength mask
1500:             */
1501:            static final int CE_CASE_BIT_MASK_ = 0xC0;
1502:            static final int CE_TAG_SHIFT_ = 24; // CE_TAG_MASK_ >>> CE_TAG_SHIFT_ == 0x0F
1503:            static final int CE_TAG_MASK_ = 0x0F000000; // bits 24-27
1504:
1505:            static final int CE_SPECIAL_FLAG_ = 0xF0000000; // bits 28-31
1506:            /**
1507:             * Lead surrogate that is tailored and doesn't start a contraction
1508:             */
1509:            static final int CE_SURROGATE_TAG_ = 5;
1510:            /**
1511:             * Mask to get the primary strength of the collation element
1512:             */
1513:            static final int CE_PRIMARY_MASK_ = 0xFFFF0000;
1514:            /**
1515:             * Mask to get the secondary strength of the collation element
1516:             */
1517:            static final int CE_SECONDARY_MASK_ = 0xFF00;
1518:            /**
1519:             * Mask to get the tertiary strength of the collation element
1520:             */
1521:            static final int CE_TERTIARY_MASK_ = 0xFF;
1522:            /**
1523:             * Primary strength shift
1524:             */
1525:            static final int CE_PRIMARY_SHIFT_ = 16;
1526:            /**
1527:             * Secondary strength shift
1528:             */
1529:            static final int CE_SECONDARY_SHIFT_ = 8;
1530:            /**
1531:             * Continuation marker (same bit pattern as CE_CASE_BIT_MASK_)
1532:             */
1533:            static final int CE_CONTINUATION_MARKER_ = 0xC0;
1534:
1535:            /**
1536:             * Size of collator raw data headers and options before the expansion
1537:             * data. This is used when expansion ces are to be retrieved. ICU4C
1538:             * counts the expansion offset from UCollator.UColHeader, hence ICU4J
1539:             * has to subtract this size to get the right expansion ce offset. In
1540:             * number of ints.
1541:             */
1542:            int m_expansionOffset_;
1543:            /**
1544:             * Size of collator raw data headers, options and expansions before
1545:             * contraction data. This is used when contraction ces are to be retrieved.
1546:             * ICU4C counts the contraction offset from UCollator.UColHeader, hence
1547:             * ICU4J has to subtract this size to get the right contraction ce
1548:             * offset. In number of chars.
1549:             */
1550:            int m_contractionOffset_;
1551:            /**
1552:             * Flag indicating whether Jamo is special
1553:             */
1554:            boolean m_isJamoSpecial_;
1555:
1556:            // Collator options ------------------------------------------------------
1557:
1558:            int m_defaultVariableTopValue_;
1559:            boolean m_defaultIsFrenchCollation_;
1560:            boolean m_defaultIsAlternateHandlingShifted_;
1561:            int m_defaultCaseFirst_;
1562:            boolean m_defaultIsCaseLevel_;
1563:            int m_defaultDecomposition_;
1564:            int m_defaultStrength_;
1565:            boolean m_defaultIsHiragana4_;
1566:            boolean m_defaultIsNumericCollation_;
1567:
1568:            /**
1569:             * Value of the variable top
1570:             */
1571:            int m_variableTopValue_;
1572:            /**
1573:             * Attribute for special Hiragana
1574:             */
1575:            boolean m_isHiragana4_;
1576:            /**
1577:             * Case sorting customization
1578:             */
1579:            int m_caseFirst_;
1580:            /**
1581:             * Numeric collation option
1582:             */
1583:            boolean m_isNumericCollation_;
1584:
1585:            // end Collator options --------------------------------------------------
1586:
1587:            /**
1588:             * Expansion table
1589:             */
1590:            int m_expansion_[];
1591:            /**
1592:             * Contraction index table
1593:             */
1594:            char m_contractionIndex_[];
1595:            /**
1596:             * Contraction CE table
1597:             */
1598:            int m_contractionCE_[];
1599:            /**
1600:             * Data trie
1601:             */
1602:            IntTrie m_trie_;
1603:            /**
1604:             * Table to store all collation elements that are the last element of an
1605:             * expansion. This is for use in StringSearch.
1606:             */
1607:            int m_expansionEndCE_[];
1608:            /**
1609:             * Table to store the maximum size of any expansions that end with the
1610:             * corresponding collation element in m_expansionEndCE_. Also for use
1611:             * in StringSearch.
1612:             */
1613:            byte m_expansionEndCEMaxSize_[];
1614:            /**
1615:             * Heuristic table to store information on whether a char is considered
1616:             * "unsafe". "Unsafe" characters are combining marks or those
1617:             * belonging to some contraction sequence from the offset 1 onwards.
1618:             * E.g. if "ABC" is the only contraction, then 'B' and 'C' are considered
1619:             * unsafe. If we have another contraction "ZA" with the one above, then
1620:             * 'A', 'B', 'C' are "unsafe" but 'Z' is not.
1621:             */
1622:            byte m_unsafe_[];
1623:            /**
1624:             * Table to store information on whether a codepoint can occur as the last
1625:             * character in a contraction
1626:             */
1627:            byte m_contractionEnd_[];
1628:            /**
1629:             * Original collation rules
1630:             */
1631:            String m_rules_;
1632:            /**
1633:             * The smallest "unsafe" codepoint
1634:             */
1635:            char m_minUnsafe_;
1636:            /**
1637:             * The smallest codepoint that could be the end of a contraction
1638:             */
1639:            char m_minContractionEnd_;
1640:            /**
1641:             * General version of the collator
1642:             */
1643:            VersionInfo m_version_;
1644:            /**
1645:             * UCA version
1646:             */
1647:            VersionInfo m_UCA_version_;
1648:            /**
1649:             * UCD version
1650:             */
1651:            VersionInfo m_UCD_version_;
1652:
1653:            /**
1654:             * The UCA collator; assigned once by the static initializer of this class
1655:             */
1656:            static final RuleBasedCollator UCA_;
1657:            /**
1658:             * UCA Constants
1659:             */
1660:            static final UCAConstants UCA_CONSTANTS_;
1661:            /**
1662:             * Table for UCA and builder use
1663:             */
1664:            static final char UCA_CONTRACTIONS_[];
1665:
1666:            private static boolean UCA_INIT_COMPLETE; // set to true as the last step of the static initializer
1667:
1668:            /**
1669:             * Implicit generator
1670:             */
1671:            static final ImplicitCEGenerator impCEGen_;
1672:            //    /**
1673:            //     * Implicit constants
1674:            //     */
1675:            //    static final int IMPLICIT_BASE_BYTE_;
1676:            //    static final int IMPLICIT_LIMIT_BYTE_;
1677:            //    static final int IMPLICIT_4BYTE_BOUNDARY_;
1678:            //    static final int LAST_MULTIPLIER_;
1679:            //    static final int LAST2_MULTIPLIER_;
1680:            //    static final int IMPLICIT_BASE_3BYTE_;
1681:            //    static final int IMPLICIT_BASE_4BYTE_;
1682:            //    static final int BYTES_TO_AVOID_ = 3;
1683:            //    static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;
1684:            //    static final int LAST_COUNT_ = OTHER_COUNT_ / 2;
1685:            //    /**
1686:            //     * Room for intervening, without expanding to 5 bytes
1687:            //     */
1688:            //    static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;
1689:            //    static final int IMPLICIT_3BYTE_COUNT_ = 1;
1690:            //
1691:            static final byte SORT_LEVEL_TERMINATOR_ = 1;
1692:
1693:            //  These are values from the UCA required for implicit
1694:            //  generation and for suppressing sort key compression.
1695:            //  They should regularly be in the UCA, but if one
1696:            //  is running without UCA, it could be a problem
1697:            static final int maxRegularPrimary = 0xA0;
1698:            static final int minImplicitPrimary = 0xE0;
1699:            static final int maxImplicitPrimary = 0xE4;
1700:
1701:            // block to initialise the UCA collator and the data derived from it
1702:            static {
1703:                // take pains to let static class init succeed, otherwise the class itself won't exist and
1704:                // clients will get a NoClassDefFoundException.  Instead, make the constructors fail if
1705:                // we can't load the UCA data.
1706:                RuleBasedCollator iUCA_ = null;
1707:                UCAConstants iUCA_CONSTANTS_ = null;
1708:                char iUCA_CONTRACTIONS_[] = null;
1709:                ImplicitCEGenerator iimpCEGen_ = null;
1710:                // true once the UCA data has been read and iUCA_ has been initialised
1711:                boolean dataLoaded = false;
1712:                try {
1713:                    // !!! the static init of the class is not yet complete, yet an
1714:                    // instance of it is created here, so instantiation must not rely
1715:                    // on static initialization that has not happened yet!
1716:                    iUCA_ = new RuleBasedCollator();
1717:                    iUCA_CONSTANTS_ = new UCAConstants();
1718:                    iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_);
1719:
1720:                    // called before doing canonical closure for the UCA.
1721:                    iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary,
1722:                            maxImplicitPrimary);
1723:                    iUCA_.init();
1724:                    dataLoaded = true;
1725:                    ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle
1726:                            .getBundleInstance(
1727:                                    ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH);
1728:                    iUCA_.m_rules_ = (String) rb.getObject("UCARules");
1729:                } catch (MissingResourceException ex) {
1730:                    // swallowed on purpose: class initialisation has to succeed
1731:                } catch (IOException e) {
1732:                    // swallowed on purpose: class initialisation has to succeed
1733:                }
1734:                if (!dataLoaded) {
1735:                    // never publish a half-built collator: checkUCA() can only make
1736:                    // the constructors fail while UCA_ is null
1737:                    iUCA_ = null;
1738:                }
1739:
1740:                UCA_ = iUCA_;
1741:                UCA_CONSTANTS_ = iUCA_CONSTANTS_;
1742:                UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_;
1743:                impCEGen_ = iimpCEGen_;
1744:                UCA_INIT_COMPLETE = true;
1745:            }
1746:
1747:            private static void checkUCA() throws MissingResourceException {
1748:                if (UCA_INIT_COMPLETE && UCA_ == null) {
1749:                    throw new MissingResourceException(
1750:                            "Collator UCA data unavailable", "", "");
1751:                }
1752:            }
1753:
1754:            // package private constructors ------------------------------------------
1755:
1756:            /**
1757:             * <p>Package private constructor.
1758:             * Public access to creating Collators is handled by the API
1759:             * Collator.getInstance() or RuleBasedCollator(String rules).
1760:             * </p>
1761:             * <p>
1762:             * The static initializer of this class uses it to create the UCA collator.
1763:             * </p>
1764:             */
1765:            RuleBasedCollator() {
1766:                checkUCA();
1767:                initUtility(false);
1768:            }
1769:
1770:            /**
1771:             * Constructs a RuleBasedCollator for the argument locale from the
1772:             * collation resource bundle. If no collation data is found for the
1773:             * locale, or reading it fails, the UCA data is used instead.
1774:             * @param locale locale whose collation data is to be loaded
1775:             */
1776:            RuleBasedCollator(ULocale locale) {
1777:                checkUCA();
1778:                ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle
1779:                        .getBundleInstance(
1780:                                ICUResourceBundle.ICU_COLLATION_BASE_NAME,
1781:                                locale);
1782:                initUtility(false);
1783:                if (rb != null) {
1784:                    try {
1785:                        // a "collation" keyword on the locale selects the collation
1786:                        // type, otherwise the default type named by the bundle is used
1787:                        String type = locale.getKeywordValue("collation");
1788:                        if (type == null) {
1789:                            type = rb.getStringWithFallback("collations/default");
1790:                        }
1791:
1792:                        ICUResourceBundle data = rb.getWithFallback("collations/"
1793:                                + type);
1794:                        if (data != null) {
1795:                            // TODO: Determine actual & valid locale correctly
1796:                            ULocale bundleLocale = rb.getULocale();
1797:                            setLocale(bundleLocale, bundleLocale);
1798:
1799:                            m_rules_ = data.getString("Sequence");
1800:                            ByteBuffer image = data.get("%%CollationBin")
1801:                                    .getBinary();
1802:                            if (image == null) {
1803:                                // due to resource redirection a missing %%CollationBin
1804:                                // is not raised as an error; build from the rules
1805:                                init(m_rules_);
1806:                                return;
1807:                            }
1808:                            CollatorReader.initRBC(this , image.array());
1809:                            // the collator has been read in; the binary image is
1810:                            // used as is only when its UCA and UCD versions are
1811:                            // those of UCA_
1812:                            boolean sameVersions = m_UCA_version_
1813:                                    .equals(UCA_.m_UCA_version_)
1814:                                    && m_UCD_version_.equals(UCA_.m_UCD_version_);
1815:                            if (sameVersions) {
1816:                                init();
1817:                            } else {
1818:                                init(m_rules_);
1819:                            }
1820:                            return;
1821:                        }
1822:                    } catch (Exception e) {
1823:                        // if failed use UCA.
1824:                    }
1825:                }
1826:                setWithUCAData();
1827:            }
1853:
1854:            // package private methods -----------------------------------------------
1855:
1856:            /**
1857:             * Sets this collator to use the tables in UCA. Note options not taken
1858:             * care of here.
1859:             */
1860:            final void setWithUCATables() {
1861:                m_contractionOffset_ = UCA_.m_contractionOffset_;
1862:                m_expansionOffset_ = UCA_.m_expansionOffset_;
1863:                m_expansion_ = UCA_.m_expansion_;
1864:                m_contractionIndex_ = UCA_.m_contractionIndex_;
1865:                m_contractionCE_ = UCA_.m_contractionCE_;
1866:                m_trie_ = UCA_.m_trie_;
1867:                m_expansionEndCE_ = UCA_.m_expansionEndCE_;
1868:                m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_;
1869:                m_unsafe_ = UCA_.m_unsafe_;
1870:                m_contractionEnd_ = UCA_.m_contractionEnd_;
1871:                m_minUnsafe_ = UCA_.m_minUnsafe_;
1872:                m_minContractionEnd_ = UCA_.m_minContractionEnd_;
1873:            }
1874:
1875:            /**
1876:             * Sets this collator to use the all options and tables in UCA.
1877:             */
1878:            final void setWithUCAData() {
1879:                latinOneFailed_ = true;
1880:
1881:                m_addition3_ = UCA_.m_addition3_;
1882:                m_bottom3_ = UCA_.m_bottom3_;
1883:                m_bottomCount3_ = UCA_.m_bottomCount3_;
1884:                m_caseFirst_ = UCA_.m_caseFirst_;
1885:                m_caseSwitch_ = UCA_.m_caseSwitch_;
1886:                m_common3_ = UCA_.m_common3_;
1887:                m_contractionOffset_ = UCA_.m_contractionOffset_;
1888:                setDecomposition(UCA_.getDecomposition());
1889:                m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_;
1890:                m_defaultDecomposition_ = UCA_.m_defaultDecomposition_;
1891:                m_defaultIsAlternateHandlingShifted_ = UCA_.m_defaultIsAlternateHandlingShifted_;
1892:                m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_;
1893:                m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_;
1894:                m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_;
1895:                m_defaultStrength_ = UCA_.m_defaultStrength_;
1896:                m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_;
1897:                m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_;
1898:                m_expansionOffset_ = UCA_.m_expansionOffset_;
1899:                m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_;
1900:                m_isCaseLevel_ = UCA_.m_isCaseLevel_;
1901:                m_isFrenchCollation_ = UCA_.m_isFrenchCollation_;
1902:                m_isHiragana4_ = UCA_.m_isHiragana4_;
1903:                m_isJamoSpecial_ = UCA_.m_isJamoSpecial_;
1904:                m_isSimple3_ = UCA_.m_isSimple3_;
1905:                m_mask3_ = UCA_.m_mask3_;
1906:                m_minContractionEnd_ = UCA_.m_minContractionEnd_;
1907:                m_minUnsafe_ = UCA_.m_minUnsafe_;
1908:                m_rules_ = UCA_.m_rules_;
1909:                setStrength(UCA_.getStrength());
1910:                m_top3_ = UCA_.m_top3_;
1911:                m_topCount3_ = UCA_.m_topCount3_;
1912:                m_variableTopValue_ = UCA_.m_variableTopValue_;
1913:                m_isNumericCollation_ = UCA_.m_isNumericCollation_;
1914:                setWithUCATables();
1915:                latinOneFailed_ = false;
1916:            }
1917:
1918:            /**
1919:             * Test whether a char character is potentially "unsafe" for use as a
1920:             * collation starting point. "Unsafe" characters are combining marks or
1921:             * those belonging to some contraction sequence from the offset 1 onwards.
1922:             * E.g. if "ABC" is the only contraction, then 'B' and
1923:             * 'C' are considered unsafe. If we have another contraction "ZA" with
1924:             * the one above, then 'A', 'B', 'C' are "unsafe" but 'Z' is not.
1925:             * @param ch character to determin
1926:             * @return true if ch is unsafe, false otherwise
1927:             */
1928:            final boolean isUnsafe(char ch) {
1929:                if (ch < m_minUnsafe_) {
1930:                    return false;
1931:                }
1932:
1933:                if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
1934:                    if (UTF16.isLeadSurrogate(ch) || UTF16.isTrailSurrogate(ch)) {
1935:                        //  Trail surrogate are always considered unsafe.
1936:                        return true;
1937:                    }
1938:                    ch &= HEURISTIC_OVERFLOW_MASK_;
1939:                    ch += HEURISTIC_OVERFLOW_OFFSET_;
1940:                }
1941:                int value = m_unsafe_[ch >> HEURISTIC_SHIFT_];
1942:                return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;
1943:            }
1944:
1945:            /**
1946:             * Approximate determination if a char character is at a contraction end.
1947:             * Guaranteed to be true if a character is at the end of a contraction,
1948:             * otherwise it is not deterministic.
1949:             * @param ch character to be determined
1950:             */
1951:            final boolean isContractionEnd(char ch) {
1952:                if (UTF16.isTrailSurrogate(ch)) {
1953:                    return true;
1954:                }
1955:
1956:                if (ch < m_minContractionEnd_) {
1957:                    return false;
1958:                }
1959:
1960:                if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
1961:                    ch &= HEURISTIC_OVERFLOW_MASK_;
1962:                    ch += HEURISTIC_OVERFLOW_OFFSET_;
1963:                }
1964:                int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_];
1965:                return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0;
1966:            }
1967:
1968:            /**
1969:             * Retrieve the tag of a special ce
1970:             * @param ce ce to test
1971:             * @return tag of ce
1972:             */
1973:            static int getTag(int ce) {
1974:                return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_;
1975:            }
1976:
1977:            /**
1978:             * Checking if ce is special
1979:             * @param ce to check
1980:             * @return true if ce is special
1981:             */
1982:            static boolean isSpecial(int ce) {
1983:                return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_;
1984:            }
1985:
1986:            /**
1987:             * Checks if the argument ce is a continuation
1988:             * @param ce collation element to test
1989:             * @return true if ce is a continuation
1990:             */
1991:            static final boolean isContinuation(int ce) {
1992:                return ce != CollationElementIterator.NULLORDER
1993:                        && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_;
1994:            }
1995:
1996:            // private inner classes ------------------------------------------------
1997:
1998:            // private variables -----------------------------------------------------
1999:
2000:            /**
2001:             * The smallest natural unsafe or contraction end char character before
2002:             * tailoring.
2003:             * This is a combining mark.
2004:             */
2005:            private static final int DEFAULT_MIN_HEURISTIC_ = 0x300;
2006:            /**
2007:             * Heuristic table table size. Size is 32 bytes, 1 bit for each
2008:             * latin 1 char, and some power of two for hashing the rest of the chars.
2009:             * Size in bytes.
2010:             */
2011:            private static final char HEURISTIC_SIZE_ = 1056;
2012:            /**
2013:             * Mask value down to "some power of two" - 1,
2014:             * number of bits, not num of bytes.
2015:             */
2016:            private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff;
2017:            /**
2018:             * Unsafe character shift
2019:             */
2020:            private static final int HEURISTIC_SHIFT_ = 3;
2021:            /**
2022:             * Unsafe character addition for character too large, it has to be folded
2023:             * then incremented.
2024:             */
2025:            private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256;
2026:            /**
2027:             * Mask value to get offset in heuristic table.
2028:             */
2029:            private static final char HEURISTIC_MASK_ = 7;
2030:
2031:            private int m_caseSwitch_;
2032:            private int m_common3_;
2033:            private int m_mask3_;
2034:            /**
2035:             * When switching case, we need to add or subtract different values.
2036:             */
2037:            private int m_addition3_;
2038:            /**
2039:             * Upper range when compressing
2040:             */
2041:            private int m_top3_;
2042:            /**
2043:             * Upper range when compressing
2044:             */
2045:            private int m_bottom3_;
2046:            private int m_topCount3_;
2047:            private int m_bottomCount3_;
2048:            /**
2049:             * Case first constants
2050:             */
2051:            private static final int CASE_SWITCH_ = 0xC0;
2052:            private static final int NO_CASE_SWITCH_ = 0;
2053:            /**
2054:             * Case level constants
2055:             */
2056:            private static final int CE_REMOVE_CASE_ = 0x3F;
2057:            private static final int CE_KEEP_CASE_ = 0xFF;
2058:            /**
2059:             * Case strength mask
2060:             */
2061:            private static final int CE_CASE_MASK_3_ = 0xFF;
2062:            /**
2063:             * Sortkey size factor. Values can be changed.
2064:             */
2065:            private static final double PROPORTION_2_ = 0.5;
2066:            private static final double PROPORTION_3_ = 0.667;
2067:
2068:            // These values come from the UCA ----------------------------------------
2069:
2070:            /**
2071:             * This is an enum that lists magic special byte values from the
2072:             * fractional UCA
2073:             */
2074:            private static final byte BYTE_ZERO_ = 0x0;
2075:            private static final byte BYTE_LEVEL_SEPARATOR_ = (byte) 0x01;
2076:            private static final byte BYTE_SORTKEY_GLUE_ = (byte) 0x02;
2077:            private static final byte BYTE_SHIFT_PREFIX_ = (byte) 0x03;
2078:            /*private*/static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
2079:            private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
2080:            static final byte CODAN_PLACEHOLDER = 0x24;
2081:            private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte) 0x4C;
2082:            private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte) 0x4D;
2083:            private static final byte BYTE_UNSHIFTED_MAX_ = (byte) 0xFF;
2084:            private static final int TOTAL_2_ = COMMON_TOP_2_
2085:                    - COMMON_BOTTOM_2_ - 1;
2086:            private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;
2087:            private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40;
2088:            private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85;
2089:            private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45;
2090:            private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5;
2091:            private static final int COMMON_BOTTOM_3_ = 0x05;
2092:            private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86;
2093:            private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ = COMMON_BOTTOM_3_;
2094:            private static final int TOP_COUNT_2_ = (int) (PROPORTION_2_ * TOTAL_2_);
2095:            private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_;
2096:            private static final int COMMON_2_ = COMMON_BOTTOM_2_;
2097:            private static final int COMMON_UPPER_FIRST_3_ = 0xC5;
2098:            private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_;
2099:            private static final int COMMON_4_ = (byte) 0xFF;
2100:
2101:            /**
2102:             * Minimum size required for the binary collation data in bytes.
2103:             * Size of UCA header + size of options to 4 bytes
2104:             */
2105:            //private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
2106:            /**
2107:             * If this collator is to generate only simple tertiaries for fast path
2108:             */
2109:            private boolean m_isSimple3_;
2110:
2111:            /**
2112:             * French collation sorting flag
2113:             */
2114:            private boolean m_isFrenchCollation_;
2115:            /**
2116:             * Flag indicating if shifted is requested for Quaternary alternate
2117:             * handling. If this is not true, the default for alternate handling will
2118:             * be non-ignorable.
2119:             */
2120:            private boolean m_isAlternateHandlingShifted_;
2121:            /**
2122:             * Extra case level for sorting
2123:             */
2124:            private boolean m_isCaseLevel_;
2125:
2126:            private static final int SORT_BUFFER_INIT_SIZE_ = 128;
2127:            private static final int SORT_BUFFER_INIT_SIZE_1_ = SORT_BUFFER_INIT_SIZE_ << 3;
2128:            private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_;
2129:            private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_;
2130:            private static final int SORT_BUFFER_INIT_SIZE_CASE_ = SORT_BUFFER_INIT_SIZE_ >> 2;
2131:            private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_;
2132:
2133:            private static final int CE_CONTINUATION_TAG_ = 0xC0;
2134:            private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F;
2135:
2136:            private static final int LAST_BYTE_MASK_ = 0xFF;
2137:
2138:            private static final int CE_RESET_TOP_VALUE_ = 0x9F000303;
2139:            private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303;
2140:
2141:            private static final byte SORT_CASE_BYTE_START_ = (byte) 0x80;
2142:            private static final byte SORT_CASE_SHIFT_START_ = (byte) 7;
2143:
2144:            /**
2145:             * CE buffer size
2146:             */
2147:            private static final int CE_BUFFER_SIZE_ = 512;
2148:
2149:            // variables for Latin-1 processing
2150:            boolean latinOneUse_ = false;
2151:            boolean latinOneRegenTable_ = false;
2152:            boolean latinOneFailed_ = false;
2153:
2154:            int latinOneTableLen_ = 0;
2155:            int latinOneCEs_[] = null;
2156:            /**
2157:             * Bunch of utility iterators
2158:             */
2159:            private StringUCharacterIterator m_srcUtilIter_;
2160:            private CollationElementIterator m_srcUtilColEIter_;
2161:            private StringUCharacterIterator m_tgtUtilIter_;
2162:            private CollationElementIterator m_tgtUtilColEIter_;
2163:            /**
2164:             * Utility comparison flags
2165:             */
2166:            private boolean m_utilCompare0_;
2167:            private boolean m_utilCompare1_;
2168:            private boolean m_utilCompare2_;
2169:            private boolean m_utilCompare3_;
2170:            private boolean m_utilCompare4_;
2171:            private boolean m_utilCompare5_;
2172:            /**
2173:             * Utility byte buffer
2174:             */
2175:            private byte m_utilBytes0_[];
2176:            private byte m_utilBytes1_[];
2177:            private byte m_utilBytes2_[];
2178:            private byte m_utilBytes3_[];
2179:            private byte m_utilBytes4_[];
2180:            private byte m_utilBytes5_[];
2181:            private RawCollationKey m_utilRawCollationKey_;
2182:
2183:            private int m_utilBytesCount0_;
2184:            private int m_utilBytesCount1_;
2185:            private int m_utilBytesCount2_;
2186:            private int m_utilBytesCount3_;
2187:            private int m_utilBytesCount4_;
2188:            private int m_utilBytesCount5_;
2189:            private int m_utilCount0_;
2190:            private int m_utilCount1_;
2191:            private int m_utilCount2_;
2192:            private int m_utilCount3_;
2193:            private int m_utilCount4_;
2194:            private int m_utilCount5_;
2195:
2196:            private int m_utilFrenchStart_;
2197:            private int m_utilFrenchEnd_;
2198:
2199:            /**
2200:             * Preparing the CE buffers. will be filled during the primary phase
2201:             */
2202:            private int m_srcUtilCEBuffer_[];
2203:            private int m_tgtUtilCEBuffer_[];
2204:            private int m_srcUtilCEBufferSize_;
2205:            private int m_tgtUtilCEBufferSize_;
2206:
2207:            private int m_srcUtilContOffset_;
2208:            private int m_tgtUtilContOffset_;
2209:
2210:            private int m_srcUtilOffset_;
2211:            private int m_tgtUtilOffset_;
2212:
2213:            // private methods -------------------------------------------------------
2214:
2215:            private void init(String rules) throws Exception {
2216:                setWithUCAData();
2217:                CollationParsedRuleBuilder builder = new CollationParsedRuleBuilder(
2218:                        rules);
2219:                builder.setRules(this );
2220:                m_rules_ = rules;
2221:                init();
2222:                initUtility(false);
2223:            }
2224:
2225:            private final int compareRegular(String source, String target,
2226:                    int offset) {
2227:                if (m_srcUtilIter_ == null) {
2228:                    initUtility(true);
2229:                }
2230:                int strength = getStrength();
2231:                // setting up the collator parameters
2232:                m_utilCompare0_ = m_isCaseLevel_;
2233:                m_utilCompare1_ = true;
2234:                m_utilCompare2_ = strength >= SECONDARY;
2235:                m_utilCompare3_ = strength >= TERTIARY;
2236:                m_utilCompare4_ = strength >= QUATERNARY;
2237:                m_utilCompare5_ = strength == IDENTICAL;
2238:                boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_;
2239:                boolean doShift4 = m_isAlternateHandlingShifted_
2240:                        && m_utilCompare4_;
2241:                boolean doHiragana4 = m_isHiragana4_ && m_utilCompare4_;
2242:
2243:                if (doHiragana4 && doShift4) {
2244:                    String sourcesub = source.substring(offset);
2245:                    String targetsub = target.substring(offset);
2246:                    return compareBySortKeys(sourcesub, targetsub);
2247:                }
2248:
2249:                // This is the lowest primary value that will not be ignored if shifted
2250:                int lowestpvalue = m_isAlternateHandlingShifted_ ? m_variableTopValue_ << 16
2251:                        : 0;
2252:                m_srcUtilCEBufferSize_ = 0;
2253:                m_tgtUtilCEBufferSize_ = 0;
2254:                int result = doPrimaryCompare(doHiragana4, lowestpvalue,
2255:                        source, target, offset);
2256:                if (m_srcUtilCEBufferSize_ == -1
2257:                        && m_tgtUtilCEBufferSize_ == -1) {
2258:                    // since the cebuffer is cleared when we have determined that
2259:                    // either source is greater than target or vice versa, the return
2260:                    // result is the comparison result and not the hiragana result
2261:                    return result;
2262:                }
2263:
2264:                int hiraganaresult = result;
2265:
2266:                if (m_utilCompare2_) {
2267:                    result = doSecondaryCompare(doFrench);
2268:                    if (result != 0) {
2269:                        return result;
2270:                    }
2271:                }
2272:                // doing the case bit
2273:                if (m_utilCompare0_) {
2274:                    result = doCaseCompare();
2275:                    if (result != 0) {
2276:                        return result;
2277:                    }
2278:                }
2279:                // Tertiary level
2280:                if (m_utilCompare3_) {
2281:                    result = doTertiaryCompare();
2282:                    if (result != 0) {
2283:                        return result;
2284:                    }
2285:                }
2286:
2287:                if (doShift4) { // checkQuad
2288:                    result = doQuaternaryCompare(lowestpvalue);
2289:                    if (result != 0) {
2290:                        return result;
2291:                    }
2292:                } else if (doHiragana4 && hiraganaresult != 0) {
2293:                    // If we're fine on quaternaries, we might be different
2294:                    // on Hiragana. This, however, might fail us in shifted.
2295:                    return hiraganaresult;
2296:                }
2297:
2298:                // For IDENTICAL comparisons, we use a bitwise character comparison
2299:                // as a tiebreaker if all else is equal.
2300:                // Getting here  should be quite rare - strings are not identical -
2301:                // that is checked first, but compared == through all other checks.
2302:                if (m_utilCompare5_) {
2303:                    return doIdenticalCompare(source, target, offset, true);
2304:                }
2305:                return 0;
2306:            }
2307:
2308:            /**
2309:             * Gets the 2 bytes of primary order and adds it to the primary byte array
2310:             * @param ce current ce
2311:             * @param notIsContinuation flag indicating if the current bytes belong to
2312:             *          a continuation ce
2313:             * @param doShift flag indicating if ce is to be shifted
2314:             * @param leadPrimary lead primary used for compression
2315:             * @param commonBottom4 common byte value for Quaternary
2316:             * @param bottomCount4 smallest byte value for Quaternary
2317:             * @return the new lead primary for compression
2318:             */
2319:            private final int doPrimaryBytes(int ce, boolean notIsContinuation,
2320:                    boolean doShift, int leadPrimary, int commonBottom4,
2321:                    int bottomCount4) {
2322:
2323:                int p2 = (ce >>= 16) & LAST_BYTE_MASK_; // in ints for unsigned
2324:                int p1 = ce >>> 8; // comparison
2325:                if (doShift) {
2326:                    if (m_utilCount4_ > 0) {
2327:                        while (m_utilCount4_ > bottomCount4) {
2328:                            m_utilBytes4_ = append(m_utilBytes4_,
2329:                                    m_utilBytesCount4_,
2330:                                    (byte) (commonBottom4 + bottomCount4));
2331:                            m_utilBytesCount4_++;
2332:                            m_utilCount4_ -= bottomCount4;
2333:                        }
2334:                        m_utilBytes4_ = append(m_utilBytes4_,
2335:                                m_utilBytesCount4_,
2336:                                (byte) (commonBottom4 + (m_utilCount4_ - 1)));
2337:                        m_utilBytesCount4_++;
2338:                        m_utilCount4_ = 0;
2339:                    }
2340:                    // dealing with a variable and we're treating them as shifted
2341:                    // This is a shifted ignorable
2342:                    if (p1 != 0) {
2343:                        // we need to check this since we could be in continuation
2344:                        m_utilBytes4_ = append(m_utilBytes4_,
2345:                                m_utilBytesCount4_, (byte) p1);
2346:                        m_utilBytesCount4_++;
2347:                    }
2348:                    if (p2 != 0) {
2349:                        m_utilBytes4_ = append(m_utilBytes4_,
2350:                                m_utilBytesCount4_, (byte) p2);
2351:                        m_utilBytesCount4_++;
2352:                    }
2353:                } else {
2354:                    // Note: This code assumes that the table is well built
2355:                    // i.e. not having 0 bytes where they are not supposed to be.
2356:                    // Usually, we'll have non-zero primary1 & primary2, except
2357:                    // in cases of LatinOne and friends, when primary2 will be
2358:                    // regular and simple sortkey calc
2359:                    if (p1 != CollationElementIterator.IGNORABLE) {
2360:                        if (notIsContinuation) {
2361:                            if (leadPrimary == p1) {
2362:                                m_utilBytes1_ = append(m_utilBytes1_,
2363:                                        m_utilBytesCount1_, (byte) p2);
2364:                                m_utilBytesCount1_++;
2365:                            } else {
2366:                                if (leadPrimary != 0) {
2367:                                    m_utilBytes1_ = append(
2368:                                            m_utilBytes1_,
2369:                                            m_utilBytesCount1_,
2370:                                            ((p1 > leadPrimary) ? BYTE_UNSHIFTED_MAX_
2371:                                                    : BYTE_UNSHIFTED_MIN_));
2372:                                    m_utilBytesCount1_++;
2373:                                }
2374:                                if (p2 == CollationElementIterator.IGNORABLE) {
2375:                                    // one byter, not compressed
2376:                                    m_utilBytes1_ = append(m_utilBytes1_,
2377:                                            m_utilBytesCount1_, (byte) p1);
2378:                                    m_utilBytesCount1_++;
2379:                                    leadPrimary = 0;
2380:                                } else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_
2381:                                        || (p1 > maxRegularPrimary
2382:                                        //> (RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_[0]
2383:                                        //                                              >>> 24)
2384:                                        && p1 < minImplicitPrimary
2385:                                        //< (RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_[0]
2386:                                        //                                              >>> 24)
2387:                                        )) {
2388:                                    // not compressible
2389:                                    leadPrimary = 0;
2390:                                    m_utilBytes1_ = append(m_utilBytes1_,
2391:                                            m_utilBytesCount1_, (byte) p1);
2392:                                    m_utilBytesCount1_++;
2393:                                    m_utilBytes1_ = append(m_utilBytes1_,
2394:                                            m_utilBytesCount1_, (byte) p2);
2395:                                    m_utilBytesCount1_++;
2396:                                } else { // compress
2397:                                    leadPrimary = p1;
2398:                                    m_utilBytes1_ = append(m_utilBytes1_,
2399:                                            m_utilBytesCount1_, (byte) p1);
2400:                                    m_utilBytesCount1_++;
2401:                                    m_utilBytes1_ = append(m_utilBytes1_,
2402:                                            m_utilBytesCount1_, (byte) p2);
2403:                                    m_utilBytesCount1_++;
2404:                                }
2405:                            }
2406:                        } else {
2407:                            // continuation, add primary to the key, no compression
2408:                            m_utilBytes1_ = append(m_utilBytes1_,
2409:                                    m_utilBytesCount1_, (byte) p1);
2410:                            m_utilBytesCount1_++;
2411:                            if (p2 != CollationElementIterator.IGNORABLE) {
2412:                                m_utilBytes1_ = append(m_utilBytes1_,
2413:                                        m_utilBytesCount1_, (byte) p2);
2414:                                // second part
2415:                                m_utilBytesCount1_++;
2416:                            }
2417:                        }
2418:                    }
2419:                }
2420:                return leadPrimary;
2421:            }
2422:
2423:            /**
2424:             * Gets the secondary byte and adds it to the secondary byte array
2425:             * @param ce current ce
2426:             * @param notIsContinuation flag indicating if the current bytes belong to
2427:             *          a continuation ce
2428:             * @param doFrench flag indicator if french sort is to be performed
2429:             */
2430:            private final void doSecondaryBytes(int ce,
2431:                    boolean notIsContinuation, boolean doFrench) {
2432:                int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison
2433:                if (s != 0) {
2434:                    if (!doFrench) {
2435:                        // This is compression code.
2436:                        if (s == COMMON_2_ && notIsContinuation) {
2437:                            m_utilCount2_++;
2438:                        } else {
2439:                            if (m_utilCount2_ > 0) {
2440:                                if (s > COMMON_2_) { // not necessary for 4th level.
2441:                                    while (m_utilCount2_ > TOP_COUNT_2_) {
2442:                                        m_utilBytes2_ = append(
2443:                                                m_utilBytes2_,
2444:                                                m_utilBytesCount2_,
2445:                                                (byte) (COMMON_TOP_2_ - TOP_COUNT_2_));
2446:                                        m_utilBytesCount2_++;
2447:                                        m_utilCount2_ -= TOP_COUNT_2_;
2448:                                    }
2449:                                    m_utilBytes2_ = append(
2450:                                            m_utilBytes2_,
2451:                                            m_utilBytesCount2_,
2452:                                            (byte) (COMMON_TOP_2_ - (m_utilCount2_ - 1)));
2453:                                    m_utilBytesCount2_++;
2454:                                } else {
2455:                                    while (m_utilCount2_ > BOTTOM_COUNT_2_) {
2456:                                        m_utilBytes2_ = append(
2457:                                                m_utilBytes2_,
2458:                                                m_utilBytesCount2_,
2459:                                                (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
2460:                                        m_utilBytesCount2_++;
2461:                                        m_utilCount2_ -= BOTTOM_COUNT_2_;
2462:                                    }
2463:                                    m_utilBytes2_ = append(
2464:                                            m_utilBytes2_,
2465:                                            m_utilBytesCount2_,
2466:                                            (byte) (COMMON_BOTTOM_2_ + (m_utilCount2_ - 1)));
2467:                                    m_utilBytesCount2_++;
2468:                                }
2469:                                m_utilCount2_ = 0;
2470:                            }
2471:                            m_utilBytes2_ = append(m_utilBytes2_,
2472:                                    m_utilBytesCount2_, (byte) s);
2473:                            m_utilBytesCount2_++;
2474:                        }
2475:                    } else {
2476:                        m_utilBytes2_ = append(m_utilBytes2_,
2477:                                m_utilBytesCount2_, (byte) s);
2478:                        m_utilBytesCount2_++;
2479:                        // Do the special handling for French secondaries
2480:                        // We need to get continuation elements and do intermediate
2481:                        // restore
2482:                        // abc1c2c3de with french secondaries need to be edc1c2c3ba
2483:                        // NOT edc3c2c1ba
2484:                        if (notIsContinuation) {
2485:                            if (m_utilFrenchStart_ != -1) {
2486:                                // reverse secondaries from frenchStartPtr up to
2487:                                // frenchEndPtr
2488:                                reverseBuffer(m_utilBytes2_);
2489:                                m_utilFrenchStart_ = -1;
2490:                            }
2491:                        } else {
2492:                            if (m_utilFrenchStart_ == -1) {
2493:                                m_utilFrenchStart_ = m_utilBytesCount2_ - 2;
2494:                            }
2495:                            m_utilFrenchEnd_ = m_utilBytesCount2_ - 1;
2496:                        }
2497:                    }
2498:                }
2499:            }
2500:
2501:            /**
2502:             * Reverse the argument buffer
2503:             * @param buffer to reverse
2504:             */
2505:            private void reverseBuffer(byte buffer[]) {
2506:                int start = m_utilFrenchStart_;
2507:                int end = m_utilFrenchEnd_;
2508:                while (start < end) {
2509:                    byte b = buffer[start];
2510:                    buffer[start++] = buffer[end];
2511:                    buffer[end--] = b;
2512:                }
2513:            }
2514:
2515:            /**
2516:             * Insert the case shifting byte if required
2517:             * @param caseshift value
2518:             * @return new caseshift value
2519:             */
2520:            private final int doCaseShift(int caseshift) {
2521:                if (caseshift == 0) {
2522:                    m_utilBytes0_ = append(m_utilBytes0_, m_utilBytesCount0_,
2523:                            SORT_CASE_BYTE_START_);
2524:                    m_utilBytesCount0_++;
2525:                    caseshift = SORT_CASE_SHIFT_START_;
2526:                }
2527:                return caseshift;
2528:            }
2529:
2530:            /**
2531:             * Performs the casing sort
2532:             * @param tertiary byte in ints for easy comparison
2533:             * @param notIsContinuation flag indicating if the current bytes belong to
2534:             *          a continuation ce
2535:             * @param caseshift
2536:             * @return the new value of case shift
2537:             */
2538:            private final int doCaseBytes(int tertiary,
2539:                    boolean notIsContinuation, int caseshift) {
2540:                caseshift = doCaseShift(caseshift);
2541:
2542:                if (notIsContinuation && tertiary != 0) {
2543:                    byte casebits = (byte) (tertiary & 0xC0);
2544:                    if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
2545:                        if (casebits == 0) {
2546:                            m_utilBytes0_[m_utilBytesCount0_ - 1] |= (1 << (--caseshift));
2547:                        } else {
2548:                            // second bit
2549:                            caseshift = doCaseShift(caseshift - 1);
2550:                            m_utilBytes0_[m_utilBytesCount0_ - 1] |= ((casebits >> 6) & 1) << (--caseshift);
2551:                        }
2552:                    } else {
2553:                        if (casebits != 0) {
2554:                            m_utilBytes0_[m_utilBytesCount0_ - 1] |= 1 << (--caseshift);
2555:                            // second bit
2556:                            caseshift = doCaseShift(caseshift);
2557:                            m_utilBytes0_[m_utilBytesCount0_ - 1] |= ((casebits >> 7) & 1) << (--caseshift);
2558:                        } else {
2559:                            caseshift--;
2560:                        }
2561:                    }
2562:                }
2563:
2564:                return caseshift;
2565:            }
2566:
2567:            /**
2568:             * Gets the tertiary byte and adds it to the tertiary byte array
2569:             * @param tertiary byte in int for easy comparison
2570:             * @param notIsContinuation flag indicating if the current bytes belong to
2571:             *          a continuation ce
2572:             */
2573:            private final void doTertiaryBytes(int tertiary,
2574:                    boolean notIsContinuation) {
2575:                if (tertiary != 0) {
2576:                    // This is compression code.
2577:                    // sequence size check is included in the if clause
2578:                    if (tertiary == m_common3_ && notIsContinuation) {
2579:                        m_utilCount3_++;
2580:                    } else {
2581:                        int common3 = m_common3_ & LAST_BYTE_MASK_;
2582:                        if (tertiary > common3
2583:                                && m_common3_ == COMMON_NORMAL_3_) {
2584:                            tertiary += m_addition3_;
2585:                        } else if (tertiary <= common3
2586:                                && m_common3_ == COMMON_UPPER_FIRST_3_) {
2587:                            tertiary -= m_addition3_;
2588:                        }
2589:                        if (m_utilCount3_ > 0) {
2590:                            if (tertiary > common3) {
2591:                                while (m_utilCount3_ > m_topCount3_) {
2592:                                    m_utilBytes3_ = append(m_utilBytes3_,
2593:                                            m_utilBytesCount3_,
2594:                                            (byte) (m_top3_ - m_topCount3_));
2595:                                    m_utilBytesCount3_++;
2596:                                    m_utilCount3_ -= m_topCount3_;
2597:                                }
2598:                                m_utilBytes3_ = append(m_utilBytes3_,
2599:                                        m_utilBytesCount3_,
2600:                                        (byte) (m_top3_ - (m_utilCount3_ - 1)));
2601:                                m_utilBytesCount3_++;
2602:                            } else {
2603:                                while (m_utilCount3_ > m_bottomCount3_) {
2604:                                    m_utilBytes3_ = append(
2605:                                            m_utilBytes3_,
2606:                                            m_utilBytesCount3_,
2607:                                            (byte) (m_bottom3_ + m_bottomCount3_));
2608:                                    m_utilBytesCount3_++;
2609:                                    m_utilCount3_ -= m_bottomCount3_;
2610:                                }
2611:                                m_utilBytes3_ = append(
2612:                                        m_utilBytes3_,
2613:                                        m_utilBytesCount3_,
2614:                                        (byte) (m_bottom3_ + (m_utilCount3_ - 1)));
2615:                                m_utilBytesCount3_++;
2616:                            }
2617:                            m_utilCount3_ = 0;
2618:                        }
2619:                        m_utilBytes3_ = append(m_utilBytes3_,
2620:                                m_utilBytesCount3_, (byte) tertiary);
2621:                        m_utilBytesCount3_++;
2622:                    }
2623:                }
2624:            }
2625:
2626:            /**
2627:             * Gets the Quaternary byte and adds it to the Quaternary byte array
2628:             * @param isCodePointHiragana flag indicator if the previous codepoint
2629:             *          we dealt with was Hiragana
2630:             * @param commonBottom4 smallest common Quaternary byte
2631:             * @param bottomCount4 smallest Quaternary byte
2632:             * @param hiragana4 hiragana Quaternary byte
2633:             */
2634:            private final void doQuaternaryBytes(boolean isCodePointHiragana,
2635:                    int commonBottom4, int bottomCount4, byte hiragana4) {
2636:                if (isCodePointHiragana) { // This was Hiragana, need to note it
2637:                    if (m_utilCount4_ > 0) { // Close this part
2638:                        while (m_utilCount4_ > bottomCount4) {
2639:                            m_utilBytes4_ = append(m_utilBytes4_,
2640:                                    m_utilBytesCount4_,
2641:                                    (byte) (commonBottom4 + bottomCount4));
2642:                            m_utilBytesCount4_++;
2643:                            m_utilCount4_ -= bottomCount4;
2644:                        }
2645:                        m_utilBytes4_ = append(m_utilBytes4_,
2646:                                m_utilBytesCount4_,
2647:                                (byte) (commonBottom4 + (m_utilCount4_ - 1)));
2648:                        m_utilBytesCount4_++;
2649:                        m_utilCount4_ = 0;
2650:                    }
2651:                    m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
2652:                            hiragana4); // Add the Hiragana
2653:                    m_utilBytesCount4_++;
2654:                } else { // This wasn't Hiragana, so we can continue adding stuff
2655:                    m_utilCount4_++;
2656:                }
2657:            }
2658:
2659:            /**
2660:             * Iterates through the argument string for all ces.
2661:             * Split the ces into their relevant primaries, secondaries etc.
2662:             * @param source normalized string
2663:             * @param doFrench flag indicator if special handling of French has to be
2664:             *                  done
2665:             * @param hiragana4 offset for Hiragana quaternary
2666:             * @param commonBottom4 smallest common quaternary byte
2667:             * @param bottomCount4 smallest quaternary byte
2668:             */
2669:            private final void getSortKeyBytes(String source, boolean doFrench,
2670:                    byte hiragana4, int commonBottom4, int bottomCount4)
2671:
2672:            {
2673:                if (m_srcUtilIter_ == null) {
2674:                    initUtility(true);
2675:                }
2676:                int backupDecomposition = getDecomposition();
2677:                setDecomposition(NO_DECOMPOSITION); // have to revert to backup later
2678:                m_srcUtilIter_.setText(source);
2679:                m_srcUtilColEIter_.setText(m_srcUtilIter_);
2680:                m_utilFrenchStart_ = -1;
2681:                m_utilFrenchEnd_ = -1;
2682:
2683:                // scriptorder not implemented yet
2684:                // const uint8_t *scriptOrder = coll->scriptOrder;
2685:
2686:                boolean doShift = false;
2687:                boolean notIsContinuation = false;
2688:
2689:                int leadPrimary = 0; // int for easier comparison
2690:                int caseShift = 0;
2691:
2692:                while (true) {
2693:                    int ce = m_srcUtilColEIter_.next();
2694:                    if (ce == CollationElementIterator.NULLORDER) {
2695:                        break;
2696:                    }
2697:
2698:                    if (ce == CollationElementIterator.IGNORABLE) {
2699:                        continue;
2700:                    }
2701:
2702:                    notIsContinuation = !isContinuation(ce);
2703:
2704:                    /*
2705:                     * if (notIsContinuation) {
2706:                            if (scriptOrder != NULL) {
2707:                                primary1 = scriptOrder[primary1];
2708:                            }
2709:                        }*/
2710:                    boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0;
2711:                    // actually we can just check that the first byte is 0
2712:                    // generation stuffs the order left first
2713:                    boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_) <= m_variableTopValue_;
2714:                    doShift = (m_isAlternateHandlingShifted_
2715:                            && ((notIsContinuation && isSmallerThanVariableTop && !isPrimaryByteIgnorable) // primary byte not 0
2716:                            || (!notIsContinuation && doShift)) || (doShift && isPrimaryByteIgnorable));
2717:                    if (doShift && isPrimaryByteIgnorable) {
2718:                        // amendment to the UCA says that primary ignorables and other
2719:                        // ignorables should be removed if following a shifted code
2720:                        // point
2721:                        // if we were shifted and we got an ignorable code point
2722:                        // we should just completely ignore it
2723:                        continue;
2724:                    }
2725:                    leadPrimary = doPrimaryBytes(ce, notIsContinuation,
2726:                            doShift, leadPrimary, commonBottom4, bottomCount4);
2727:                    if (doShift) {
2728:                        continue;
2729:                    }
2730:                    if (m_utilCompare2_) {
2731:                        doSecondaryBytes(ce, notIsContinuation, doFrench);
2732:                    }
2733:
2734:                    int t = ce & LAST_BYTE_MASK_;
2735:                    if (!notIsContinuation) {
2736:                        t = ce & CE_REMOVE_CONTINUATION_MASK_;
2737:                    }
2738:
2739:                    if (m_utilCompare0_
2740:                            && (!isPrimaryByteIgnorable || m_utilCompare2_)) {
2741:                        // do the case level if we need to do it. We don't want to calculate
2742:                        // case level for primary ignorables if we have only primary strength and case level
2743:                        // otherwise we would break well formedness of CEs 
2744:                        caseShift = doCaseBytes(t, notIsContinuation, caseShift);
2745:                    } else if (notIsContinuation) {
2746:                        t ^= m_caseSwitch_;
2747:                    }
2748:
2749:                    t &= m_mask3_;
2750:
2751:                    if (m_utilCompare3_) {
2752:                        doTertiaryBytes(t, notIsContinuation);
2753:                    }
2754:
2755:                    if (m_utilCompare4_ && notIsContinuation) { // compare quad
2756:                        doQuaternaryBytes(
2757:                                m_srcUtilColEIter_.m_isCodePointHiragana_,
2758:                                commonBottom4, bottomCount4, hiragana4);
2759:                    }
2760:                }
2761:                setDecomposition(backupDecomposition); // reverts to original
2762:                if (m_utilFrenchStart_ != -1) {
2763:                    // one last round of checks
2764:                    reverseBuffer(m_utilBytes2_);
2765:                }
2766:            }
2767:
2768:            /**
2769:             * From the individual strength byte results the final compact sortkey
2770:             * will be calculated.
2771:             * @param source text string
2772:             * @param doFrench flag indicating that special handling of French has to
2773:             *                  be done
2774:             * @param commonBottom4 smallest common quaternary byte
2775:             * @param bottomCount4 smallest quaternary byte
2776:             * @param key output RawCollationKey to store results, key cannot be null
2777:             */
2778:            private final void getSortKey(String source, boolean doFrench,
2779:                    int commonBottom4, int bottomCount4, RawCollationKey key) {
2780:                // we have done all the CE's, now let's put them together to form
2781:                // a key
2782:                if (m_utilCompare2_) {
2783:                    doSecondary(doFrench);
2784:                }
2785:                // adding case level should be independent of secondary level
2786:                if (m_utilCompare0_) {
2787:                    doCase();
2788:                }
2789:                if (m_utilCompare3_) {
2790:                    doTertiary();
2791:                    if (m_utilCompare4_) {
2792:                        doQuaternary(commonBottom4, bottomCount4);
2793:                        if (m_utilCompare5_) {
2794:                            doIdentical(source);
2795:                        }
2796:
2797:                    }
2798:                }
2799:                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
2800:                        (byte) 0);
2801:                m_utilBytesCount1_++;
2802:
2803:                key.set(m_utilBytes1_, 0, m_utilBytesCount1_);
2804:            }
2805:
2806:            /**
2807:             * Packs the French bytes
2808:             */
2809:            private final void doFrench() {
2810:                for (int i = 0; i < m_utilBytesCount2_; i++) {
2811:                    byte s = m_utilBytes2_[m_utilBytesCount2_ - i - 1];
2812:                    // This is compression code.
2813:                    if (s == COMMON_2_) {
2814:                        ++m_utilCount2_;
2815:                    } else {
2816:                        if (m_utilCount2_ > 0) {
2817:                            // getting the unsigned value
2818:                            if ((s & LAST_BYTE_MASK_) > COMMON_2_) {
2819:                                // not necessary for 4th level.
2820:                                while (m_utilCount2_ > TOP_COUNT_2_) {
2821:                                    m_utilBytes1_ = append(
2822:                                            m_utilBytes1_,
2823:                                            m_utilBytesCount1_,
2824:                                            (byte) (COMMON_TOP_2_ - TOP_COUNT_2_));
2825:                                    m_utilBytesCount1_++;
2826:                                    m_utilCount2_ -= TOP_COUNT_2_;
2827:                                }
2828:                                m_utilBytes1_ = append(
2829:                                        m_utilBytes1_,
2830:                                        m_utilBytesCount1_,
2831:                                        (byte) (COMMON_TOP_2_ - (m_utilCount2_ - 1)));
2832:                                m_utilBytesCount1_++;
2833:                            } else {
2834:                                while (m_utilCount2_ > BOTTOM_COUNT_2_) {
2835:                                    m_utilBytes1_ = append(
2836:                                            m_utilBytes1_,
2837:                                            m_utilBytesCount1_,
2838:                                            (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
2839:                                    m_utilBytesCount1_++;
2840:                                    m_utilCount2_ -= BOTTOM_COUNT_2_;
2841:                                }
2842:                                m_utilBytes1_ = append(
2843:                                        m_utilBytes1_,
2844:                                        m_utilBytesCount1_,
2845:                                        (byte) (COMMON_BOTTOM_2_ + (m_utilCount2_ - 1)));
2846:                                m_utilBytesCount1_++;
2847:                            }
2848:                            m_utilCount2_ = 0;
2849:                        }
2850:                        m_utilBytes1_ = append(m_utilBytes1_,
2851:                                m_utilBytesCount1_, s);
2852:                        m_utilBytesCount1_++;
2853:                    }
2854:                }
2855:                if (m_utilCount2_ > 0) {
2856:                    while (m_utilCount2_ > BOTTOM_COUNT_2_) {
2857:                        m_utilBytes1_ = append(m_utilBytes1_,
2858:                                m_utilBytesCount1_,
2859:                                (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
2860:                        m_utilBytesCount1_++;
2861:                        m_utilCount2_ -= BOTTOM_COUNT_2_;
2862:                    }
2863:                    m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
2864:                            (byte) (COMMON_BOTTOM_2_ + (m_utilCount2_ - 1)));
2865:                    m_utilBytesCount1_++;
2866:                }
2867:            }
2868:
2869:            /**
2870:             * Compacts the secondary bytes and stores them into the primary array
2871:             * @param doFrench flag indicator that French has to be handled specially
2872:             */
2873:            private final void doSecondary(boolean doFrench) {
2874:                if (m_utilCount2_ > 0) {
2875:                    while (m_utilCount2_ > BOTTOM_COUNT_2_) {
2876:                        m_utilBytes2_ = append(m_utilBytes2_,
2877:                                m_utilBytesCount2_,
2878:                                (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
2879:                        m_utilBytesCount2_++;
2880:                        m_utilCount2_ -= BOTTOM_COUNT_2_;
2881:                    }
2882:                    m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_,
2883:                            (byte) (COMMON_BOTTOM_2_ + (m_utilCount2_ - 1)));
2884:                    m_utilBytesCount2_++;
2885:                }
2886:
2887:                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
2888:                        SORT_LEVEL_TERMINATOR_);
2889:                m_utilBytesCount1_++;
2890:
2891:                if (doFrench) { // do the reverse copy
2892:                    doFrench();
2893:                } else {
2894:                    if (m_utilBytes1_.length <= m_utilBytesCount1_
2895:                            + m_utilBytesCount2_) {
2896:                        m_utilBytes1_ = increase(m_utilBytes1_,
2897:                                m_utilBytesCount1_, m_utilBytesCount2_);
2898:                    }
2899:                    System.arraycopy(m_utilBytes2_, 0, m_utilBytes1_,
2900:                            m_utilBytesCount1_, m_utilBytesCount2_);
2901:                    m_utilBytesCount1_ += m_utilBytesCount2_;
2902:                }
2903:            }
2904:
2905:            /**
2906:             * Increase buffer size
2907:             * @param buffer array of bytes
2908:             * @param size of the byte array
2909:             * @param incrementsize size to increase
2910:             * @return the new buffer
2911:             */
2912:            private static final byte[] increase(byte buffer[], int size,
2913:                    int incrementsize) {
2914:                byte result[] = new byte[buffer.length + incrementsize];
2915:                System.arraycopy(buffer, 0, result, 0, size);
2916:                return result;
2917:            }
2918:
2919:            /**
2920:             * Increase buffer size
2921:             * @param buffer array of ints
2922:             * @param size of the byte array
2923:             * @param incrementsize size to increase
2924:             * @return the new buffer
2925:             */
2926:            private static final int[] increase(int buffer[], int size,
2927:                    int incrementsize) {
2928:                int result[] = new int[buffer.length + incrementsize];
2929:                System.arraycopy(buffer, 0, result, 0, size);
2930:                return result;
2931:            }
2932:
2933:            /**
2934:             * Compacts the case bytes and stores them into the primary array
2935:             */
2936:            private final void doCase() {
2937:                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
2938:                        SORT_LEVEL_TERMINATOR_);
2939:                m_utilBytesCount1_++;
2940:                if (m_utilBytes1_.length <= m_utilBytesCount1_
2941:                        + m_utilBytesCount0_) {
2942:                    m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
2943:                            m_utilBytesCount0_);
2944:                }
2945:                System.arraycopy(m_utilBytes0_, 0, m_utilBytes1_,
2946:                        m_utilBytesCount1_, m_utilBytesCount0_);
2947:                m_utilBytesCount1_ += m_utilBytesCount0_;
2948:            }
2949:
2950:            /**
2951:             * Compacts the tertiary bytes and stores them into the primary array
2952:             */
2953:            private final void doTertiary() {
2954:                if (m_utilCount3_ > 0) {
2955:                    if (m_common3_ != COMMON_BOTTOM_3_) {
2956:                        while (m_utilCount3_ >= m_topCount3_) {
2957:                            m_utilBytes3_ = append(m_utilBytes3_,
2958:                                    m_utilBytesCount3_,
2959:                                    (byte) (m_top3_ - m_topCount3_));
2960:                            m_utilBytesCount3_++;
2961:                            m_utilCount3_ -= m_topCount3_;
2962:                        }
2963:                        m_utilBytes3_ = append(m_utilBytes3_,
2964:                                m_utilBytesCount3_,
2965:                                (byte) (m_top3_ - m_utilCount3_));
2966:                        m_utilBytesCount3_++;
2967:                    } else {
2968:                        while (m_utilCount3_ > m_bottomCount3_) {
2969:                            m_utilBytes3_ = append(m_utilBytes3_,
2970:                                    m_utilBytesCount3_,
2971:                                    (byte) (m_bottom3_ + m_bottomCount3_));
2972:                            m_utilBytesCount3_++;
2973:                            m_utilCount3_ -= m_bottomCount3_;
2974:                        }
2975:                        m_utilBytes3_ = append(m_utilBytes3_,
2976:                                m_utilBytesCount3_,
2977:                                (byte) (m_bottom3_ + (m_utilCount3_ - 1)));
2978:                        m_utilBytesCount3_++;
2979:                    }
2980:                }
2981:                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
2982:                        SORT_LEVEL_TERMINATOR_);
2983:                m_utilBytesCount1_++;
2984:                if (m_utilBytes1_.length <= m_utilBytesCount1_
2985:                        + m_utilBytesCount3_) {
2986:                    m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
2987:                            m_utilBytesCount3_);
2988:                }
2989:                System.arraycopy(m_utilBytes3_, 0, m_utilBytes1_,
2990:                        m_utilBytesCount1_, m_utilBytesCount3_);
2991:                m_utilBytesCount1_ += m_utilBytesCount3_;
2992:            }
2993:
2994:            /**
2995:             * Compacts the quaternary bytes and stores them into the primary array
2996:             */
2997:            private final void doQuaternary(int commonbottom4, int bottomcount4) {
2998:                if (m_utilCount4_ > 0) {
2999:                    while (m_utilCount4_ > bottomcount4) {
3000:                        m_utilBytes4_ = append(m_utilBytes4_,
3001:                                m_utilBytesCount4_,
3002:                                (byte) (commonbottom4 + bottomcount4));
3003:                        m_utilBytesCount4_++;
3004:                        m_utilCount4_ -= bottomcount4;
3005:                    }
3006:                    m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_,
3007:                            (byte) (commonbottom4 + (m_utilCount4_ - 1)));
3008:                    m_utilBytesCount4_++;
3009:                }
3010:                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
3011:                        SORT_LEVEL_TERMINATOR_);
3012:                m_utilBytesCount1_++;
3013:                if (m_utilBytes1_.length <= m_utilBytesCount1_
3014:                        + m_utilBytesCount4_) {
3015:                    m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
3016:                            m_utilBytesCount4_);
3017:                }
3018:                System.arraycopy(m_utilBytes4_, 0, m_utilBytes1_,
3019:                        m_utilBytesCount1_, m_utilBytesCount4_);
3020:                m_utilBytesCount1_ += m_utilBytesCount4_;
3021:            }
3022:
3023:            /**
3024:             * Deals with the identical sort.
3025:             * Appends the BOCSU version of the source string to the ends of the
3026:             * byte buffer.
3027:             * @param source text string
3028:             */
3029:            private final void doIdentical(String source) {
3030:                int isize = BOCU.getCompressionLength(source);
3031:                m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_,
3032:                        SORT_LEVEL_TERMINATOR_);
3033:                m_utilBytesCount1_++;
3034:                if (m_utilBytes1_.length <= m_utilBytesCount1_ + isize) {
3035:                    m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_,
3036:                            1 + isize);
3037:                }
3038:                m_utilBytesCount1_ = BOCU.compress(source, m_utilBytes1_,
3039:                        m_utilBytesCount1_);
3040:            }
3041:
3042:            /**
3043:             * Gets the offset of the first unmatched characters in source and target.
3044:             * This method returns the offset of the start of a contraction or a
3045:             * combining sequence, if the first difference is in the middle of such a
3046:             * sequence.
3047:             * @param source string
3048:             * @param target string
3049:             * @return offset of the first unmatched characters in source and target.
3050:             */
3051:            private final int getFirstUnmatchedOffset(String source,
3052:                    String target) {
3053:                int result = 0;
3054:                int slength = source.length();
3055:                int tlength = target.length();
3056:                int minlength = slength;
3057:                if (minlength > tlength) {
3058:                    minlength = tlength;
3059:                }
3060:                while (result < minlength
3061:                        && source.charAt(result) == target.charAt(result)) {
3062:                    result++;
3063:                }
3064:                if (result > 0) {
3065:                    // There is an identical portion at the beginning of the two
3066:                    // strings. If the identical portion ends within a contraction or a
3067:                    // combining character sequence, back up to the start of that
3068:                    // sequence.
3069:                    char schar = 0;
3070:                    char tchar = 0;
3071:                    if (result < minlength) {
3072:                        schar = source.charAt(result); // first differing chars
3073:                        tchar = target.charAt(result);
3074:                    } else {
3075:                        schar = source.charAt(minlength - 1);
3076:                        if (isUnsafe(schar)) {
3077:                            tchar = schar;
3078:                        } else if (slength == tlength) {
3079:                            return result;
3080:                        } else if (slength < tlength) {
3081:                            tchar = target.charAt(result);
3082:                        } else {
3083:                            schar = source.charAt(result);
3084:                        }
3085:                    }
3086:                    if (isUnsafe(schar) || isUnsafe(tchar)) {
3087:                        // We are stopped in the middle of a contraction or combining
3088:                        // sequence.
3089:                        // Look backwards for the part of the string for the start of
3090:                        // the sequence
3091:                        // It doesn't matter which string we scan, since they are the
3092:                        // same in this region.
3093:                        do {
3094:                            result--;
3095:                        } while (result > 0 && isUnsafe(source.charAt(result)));
3096:                    }
3097:                }
3098:                return result;
3099:            }
3100:
3101:            /**
3102:             * Appending an byte to an array of bytes and increases it if we run out of
3103:             * space
3104:             * @param array of byte arrays
3105:             * @param appendindex index in the byte array to append
3106:             * @param value to append
3107:             * @return array if array size can accomodate the new value, otherwise
3108:             *         a bigger array will be created and returned
3109:             */
3110:            private static final byte[] append(byte array[], int appendindex,
3111:                    byte value) {
3112:                try {
3113:                    array[appendindex] = value;
3114:                } catch (ArrayIndexOutOfBoundsException e) {
3115:                    array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
3116:                    array[appendindex] = value;
3117:                }
3118:                return array;
3119:            }
3120:
3121:            /**
3122:             * This is a trick string compare function that goes in and uses sortkeys
3123:             * to compare. It is used when compare gets in trouble and needs to bail
3124:             * out.
3125:             * @param source text string
3126:             * @param target text string
3127:             */
3128:            private final int compareBySortKeys(String source, String target)
3129:
3130:            {
3131:                m_utilRawCollationKey_ = getRawCollationKey(source,
3132:                        m_utilRawCollationKey_);
3133:                // this method is very seldom called
3134:                RawCollationKey targetkey = getRawCollationKey(target, null);
3135:                return m_utilRawCollationKey_.compareTo(targetkey);
3136:            }
3137:
3138:            /**
3139:             * Performs the primary comparisons, and fills up the CE buffer at the
3140:             * same time.
3141:             * The return value toggles between the comparison result and the hiragana
3142:             * result. If either the source is greater than target or vice versa, the
3143:             * return result is the comparison result, ie 1 or -1, furthermore the
3144:             * cebuffers will be cleared when that happens. If the primary comparisons
3145:             * are equal, we'll have to continue with secondary comparison. In this case
3146:             * the cebuffer will not be cleared and the return result will be the
3147:             * hiragana result.
3148:             * @param doHiragana4 flag indicator that Hiragana Quaternary has to be
3149:             *                  observed
3150:             * @param lowestpvalue the lowest primary value that will not be ignored if
3151:             *                      alternate handling is shifted
3152:             * @param source text string
3153:             * @param target text string
3154:             * @param textoffset offset in text to start the comparison
3155:             * @return comparion result if a primary difference is found, otherwise
3156:             *                      hiragana result
3157:             */
3158:            private final int doPrimaryCompare(boolean doHiragana4,
3159:                    int lowestpvalue, String source, String target,
3160:                    int textoffset)
3161:
3162:            {
3163:                // Preparing the context objects for iterating over strings
3164:                m_srcUtilIter_.setText(source);
3165:                m_srcUtilColEIter_.setText(m_srcUtilIter_, textoffset);
3166:                m_tgtUtilIter_.setText(target);
3167:                m_tgtUtilColEIter_.setText(m_tgtUtilIter_, textoffset);
3168:
3169:                // Non shifted primary processing is quite simple
3170:                if (!m_isAlternateHandlingShifted_) {
3171:                    int hiraganaresult = 0;
3172:                    while (true) {
3173:                        int sorder = 0;
3174:                        // We fetch CEs until we hit a non ignorable primary or end.
3175:                        do {
3176:                            sorder = m_srcUtilColEIter_.next();
3177:                            m_srcUtilCEBuffer_ = append(m_srcUtilCEBuffer_,
3178:                                    m_srcUtilCEBufferSize_, sorder);
3179:                            m_srcUtilCEBufferSize_++;
3180:                            sorder &= CE_PRIMARY_MASK_;
3181:                        } while (sorder == CollationElementIterator.IGNORABLE);
3182:
3183:                        int torder = 0;
3184:                        do {
3185:                            torder = m_tgtUtilColEIter_.next();
3186:                            m_tgtUtilCEBuffer_ = append(m_tgtUtilCEBuffer_,
3187:                                    m_tgtUtilCEBufferSize_, torder);
3188:                            m_tgtUtilCEBufferSize_++;
3189:                            torder &= CE_PRIMARY_MASK_;
3190:                        } while (torder == CollationElementIterator.IGNORABLE);
3191:
3192:                        // if both primaries are the same
3193:                        if (sorder == torder) {
3194:                            // and there are no more CEs, we advance to the next level
3195:                            // see if we are at the end of either string
3196:                            if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
3197:                                if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] != CollationElementIterator.NULLORDER) {
3198:                                    return -1;
3199:                                }
3200:                                break;
3201:                            } else if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
3202:                                return 1;
3203:                            }
3204:                            if (doHiragana4
3205:                                    && hiraganaresult == 0
3206:                                    && m_srcUtilColEIter_.m_isCodePointHiragana_ != m_tgtUtilColEIter_.m_isCodePointHiragana_) {
3207:                                if (m_srcUtilColEIter_.m_isCodePointHiragana_) {
3208:                                    hiraganaresult = -1;
3209:                                } else {
3210:                                    hiraganaresult = 1;
3211:                                }
3212:                            }
3213:                        } else {
3214:                            // if two primaries are different, we are done
3215:                            return endPrimaryCompare(sorder, torder);
3216:                        }
3217:                    }
3218:                    // no primary difference... do the rest from the buffers
3219:                    return hiraganaresult;
3220:                } else { // shifted - do a slightly more complicated processing :)
3221:                    while (true) {
3222:                        int sorder = getPrimaryShiftedCompareCE(
3223:                                m_srcUtilColEIter_, lowestpvalue, true);
3224:                        int torder = getPrimaryShiftedCompareCE(
3225:                                m_tgtUtilColEIter_, lowestpvalue, false);
3226:                        if (sorder == torder) {
3227:                            if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
3228:                                break;
3229:                            } else {
3230:                                continue;
3231:                            }
3232:                        } else {
3233:                            return endPrimaryCompare(sorder, torder);
3234:                        }
3235:                    } // no primary difference... do the rest from the buffers
3236:                }
3237:                return 0;
3238:            }
3239:
3240:            /**
3241:             * This is used only for primary strength when we know that sorder is
3242:             * already different from torder.
3243:             * Compares sorder and torder, returns -1 if sorder is less than torder.
3244:             * Clears the cebuffer at the same time.
3245:             * @param sorder source strength order
3246:             * @param torder target strength order
3247:             * @return the comparison result of sorder and torder
3248:             */
3249:            private final int endPrimaryCompare(int sorder, int torder) {
3250:                // if we reach here, the ce offset accessed is the last ce
3251:                // appended to the buffer
3252:                boolean isSourceNullOrder = (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER);
3253:                boolean isTargetNullOrder = (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER);
3254:                m_srcUtilCEBufferSize_ = -1;
3255:                m_tgtUtilCEBufferSize_ = -1;
3256:                if (isSourceNullOrder) {
3257:                    return -1;
3258:                }
3259:                if (isTargetNullOrder) {
3260:                    return 1;
3261:                }
3262:                // getting rid of the sign
3263:                sorder >>>= CE_PRIMARY_SHIFT_;
3264:                torder >>>= CE_PRIMARY_SHIFT_;
3265:                if (sorder < torder) {
3266:                    return -1;
3267:                }
3268:                return 1;
3269:            }
3270:
3271:            /**
3272:             * Calculates the next primary shifted value and fills up cebuffer with the
3273:             * next non-ignorable ce.
3274:             * @param coleiter collation element iterator
3275:             * @param doHiragana4 flag indicator if hiragana quaternary is to be
3276:             *                      handled
3277:             * @param lowestpvalue lowest primary shifted value that will not be
3278:             *                      ignored
3279:             * @return result next modified ce
3280:             */
3281:            private final int getPrimaryShiftedCompareCE(
3282:                    CollationElementIterator coleiter, int lowestpvalue,
3283:                    boolean isSrc)
3284:
3285:            {
3286:                boolean shifted = false;
3287:                int result = CollationElementIterator.IGNORABLE;
3288:                int cebuffer[] = m_srcUtilCEBuffer_;
3289:                int cebuffersize = m_srcUtilCEBufferSize_;
3290:                if (!isSrc) {
3291:                    cebuffer = m_tgtUtilCEBuffer_;
3292:                    cebuffersize = m_tgtUtilCEBufferSize_;
3293:                }
3294:                while (true) {
3295:                    result = coleiter.next();
3296:                    if (result == CollationElementIterator.NULLORDER) {
3297:                        cebuffer = append(cebuffer, cebuffersize, result);
3298:                        cebuffersize++;
3299:                        break;
3300:                    } else if (result == CollationElementIterator.IGNORABLE
3301:                            || (shifted && (result & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE)) {
3302:                        // UCA amendment - ignore ignorables that follow shifted code
3303:                        // points
3304:                        continue;
3305:                    } else if (isContinuation(result)) {
3306:                        if ((result & CE_PRIMARY_MASK_) != CollationElementIterator.IGNORABLE) {
3307:                            // There is primary value
3308:                            if (shifted) {
3309:                                result = (result & CE_PRIMARY_MASK_)
3310:                                        | CE_CONTINUATION_MARKER_;
3311:                                // preserve interesting continuation
3312:                                cebuffer = append(cebuffer, cebuffersize,
3313:                                        result);
3314:                                cebuffersize++;
3315:                                continue;
3316:                            } else {
3317:                                cebuffer = append(cebuffer, cebuffersize,
3318:                                        result);
3319:                                cebuffersize++;
3320:                                break;
3321:                            }
3322:                        } else { // Just lower level values
3323:                            if (!shifted) {
3324:                                cebuffer = append(cebuffer, cebuffersize,
3325:                                        result);
3326:                                cebuffersize++;
3327:                            }
3328:                        }
3329:                    } else { // regular
3330:                        if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_,
3331:                                lowestpvalue) > 0) {
3332:                            cebuffer = append(cebuffer, cebuffersize, result);
3333:                            cebuffersize++;
3334:                            break;
3335:                        } else {
3336:                            if ((result & CE_PRIMARY_MASK_) != 0) {
3337:                                shifted = true;
3338:                                result &= CE_PRIMARY_MASK_;
3339:                                cebuffer = append(cebuffer, cebuffersize,
3340:                                        result);
3341:                                cebuffersize++;
3342:                                continue;
3343:                            } else {
3344:                                cebuffer = append(cebuffer, cebuffersize,
3345:                                        result);
3346:                                cebuffersize++;
3347:                                shifted = false;
3348:                                continue;
3349:                            }
3350:                        }
3351:                    }
3352:                }
3353:                if (isSrc) {
3354:                    m_srcUtilCEBuffer_ = cebuffer;
3355:                    m_srcUtilCEBufferSize_ = cebuffersize;
3356:                } else {
3357:                    m_tgtUtilCEBuffer_ = cebuffer;
3358:                    m_tgtUtilCEBufferSize_ = cebuffersize;
3359:                }
3360:                result &= CE_PRIMARY_MASK_;
3361:                return result;
3362:            }
3363:
3364:            /**
3365:             * Appending an int to an array of ints and increases it if we run out of
3366:             * space
3367:             * @param array of int arrays
3368:             * @param appendindex index at which value will be appended
3369:             * @param value to append
3370:             * @return array if size is not increased, otherwise a new array will be
3371:             *         returned
3372:             */
3373:            private static final int[] append(int array[], int appendindex,
3374:                    int value) {
3375:                if (appendindex + 1 >= array.length) {
3376:                    array = increase(array, appendindex, CE_BUFFER_SIZE_);
3377:                }
3378:                array[appendindex] = value;
3379:                return array;
3380:            }
3381:
3382:            /**
3383:             * Does secondary strength comparison based on the collected ces.
3384:             * @param doFrench flag indicates if French ordering is to be done
3385:             * @return the secondary strength comparison result
3386:             */
3387:            private final int doSecondaryCompare(boolean doFrench) {
3388:                // now, we're gonna reexamine collected CEs
3389:                if (!doFrench) { // normal
3390:                    int soffset = 0;
3391:                    int toffset = 0;
3392:                    while (true) {
3393:                        int sorder = CollationElementIterator.IGNORABLE;
3394:                        while (sorder == CollationElementIterator.IGNORABLE) {
3395:                            sorder = m_srcUtilCEBuffer_[soffset++]
3396:                                    & CE_SECONDARY_MASK_;
3397:                        }
3398:                        int torder = CollationElementIterator.IGNORABLE;
3399:                        while (torder == CollationElementIterator.IGNORABLE) {
3400:                            torder = m_tgtUtilCEBuffer_[toffset++]
3401:                                    & CE_SECONDARY_MASK_;
3402:                        }
3403:
3404:                        if (sorder == torder) {
3405:                            if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3406:                                if (m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
3407:                                    return -1;
3408:                                }
3409:                                break;
3410:                            } else if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
3411:                                return 1;
3412:                            }
3413:                        } else {
3414:                            if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3415:                                return -1;
3416:                            }
3417:                            if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
3418:                                return 1;
3419:                            }
3420:                            return (sorder < torder) ? -1 : 1;
3421:                        }
3422:                    }
3423:                } else { // do the French
3424:                    m_srcUtilContOffset_ = 0;
3425:                    m_tgtUtilContOffset_ = 0;
3426:                    m_srcUtilOffset_ = m_srcUtilCEBufferSize_ - 2;
3427:                    m_tgtUtilOffset_ = m_tgtUtilCEBufferSize_ - 2;
3428:                    while (true) {
3429:                        int sorder = getSecondaryFrenchCE(true);
3430:                        int torder = getSecondaryFrenchCE(false);
3431:                        if (sorder == torder) {
3432:                            if ((m_srcUtilOffset_ < 0 && m_tgtUtilOffset_ < 0)
3433:                                    || (m_srcUtilOffset_ >= 0 && m_srcUtilCEBuffer_[m_srcUtilOffset_] == CollationElementIterator.NULLORDER)) {
3434:                                break;
3435:                            }
3436:                        } else {
3437:                            return (sorder < torder) ? -1 : 1;
3438:                        }
3439:                    }
3440:                }
3441:                return 0;
3442:            }
3443:
3444:            /**
3445:             * Calculates the next secondary french CE.
3446:             * @param isSrc flag indicator if we are calculating the src ces
3447:             * @return result next modified ce
3448:             */
3449:            private final int getSecondaryFrenchCE(boolean isSrc) {
3450:                int result = CollationElementIterator.IGNORABLE;
3451:                int offset = m_srcUtilOffset_;
3452:                int continuationoffset = m_srcUtilContOffset_;
3453:                int cebuffer[] = m_srcUtilCEBuffer_;
3454:                if (!isSrc) {
3455:                    offset = m_tgtUtilOffset_;
3456:                    continuationoffset = m_tgtUtilContOffset_;
3457:                    cebuffer = m_tgtUtilCEBuffer_;
3458:                }
3459:
3460:                while (result == CollationElementIterator.IGNORABLE
3461:                        && offset >= 0) {
3462:                    if (continuationoffset == 0) {
3463:                        result = cebuffer[offset];
3464:                        while (isContinuation(cebuffer[offset--])) {
3465:                        }
3466:                        // after this, sorder is at the start of continuation,
3467:                        // and offset points before that
3468:                        if (isContinuation(cebuffer[offset + 1])) {
3469:                            // save offset for later
3470:                            continuationoffset = offset;
3471:                            offset += 2;
3472:                        }
3473:                    } else {
3474:                        result = cebuffer[offset++];
3475:                        if (!isContinuation(result)) {
3476:                            // we have finished with this continuation
3477:                            offset = continuationoffset;
3478:                            // reset the pointer to before continuation
3479:                            continuationoffset = 0;
3480:                            continue;
3481:                        }
3482:                    }
3483:                    result &= CE_SECONDARY_MASK_; // remove continuation bit
3484:                }
3485:                if (isSrc) {
3486:                    m_srcUtilOffset_ = offset;
3487:                    m_srcUtilContOffset_ = continuationoffset;
3488:                } else {
3489:                    m_tgtUtilOffset_ = offset;
3490:                    m_tgtUtilContOffset_ = continuationoffset;
3491:                }
3492:                return result;
3493:            }
3494:
3495:            /**
3496:             * Does case strength comparison based on the collected ces.
3497:             * @return the case strength comparison result
3498:             */
3499:            private final int doCaseCompare() {
3500:                int soffset = 0;
3501:                int toffset = 0;
3502:                while (true) {
3503:                    int sorder = CollationElementIterator.IGNORABLE;
3504:                    int torder = CollationElementIterator.IGNORABLE;
3505:                    while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
3506:                        sorder = m_srcUtilCEBuffer_[soffset++];
3507:                        if (!isContinuation(sorder)
3508:                                && ((sorder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) {
3509:                            // primary ignorables should not be considered on the case level when the strength is primary
3510:                            // otherwise, the CEs stop being well-formed
3511:                            sorder &= CE_CASE_MASK_3_;
3512:                            sorder ^= m_caseSwitch_;
3513:                        } else {
3514:                            sorder = CollationElementIterator.IGNORABLE;
3515:                        }
3516:                    }
3517:
3518:                    while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
3519:                        torder = m_tgtUtilCEBuffer_[toffset++];
3520:                        if (!isContinuation(torder)
3521:                                && ((torder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) {
3522:                            // primary ignorables should not be considered on the case level when the strength is primary
3523:                            // otherwise, the CEs stop being well-formed
3524:                            torder &= CE_CASE_MASK_3_;
3525:                            torder ^= m_caseSwitch_;
3526:                        } else {
3527:                            torder = CollationElementIterator.IGNORABLE;
3528:                        }
3529:                    }
3530:
3531:                    sorder &= CE_CASE_BIT_MASK_;
3532:                    torder &= CE_CASE_BIT_MASK_;
3533:                    if (sorder == torder) {
3534:                        // checking end of strings
3535:                        if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3536:                            if (m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
3537:                                return -1;
3538:                            }
3539:                            break;
3540:                        } else if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
3541:                            return 1;
3542:                        }
3543:                    } else {
3544:                        if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3545:                            return -1;
3546:                        }
3547:                        if (m_tgtUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3548:                            return 1;
3549:                        }
3550:                        return (sorder < torder) ? -1 : 1;
3551:                    }
3552:                }
3553:                return 0;
3554:            }
3555:
3556:            /**
3557:             * Does tertiary strength comparison based on the collected ces.
3558:             * @return the tertiary strength comparison result
3559:             */
3560:            private final int doTertiaryCompare() {
3561:                int soffset = 0;
3562:                int toffset = 0;
3563:                while (true) {
3564:                    int sorder = CollationElementIterator.IGNORABLE;
3565:                    int torder = CollationElementIterator.IGNORABLE;
3566:                    while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
3567:                        sorder = m_srcUtilCEBuffer_[soffset++] & m_mask3_;
3568:                        if (!isContinuation(sorder)) {
3569:                            sorder ^= m_caseSwitch_;
3570:                        } else {
3571:                            sorder &= CE_REMOVE_CASE_;
3572:                        }
3573:                    }
3574:
3575:                    while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
3576:                        torder = m_tgtUtilCEBuffer_[toffset++] & m_mask3_;
3577:                        if (!isContinuation(torder)) {
3578:                            torder ^= m_caseSwitch_;
3579:                        } else {
3580:                            torder &= CE_REMOVE_CASE_;
3581:                        }
3582:                    }
3583:
3584:                    if (sorder == torder) {
3585:                        if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3586:                            if (m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
3587:                                return -1;
3588:                            }
3589:                            break;
3590:                        } else if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
3591:                            return 1;
3592:                        }
3593:                    } else {
3594:                        if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
3595:                            return -1;
3596:                        }
3597:                        if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
3598:                            return 1;
3599:                        }
3600:                        return (sorder < torder) ? -1 : 1;
3601:                    }
3602:                }
3603:                return 0;
3604:            }
3605:
            /**
             * Does quaternary strength comparison based on the collected ces.
             * <p>
             * Walks m_srcUtilCEBuffer_ and m_tgtUtilCEBuffer_ in parallel. On each
             * side a non-continuation CE that compares (unsigned) greater than
             * lowestpvalue, or whose primary bits equal
             * CollationElementIterator.IGNORABLE, is replaced by CE_PRIMARY_MASK_
             * and the continuation CEs that follow it are skipped. Any other
             * non-continuation CE is kept and marks that side as "shifted", so its
             * continuation CEs are kept too. The surviving values are shifted right
             * by CE_PRIMARY_SHIFT_ and compared.
             * @param lowestpvalue the lowest primary value that will not be ignored if
             *                      alternate handling is shifted
             * @return the quaternary strength comparison result: -1 if the source
             *         orders before the target, 1 if after, 0 if both buffers reach
             *         their terminating NULLORDER without a difference
             */
            private final int doQuaternaryCompare(int lowestpvalue) {
                // "shifted" state of the most recent non-continuation CE on each side
                boolean sShifted = true;
                boolean tShifted = true;
                // read positions in the source and target CE buffers
                int soffset = 0;
                int toffset = 0;
                while (true) {
                    int sorder = CollationElementIterator.IGNORABLE;
                    int torder = CollationElementIterator.IGNORABLE;
                    // fetch the next source value that takes part in the comparison
                    while (sorder == CollationElementIterator.IGNORABLE
                            || (isContinuation(sorder) && !sShifted)) {
                        sorder = m_srcUtilCEBuffer_[soffset++];
                        if (isContinuation(sorder)) {
                            if (!sShifted) {
                                // continuation of a CE that was not shifted: skip it
                                continue;
                            }
                        } else if (Utility
                                .compareUnsigned(sorder, lowestpvalue) > 0
                                || (sorder & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE) {
                            // non continuation
                            // not a shifted CE: every such CE is given the same value
                            sorder = CE_PRIMARY_MASK_;
                            sShifted = false;
                        } else {
                            // shifted CE: its own value is compared
                            sShifted = true;
                        }
                    }
                    sorder >>>= CE_PRIMARY_SHIFT_;
                    // same selection for the target side
                    while (torder == CollationElementIterator.IGNORABLE
                            || (isContinuation(torder) && !tShifted)) {
                        torder = m_tgtUtilCEBuffer_[toffset++];
                        if (isContinuation(torder)) {
                            if (!tShifted) {
                                // continuation of a CE that was not shifted: skip it
                                continue;
                            }
                        } else if (Utility
                                .compareUnsigned(torder, lowestpvalue) > 0
                                || (torder & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE) {
                            // non continuation
                            // not a shifted CE: every such CE is given the same value
                            torder = CE_PRIMARY_MASK_;
                            tShifted = false;
                        } else {
                            // shifted CE: its own value is compared
                            tShifted = true;
                        }
                    }
                    torder >>>= CE_PRIMARY_SHIFT_;

                    // The raw buffer entry just consumed (offset - 1) tells whether
                    // a side has reached its NULLORDER terminator.
                    if (sorder == torder) {
                        if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
                            if (m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
                                // source ended first
                                return -1;
                            }
                            // both ended together: no quaternary difference
                            break;
                        } else if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
                            // target ended first
                            return 1;
                        }
                    } else {
                        // values differ; a side that has already ended orders first
                        if (m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
                            return -1;
                        }
                        if (m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
                            return 1;
                        }
                        return (sorder < torder) ? -1 : 1;
                    }
                }
                return 0;
            }
3678:
3679:            /**
3680:             * Internal function. Does byte level string compare. Used by strcoll if
3681:             * strength == identical and strings are otherwise equal. This is a rare
3682:             * case. Comparison must be done on NFD normalized strings. FCD is not good
3683:             * enough.
3684:             * @param source text
3685:             * @param target text
3686:             * @param offset of the first difference in the text strings
3687:             * @param normalize flag indicating if we are to normalize the text before
3688:             *              comparison
3689:             * @return 1 if source is greater than target, -1 less than and 0 if equals
3690:             */
3691:            private static final int doIdenticalCompare(String source,
3692:                    String target, int offset, boolean normalize)
3693:
3694:            {
3695:                if (normalize) {
3696:                    if (Normalizer.quickCheck(source, Normalizer.NFD, 0) != Normalizer.YES) {
3697:                        source = Normalizer.decompose(source, false);
3698:                    }
3699:
3700:                    if (Normalizer.quickCheck(target, Normalizer.NFD, 0) != Normalizer.YES) {
3701:                        target = Normalizer.decompose(target, false);
3702:                    }
3703:                    offset = 0;
3704:                }
3705:
3706:                return doStringCompare(source, target, offset);
3707:            }
3708:
3709:            /**
3710:             * Compares string for their codepoint order.
3711:             * This comparison handles surrogate characters and place them after the
3712:             * all non surrogate characters.
3713:             * @param source text
3714:             * @param target text
3715:             * @param offset start offset for comparison
3716:             * @return 1 if source is greater than target, -1 less than and 0 if equals
3717:             */
3718:            private static final int doStringCompare(String source,
3719:                    String target, int offset) {
3720:                // compare identical prefixes - they do not need to be fixed up
3721:                char schar = 0;
3722:                char tchar = 0;
3723:                int slength = source.length();
3724:                int tlength = target.length();
3725:                int minlength = Math.min(slength, tlength);
3726:                while (offset < minlength) {
3727:                    schar = source.charAt(offset);
3728:                    tchar = target.charAt(offset++);
3729:                    if (schar != tchar) {
3730:                        break;
3731:                    }
3732:                }
3733:
3734:                if (schar == tchar && offset == minlength) {
3735:                    if (slength > minlength) {
3736:                        return 1;
3737:                    }
3738:                    if (tlength > minlength) {
3739:                        return -1;
3740:                    }
3741:                    return 0;
3742:                }
3743:
3744:                //  if both values are in or above the surrogate range, Fix them up.
3745:                if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE
3746:                        && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
3747:                    schar = fixupUTF16(schar);
3748:                    tchar = fixupUTF16(tchar);
3749:                }
3750:
3751:                // now c1 and c2 are in UTF-32-compatible order
3752:                return (schar < tchar) ? -1 : 1; // schar and tchar has to be different
3753:            }
3754:
3755:            /**
3756:             * Rotate surrogates to the top to get code point order
3757:             */
3758:            private static final char fixupUTF16(char ch) {
3759:                if (ch >= 0xe000) {
3760:                    ch -= 0x800;
3761:                } else {
3762:                    ch += 0x2000;
3763:                }
3764:                return ch;
3765:            }
3766:
3767:            /**
3768:             * Resets the internal case data members and compression values.
3769:             */
3770:            private void updateInternalState() {
3771:                if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
3772:                    m_caseSwitch_ = CASE_SWITCH_;
3773:                } else {
3774:                    m_caseSwitch_ = NO_CASE_SWITCH_;
3775:                }
3776:
3777:                if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) {
3778:                    m_mask3_ = CE_REMOVE_CASE_;
3779:                    m_common3_ = COMMON_NORMAL_3_;
3780:                    m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_;
3781:                    m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_;
3782:                    m_bottom3_ = COMMON_BOTTOM_3_;
3783:                } else {
3784:                    m_mask3_ = CE_KEEP_CASE_;
3785:                    m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
3786:                    if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
3787:                        m_common3_ = COMMON_UPPER_FIRST_3_;
3788:                        m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_;
3789:                        m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_;
3790:                    } else {
3791:                        m_common3_ = COMMON_NORMAL_3_;
3792:                        m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_;
3793:                        m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_;
3794:                    }
3795:                }
3796:
3797:                // Set the compression values
3798:                int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1;
3799:                // we multilply double with int, but need only int
3800:                m_topCount3_ = (int) (PROPORTION_3_ * total3);
3801:                m_bottomCount3_ = total3 - m_topCount3_;
3802:
3803:                if (!m_isCaseLevel_
3804:                        && getStrength() == AttributeValue.TERTIARY_
3805:                        && !m_isFrenchCollation_
3806:                        && !m_isAlternateHandlingShifted_) {
3807:                    m_isSimple3_ = true;
3808:                } else {
3809:                    m_isSimple3_ = false;
3810:                }
3811:                if (!m_isCaseLevel_
3812:                        && getStrength() <= AttributeValue.TERTIARY_
3813:                        && !m_isNumericCollation_
3814:                        && !m_isAlternateHandlingShifted_ && !latinOneFailed_) {
3815:                    if (latinOneCEs_ == null || latinOneRegenTable_) {
3816:                        if (setUpLatinOne()) { // if we succeed in building latin1 table, we'll use it
3817:                            latinOneUse_ = true;
3818:                        } else {
3819:                            latinOneUse_ = false;
3820:                            latinOneFailed_ = true;
3821:                        }
3822:                        latinOneRegenTable_ = false;
3823:                    } else { // latin1Table exists and it doesn't need to be regenerated, just use it
3824:                        latinOneUse_ = true;
3825:                    }
3826:                } else {
3827:                    latinOneUse_ = false;
3828:                }
3829:
3830:            }
3831:
3832:            /**
3833:             * Initializes the RuleBasedCollator
3834:             */
3835:            private final void init() {
3836:                for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; m_minUnsafe_++) {
3837:                    // Find the smallest unsafe char.
3838:                    if (isUnsafe(m_minUnsafe_)) {
3839:                        break;
3840:                    }
3841:                }
3842:
3843:                for (m_minContractionEnd_ = 0; m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; m_minContractionEnd_++) {
3844:                    // Find the smallest contraction-ending char.
3845:                    if (isContractionEnd(m_minContractionEnd_)) {
3846:                        break;
3847:                    }
3848:                }
3849:                latinOneFailed_ = true;
3850:                setStrength(m_defaultStrength_);
3851:                setDecomposition(m_defaultDecomposition_);
3852:                m_variableTopValue_ = m_defaultVariableTopValue_;
3853:                m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
3854:                m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
3855:                m_isCaseLevel_ = m_defaultIsCaseLevel_;
3856:                m_caseFirst_ = m_defaultCaseFirst_;
3857:                m_isHiragana4_ = m_defaultIsHiragana4_;
3858:                m_isNumericCollation_ = m_defaultIsNumericCollation_;
3859:                latinOneFailed_ = false;
3860:                updateInternalState();
3861:            }
3862:
3863:            /**
3864:             *  Initializes utility iterators and byte buffer used by compare
3865:             */
3866:            private final void initUtility(boolean allocate) {
3867:                if (allocate) {
3868:                    if (m_srcUtilIter_ == null) {
3869:                        m_srcUtilIter_ = new StringUCharacterIterator();
3870:                        m_srcUtilColEIter_ = new CollationElementIterator(
3871:                                m_srcUtilIter_, this );
3872:                        m_tgtUtilIter_ = new StringUCharacterIterator();
3873:                        m_tgtUtilColEIter_ = new CollationElementIterator(
3874:                                m_tgtUtilIter_, this );
3875:                        m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
3876:                        m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
3877:                        m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; // secondary
3878:                        m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; // tertiary
3879:                        m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_]; // Quaternary
3880:                        m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
3881:                        m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
3882:                    }
3883:                } else {
3884:                    m_srcUtilIter_ = null;
3885:                    m_srcUtilColEIter_ = null;
3886:                    m_tgtUtilIter_ = null;
3887:                    m_tgtUtilColEIter_ = null;
3888:                    m_utilBytes0_ = null;
3889:                    m_utilBytes1_ = null;
3890:                    m_utilBytes2_ = null;
3891:                    m_utilBytes3_ = null;
3892:                    m_utilBytes4_ = null;
3893:                    m_srcUtilCEBuffer_ = null;
3894:                    m_tgtUtilCEBuffer_ = null;
3895:                }
3896:            }
3897:
            // Constants for Latin-1 special processing
            // Last character handled by the Latin-1 table; compareUseLatin1 falls
            // back to compareRegular for anything above it.
            private static final int ENDOFLATINONERANGE_ = 0xFF;
            // Initial length of each of the three sections of latinOneCEs_: one
            // slot per Latin-1 character plus room for contraction results (the
            // table is resized when that room runs out).
            private static final int LATINONETABLELEN_ = (ENDOFLATINONERANGE_ + 50);
            // Marker stored in the table for entries whose CEs could not be
            // represented there.
            private static final int BAIL_OUT_CE_ = 0xFF000000;
3902:
            /**
             * Holder for the bit positions used while generating the latin-1
             * tables: the shift at which addLatinOneEntry stores the next primary,
             * secondary and tertiary byte of a table entry. Each value starts at 24
             * and is decreased by 8 by addLatinOneEntry whenever a byte is stored.
             */

            private class shiftValues {
                // shift for the next primary byte
                int primShift = 24;
                // shift for the next secondary byte
                int secShift = 24;
                // shift for the next tertiary byte
                int terShift = 24;
            }
3912:
3913:            private final void addLatinOneEntry(char ch, int CE, shiftValues sh) {
3914:                int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
3915:                boolean reverseSecondary = false;
3916:                if (!isContinuation(CE)) {
3917:                    tertiary = ((CE & m_mask3_));
3918:                    tertiary ^= m_caseSwitch_;
3919:                    reverseSecondary = true;
3920:                } else {
3921:                    tertiary = (byte) ((CE & CE_REMOVE_CONTINUATION_MASK_));
3922:                    tertiary &= CE_REMOVE_CASE_;
3923:                    reverseSecondary = false;
3924:                }
3925:
3926:                secondary = ((CE >>>= 8) & LAST_BYTE_MASK_);
3927:                primary2 = ((CE >>>= 8) & LAST_BYTE_MASK_);
3928:                primary1 = (CE >>> 8);
3929:
3930:                if (primary1 != 0) {
3931:                    latinOneCEs_[ch] |= (primary1 << sh.primShift);
3932:                    sh.primShift -= 8;
3933:                }
3934:                if (primary2 != 0) {
3935:                    if (sh.primShift < 0) {
3936:                        latinOneCEs_[ch] = BAIL_OUT_CE_;
3937:                        latinOneCEs_[latinOneTableLen_ + ch] = BAIL_OUT_CE_;
3938:                        latinOneCEs_[2 * latinOneTableLen_ + ch] = BAIL_OUT_CE_;
3939:                        return;
3940:                    }
3941:                    latinOneCEs_[ch] |= (primary2 << sh.primShift);
3942:                    sh.primShift -= 8;
3943:                }
3944:                if (secondary != 0) {
3945:                    if (reverseSecondary && m_isFrenchCollation_) { // reverse secondary
3946:                        latinOneCEs_[latinOneTableLen_ + ch] >>>= 8; // make space for secondary
3947:                        latinOneCEs_[latinOneTableLen_ + ch] |= (secondary << 24);
3948:                    } else { // normal case
3949:                        latinOneCEs_[latinOneTableLen_ + ch] |= (secondary << sh.secShift);
3950:                    }
3951:                    sh.secShift -= 8;
3952:                }
3953:                if (tertiary != 0) {
3954:                    latinOneCEs_[2 * latinOneTableLen_ + ch] |= (tertiary << sh.terShift);
3955:                    sh.terShift -= 8;
3956:                }
3957:            }
3958:
3959:            private final void resizeLatinOneTable(int newSize) {
3960:                int newTable[] = new int[3 * newSize];
3961:                int sizeToCopy = ((newSize < latinOneTableLen_) ? newSize
3962:                        : latinOneTableLen_);
3963:                //uprv_memset(newTable, 0, newSize*sizeof(uint32_t)*3); // automatically cleared.
3964:                System.arraycopy(latinOneCEs_, 0, newTable, 0, sizeToCopy);
3965:                System.arraycopy(latinOneCEs_, latinOneTableLen_, newTable,
3966:                        newSize, sizeToCopy);
3967:                System.arraycopy(latinOneCEs_, 2 * latinOneTableLen_, newTable,
3968:                        2 * newSize, sizeToCopy);
3969:                latinOneTableLen_ = newSize;
3970:                latinOneCEs_ = newTable;
3971:            }
3972:
            /**
             * Generates the Latin-1 table latinOneCEs_ used by compareUseLatin1.
             * <p>
             * The table consists of three consecutive sections of latinOneTableLen_
             * ints each (filled through addLatinOneEntry). For every character from
             * 0 to ENDOFLATINONERANGE_ its CE is looked up and
             * <ul>
             * <li>added directly if it is not special,</li>
             * <li>for expansion and digit tags, replaced by the CEs produced by a
             *     collation element iterator over that single character,</li>
             * <li>for a contraction tag, stored with an offset into the part of the
             *     table behind the Latin-1 range, where one entry per element of
             *     the contraction is appended,</li>
             * <li>for any other tag, treated as a failure.</li>
             * </ul>
             * Entries that cannot be represented are marked with BAIL_OUT_CE_.
             * Finally the table is shrunk to the number of entries actually used.
             * @return true if the table was built; false if it cannot be built, in
             *         which case latinOneFailed_ has been set
             */
            private final boolean setUpLatinOne() {
                // allocate a fresh table or clear the existing one
                if (latinOneCEs_ == null || m_reallocLatinOneCEs_) {
                    latinOneCEs_ = new int[3 * LATINONETABLELEN_];
                    latinOneTableLen_ = LATINONETABLELEN_;
                    m_reallocLatinOneCEs_ = false;
                } else {
                    Arrays.fill(latinOneCEs_, 0);
                }
                if (m_ContInfo_ == null) {
                    m_ContInfo_ = new ContractionInfo();
                }
                char ch = 0;
                //StringBuffer sCh = new StringBuffer();
                //CollationElementIterator it = getCollationElementIterator(sCh.toString());
                CollationElementIterator it = getCollationElementIterator("");

                shiftValues s = new shiftValues();
                int CE = 0;
                // next free table index for contraction results
                char contractionOffset = ENDOFLATINONERANGE_ + 1;

                for (ch = 0; ch <= ENDOFLATINONERANGE_; ch++) {
                    // every entry starts filling from the top byte
                    s.primShift = 24;
                    s.secShift = 24;
                    s.terShift = 24;
                    // with ENDOFLATINONERANGE_ == 0xFF this condition always holds,
                    // so the else branch is not reached by this loop
                    if (ch < 0x100) {
                        CE = m_trie_.getLatin1LinearValue(ch);
                    } else {
                        CE = m_trie_.getLeadValue(ch);
                        if (CE == CollationElementIterator.CE_NOT_FOUND_) {
                            CE = UCA_.m_trie_.getLeadValue(ch);
                        }
                    }
                    if (!isSpecial(CE)) {
                        addLatinOneEntry(ch, CE, s);
                    } else {
                        switch (RuleBasedCollator.getTag(CE)) {
                        case CollationElementIterator.CE_EXPANSION_TAG_:
                        case CollationElementIterator.CE_DIGIT_TAG_:
                            //sCh.delete(0, sCh.length());
                            //sCh.append(ch);
                            //it.setText(sCh.toString());
                            // let the iterator produce the CEs for this character
                            it.setText(UCharacter.toString(ch));
                            while ((CE = it.next()) != CollationElementIterator.NULLORDER) {
                                // a shift below zero means the entry is already full
                                if (s.primShift < 0 || s.secShift < 0
                                        || s.terShift < 0) {
                                    latinOneCEs_[ch] = BAIL_OUT_CE_;
                                    latinOneCEs_[latinOneTableLen_ + ch] = BAIL_OUT_CE_;
                                    latinOneCEs_[2 * latinOneTableLen_ + ch] = BAIL_OUT_CE_;
                                    break;
                                }
                                addLatinOneEntry(ch, CE, s);
                            }
                            break;
                        case CollationElementIterator.CE_CONTRACTION_TAG_:
                            // here is the trick
                            // F2 is contraction. We do something very similar to contractions
                            // but have two indices, one in the real contraction table and the
                            // other to where we stuffed things. This hopes that we don't have
                            // many contractions (this should work for latin-1 tables).
                        {
                            // bits 12..23 must be free to hold the latin-1 table offset
                            if ((CE & 0x00FFF000) != 0) {
                                latinOneFailed_ = true;
                                return false;
                            }

                            int UCharOffset = (CE & 0xFFFFFF)
                                    - m_contractionOffset_; //getContractionOffset(CE)]

                            CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table

                            // the character's own entry carries the tagged CE in all
                            // three sections
                            latinOneCEs_[ch] = CE;
                            latinOneCEs_[latinOneTableLen_ + ch] = CE;
                            latinOneCEs_[2 * latinOneTableLen_ + ch] = CE;

                            // We're going to jump into contraction table, pick the elements
                            // and use them
                            do {
                                //CE = *(contractionCEs + (UCharOffset - contractionIndex));
                                CE = m_contractionCE_[UCharOffset];
                                if (isSpecial(CE)
                                        && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
                                    int i; /* general counter */
                                    //uint32_t *CEOffset = (uint32_t *)image+getExpansionOffset(CE); /* find the offset to expansion table */
                                    int offset = ((CE & 0xFFFFF0) >> 4)
                                            - m_expansionOffset_; //it.getExpansionOffset(this, CE);
                                    int size = CE & 0xF; // getExpansionCount(CE);
                                    //CE = *CEOffset++;
                                    if (size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
                                        for (i = 0; i < size; i++) {
                                            if (s.primShift < 0
                                                    || s.secShift < 0
                                                    || s.terShift < 0) {
                                                latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
                                                latinOneCEs_[latinOneTableLen_
                                                        + contractionOffset] = BAIL_OUT_CE_;
                                                latinOneCEs_[2
                                                        * latinOneTableLen_
                                                        + contractionOffset] = BAIL_OUT_CE_;
                                                break;
                                            }
                                            addLatinOneEntry(contractionOffset,
                                                    m_expansion_[offset + i], s);
                                        }
                                    } else { /* else, we do */
                                        // expansion of unknown length: read until a
                                        // zero entry is found
                                        while (m_expansion_[offset] != 0) {
                                            if (s.primShift < 0
                                                    || s.secShift < 0
                                                    || s.terShift < 0) {
                                                latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
                                                latinOneCEs_[latinOneTableLen_
                                                        + contractionOffset] = BAIL_OUT_CE_;
                                                latinOneCEs_[2
                                                        * latinOneTableLen_
                                                        + contractionOffset] = BAIL_OUT_CE_;
                                                break;
                                            }
                                            addLatinOneEntry(contractionOffset,
                                                    m_expansion_[offset++], s);
                                        }
                                    }
                                    contractionOffset++;
                                } else if (!isSpecial(CE)) {
                                    addLatinOneEntry(contractionOffset++, CE, s);
                                } else {
                                    // any other special CE inside a contraction is
                                    // not supported by the table
                                    latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
                                    latinOneCEs_[latinOneTableLen_
                                            + contractionOffset] = BAIL_OUT_CE_;
                                    latinOneCEs_[2 * latinOneTableLen_
                                            + contractionOffset] = BAIL_OUT_CE_;
                                    contractionOffset++;
                                }
                                UCharOffset++;
                                // the next contraction element starts a new entry
                                s.primShift = 24;
                                s.secShift = 24;
                                s.terShift = 24;
                                if (contractionOffset == latinOneTableLen_) { // we need to reallocate
                                    resizeLatinOneTable(2 * latinOneTableLen_);
                                }
                            } while (m_contractionIndex_[UCharOffset] != 0xFFFF);
                        }
                            break;
                        default:
                            // unsupported special CE: the table cannot be used
                            latinOneFailed_ = true;
                            return false;
                        }
                    }
                }
                // compact table
                if (contractionOffset < latinOneTableLen_) {
                    resizeLatinOneTable(contractionOffset);
                }
                return true;
            }
4126:
            /**
             * Mutable holder for a position in the string being compared. It lets
             * getLatinOneContraction report back how far it has advanced:
             * compareUseLatin1 stores its current index here before the call and
             * reads it back afterwards.
             */
            private class ContractionInfo {
                // current index into the string handed to getLatinOneContraction
                int index;
            }

            // Shared holder instance; created by setUpLatinOne if it is still null.
            ContractionInfo m_ContInfo_;
4132:
4133:            private int getLatinOneContraction(int strength, int CE, String s) {
4134:                //int strength, int CE, String s, Integer ind) {
4135:                int len = s.length();
4136:                //const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
4137:                int UCharOffset = (CE & 0xFFF) - m_contractionOffset_;
4138:                int offset = 1;
4139:                int latinOneOffset = (CE & 0x00FFF000) >>> 12;
4140:                char schar = 0, tchar = 0;
4141:
4142:                for (;;) {
4143:                    /*
4144:                    if(len == -1) {
4145:                      if(s[*index] == 0) { // end of string
4146:                        return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
4147:                      } else {
4148:                        schar = s[*index];
4149:                      }
4150:                    } else {
4151:                     */
4152:                    if (m_ContInfo_.index == len) {
4153:                        return (latinOneCEs_[strength * latinOneTableLen_
4154:                                + latinOneOffset]);
4155:                    } else {
4156:                        schar = s.charAt(m_ContInfo_.index);
4157:                    }
4158:                    //}
4159:
4160:                    while (schar > (tchar = m_contractionIndex_[UCharOffset
4161:                            + offset]/**(UCharOffset+offset)*/
4162:                    )) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
4163:                        offset++;
4164:                    }
4165:
4166:                    if (schar == tchar) {
4167:                        m_ContInfo_.index++;
4168:                        return (latinOneCEs_[strength * latinOneTableLen_
4169:                                + latinOneOffset + offset]);
4170:                    } else {
4171:                        if (schar > ENDOFLATINONERANGE_ /*& 0xFF00*/) {
4172:                            return BAIL_OUT_CE_;
4173:                        }
4174:                        // skip completely ignorables
4175:                        int isZeroCE = m_trie_.getLeadValue(schar); //UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
4176:                        if (isZeroCE == 0) { // we have to ignore completely ignorables
4177:                            m_ContInfo_.index++;
4178:                            continue;
4179:                        }
4180:
4181:                        return (latinOneCEs_[strength * latinOneTableLen_
4182:                                + latinOneOffset]);
4183:                    }
4184:                }
4185:            }
4186:
4187:            /**
4188:             * Fast strcoll for Latin-1 text, based on the table built by setupLatin1Table.
4189:             * Supports contractions of size two, French secondaries and case switching
4190:             * for strengths primary to tertiary. Shifted and case level are not supported.
4191:             * @param source string compared on the left-hand side
4192:             * @param target string compared on the right-hand side
4193:             * @param startOffset index in both strings at which the comparison starts
4194:             * @return -1, 0 or 1, or compareRegular's result when this fast path bails out
4195:             */
4196:            private final int compareUseLatin1(String source, String target,
4197:                    int startOffset) {
4198:                int sLen = source.length();
4199:                int tLen = target.length();
4200:
4201:                int strength = getStrength();
4202:
4203:                int sIndex = startOffset, tIndex = startOffset;
4204:                char sChar = 0, tChar = 0;
4205:                int sOrder = 0, tOrder = 0; // current CEs; 0 means "fetch the next one"
4206:
4207:                boolean endOfSource = false;
4208:
4209:                //uint32_t *elements = coll->latinOneCEs;
4210:
4211:                boolean haveContractions = false; // if we have contractions in our string
4212:                // we cannot do French secondary
4213:
4214:                int offset = latinOneTableLen_; // added to the char for secondary lookups
4215:
4216:                // Do the primary level
4217:                primLoop: for (;;) {
4218:                    while (sOrder == 0) { // this loop skips primary ignorables
4219:                        // sOrder=getNextlatinOneCE(source);
4220:                        if (sIndex == sLen) {
4221:                            endOfSource = true;
4222:                            break;
4223:                        }
4224:                        sChar = source.charAt(sIndex++); //[sIndex++];
4225:                        //}
4226:                        if (sChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
4227:                            // restart from startOffset with the regular comparison
4228:                            return compareRegular(source, target, startOffset);
4229:                        }
4230:                        sOrder = latinOneCEs_[sChar];
4231:                        if (isSpecial(sOrder)) { // if we got a special
4232:                            // specials can basically be either contractions or bail-out signs. If we get anything
4233:                            // else, we'll bail out anyway
4234:                            if (getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
4235:                                m_ContInfo_.index = sIndex; // pass the current position to the helper
4236:                                sOrder = getLatinOneContraction(0, sOrder,
4237:                                        source);
4238:                                sIndex = m_ContInfo_.index; // read back the position left by the helper
4239:                                haveContractions = true; // if there are contractions, we cannot do French secondary
4240:                                // However, if there are contractions in the table, but we always use just one char,
4241:                                // we might be able to do French. This should be checked out.
4242:                            }
4243:                            if (isSpecial(sOrder) /*== UCOL_BAIL_OUT_CE*/) {
4244:                                // still a special after contraction handling: bail out
4245:                                return compareRegular(source, target,
4246:                                        startOffset);
4247:                            }
4248:                        }
4249:                    }
4250:
4251:                    while (tOrder == 0) { // this loop skips primary ignorables
4252:                        // tOrder=getNextlatinOneCE(target);
4253:                        if (tIndex == tLen) {
4254:                            if (endOfSource) {
4255:                                break primLoop; // both strings exhausted: primaries are equal
4256:                            } else {
4257:                                return 1; // target exhausted first
4258:                            }
4259:                        }
4260:                        tChar = target.charAt(tIndex++); //[tIndex++];
4261:                        if (tChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
4262:                            // restart from startOffset with the regular comparison
4263:                            return compareRegular(source, target, startOffset);
4264:                        }
4265:                        tOrder = latinOneCEs_[tChar];
4266:                        if (isSpecial(tOrder)) {
4267:                            // Handling specials, see the comments for source
4268:                            if (getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
4269:                                m_ContInfo_.index = tIndex;
4270:                                tOrder = getLatinOneContraction(0, tOrder,
4271:                                        target);
4272:                                tIndex = m_ContInfo_.index;
4273:                                haveContractions = true;
4274:                            }
4275:                            if (isSpecial(tOrder)/*== UCOL_BAIL_OUT_CE*/) {
4276:                                // still a special after contraction handling: bail out
4277:                                return compareRegular(source, target,
4278:                                        startOffset);
4279:                            }
4280:                        }
4281:                    }
4282:                    if (endOfSource) { // source is finished, but target is not, say the result.
4283:                        return -1;
4284:                    }
4285:
4286:                    if (sOrder == tOrder) { // if we have same CEs, we continue the loop
4287:                        sOrder = 0;
4288:                        tOrder = 0;
4289:                        continue;
4290:                    } else {
4291:                        // compare current top bytes
4292:                        if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
4293:                            // top bytes differ; >>> keeps the comparison unsigned
4294:                            if (sOrder >>> 8 < tOrder >>> 8) {
4295:                                return -1;
4296:                            } else {
4297:                                return 1;
4298:                            }
4299:                            // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
4300:                            // since we must return enum value
4301:                        }
4302:
4303:                        // top bytes match, continue with following bytes
4304:                        sOrder <<= 8; // a CE shifted down to 0 makes its loop fetch the next char
4305:                        tOrder <<= 8;
4306:                    }
4307:                }
4308:
4309:                // after primary loop, we definitely know the sizes of strings,
4310:                // so we set it and use simpler loop for secondaries and tertiaries
4311:                //sLen = sIndex; tLen = tIndex;
4312:                if (strength >= SECONDARY) {
4313:                    // adjust the table beginning: the C code advanced the table pointer
4314:                    // (latinOneCEs_ += coll->latinOneTableLen), here offset is added per lookup
4315:                    endOfSource = false;
4316:
4317:                    if (!m_isFrenchCollation_) { // non French
4318:                        // This loop is a simplified copy of primary loop
4319:                        // at this point we know that whole strings are latin-1, so we don't
4320:                        // check for that. We also know that we only have contractions as
4321:                        // specials.
4322:                        //sIndex = 0; tIndex = 0;
4323:                        sIndex = startOffset;
4324:                        tIndex = startOffset;
4325:                        secLoop: for (;;) {
4326:                            while (sOrder == 0) {
4327:                                if (sIndex == sLen) {
4328:                                    endOfSource = true;
4329:                                    break;
4330:                                }
4331:                                sChar = source.charAt(sIndex++); //[sIndex++];
4332:                                sOrder = latinOneCEs_[offset + sChar];
4333:                                if (isSpecial(sOrder)) {
4334:                                    m_ContInfo_.index = sIndex;
4335:                                    sOrder = getLatinOneContraction(1, sOrder,
4336:                                            source);
4337:                                    sIndex = m_ContInfo_.index;
4338:                                }
4339:                            }
4340:
4341:                            while (tOrder == 0) {
4342:                                if (tIndex == tLen) {
4343:                                    if (endOfSource) {
4344:                                        break secLoop; // secondaries are equal
4345:                                    } else {
4346:                                        return 1;
4347:                                    }
4348:                                }
4349:                                tChar = target.charAt(tIndex++); //[tIndex++];
4350:                                tOrder = latinOneCEs_[offset + tChar];
4351:                                if (isSpecial(tOrder)) {
4352:                                    m_ContInfo_.index = tIndex;
4353:                                    tOrder = getLatinOneContraction(1, tOrder,
4354:                                            target);
4355:                                    tIndex = m_ContInfo_.index;
4356:                                }
4357:                            }
4358:                            if (endOfSource) {
4359:                                return -1;
4360:                            }
4361:
4362:                            if (sOrder == tOrder) {
4363:                                sOrder = 0;
4364:                                tOrder = 0;
4365:                                continue;
4366:                            } else {
4367:                                // see primary loop for comments on this
4368:                                if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
4369:                                    if (sOrder >>> 8 < tOrder >>> 8) {
4370:                                        return -1;
4371:                                    } else {
4372:                                        return 1;
4373:                                    }
4374:                                }
4375:                                sOrder <<= 8;
4376:                                tOrder <<= 8;
4377:                            }
4378:                        }
4379:                    } else { // French
4380:                        if (haveContractions) { // if we have contractions, we have to bail out
4381:                            // since we don't really know how to handle them here
4382:                            return compareRegular(source, target, startOffset);
4383:                        }
4384:                        // For French, we go backwards
4385:                        sIndex = sLen;
4386:                        tIndex = tLen;
4387:                        secFLoop: for (;;) {
4388:                            while (sOrder == 0) {
4389:                                if (sIndex == startOffset) { // walked back to the start
4390:                                    endOfSource = true;
4391:                                    break;
4392:                                }
4393:                                sChar = source.charAt(--sIndex); //[--sIndex];
4394:                                sOrder = latinOneCEs_[offset + sChar];
4395:                                // don't even look for contractions
4396:                            }
4397:
4398:                            while (tOrder == 0) {
4399:                                if (tIndex == startOffset) {
4400:                                    if (endOfSource) {
4401:                                        break secFLoop; // secondaries are equal
4402:                                    } else {
4403:                                        return 1;
4404:                                    }
4405:                                }
4406:                                tChar = target.charAt(--tIndex); //[--tIndex];
4407:                                tOrder = latinOneCEs_[offset + tChar];
4408:                                // don't even look for contractions
4409:                            }
4410:                            if (endOfSource) {
4411:                                return -1;
4412:                            }
4413:
4414:                            if (sOrder == tOrder) {
4415:                                sOrder = 0;
4416:                                tOrder = 0;
4417:                                continue;
4418:                            } else {
4419:                                // see the primary loop for comments
4420:                                if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
4421:                                    if (sOrder >>> 8 < tOrder >>> 8) {
4422:                                        return -1;
4423:                                    } else {
4424:                                        return 1;
4425:                                    }
4426:                                }
4427:                                sOrder <<= 8;
4428:                                tOrder <<= 8;
4429:                            }
4430:                        }
4431:                    }
4432:                }
4433:
4434:                if (strength >= TERTIARY) {
4435:                    // tertiary loop is the same as secondary (except no French)
4436:                    offset += latinOneTableLen_; // move on to the next section of latinOneCEs_
4437:                    //sIndex = 0; tIndex = 0;
4438:                    sIndex = startOffset;
4439:                    tIndex = startOffset;
4440:                    endOfSource = false;
4441:                    for (;;) {
4442:                        while (sOrder == 0) {
4443:                            if (sIndex == sLen) {
4444:                                endOfSource = true;
4445:                                break;
4446:                            }
4447:                            sChar = source.charAt(sIndex++); //[sIndex++];
4448:                            sOrder = latinOneCEs_[offset + sChar];
4449:                            if (isSpecial(sOrder)) {
4450:                                m_ContInfo_.index = sIndex;
4451:                                sOrder = getLatinOneContraction(2, sOrder,
4452:                                        source);
4453:                                sIndex = m_ContInfo_.index;
4454:                            }
4455:                        }
4456:                        while (tOrder == 0) {
4457:                            if (tIndex == tLen) {
4458:                                if (endOfSource) {
4459:                                    return 0; // if both strings are at the end, they are equal
4460:                                } else {
4461:                                    return 1;
4462:                                }
4463:                            }
4464:                            tChar = target.charAt(tIndex++); //[tIndex++];
4465:                            tOrder = latinOneCEs_[offset + tChar];
4466:                            if (isSpecial(tOrder)) {
4467:                                m_ContInfo_.index = tIndex;
4468:                                tOrder = getLatinOneContraction(2, tOrder,
4469:                                        target);
4470:                                tIndex = m_ContInfo_.index;
4471:                            }
4472:                        }
4473:                        if (endOfSource) {
4474:                            return -1;
4475:                        }
4476:                        if (sOrder == tOrder) {
4477:                            sOrder = 0;
4478:                            tOrder = 0;
4479:                            continue;
4480:                        } else {
4481:                            if (((sOrder ^ tOrder) & 0xff000000) != 0) {
4482:                                if (sOrder >>> 8 < tOrder >>> 8) {
4483:                                    return -1;
4484:                                } else {
4485:                                    return 1;
4486:                                }
4487:                            }
4488:                            sOrder <<= 8;
4489:                            tOrder <<= 8;
4490:                        }
4491:                    }
4492:                }
4493:                return 0; // no difference found at any level that was compared
4494:            }
4495:
4496:            /**
4497:             * Returns the version of this collator object. The first two fields
4498:             * are the high and low byte of a 16-bit value that packs the runtime
4499:             * major version (shifted left by 11), the builder major version (shifted
4500:             * left by 6) and the charset version (0 for now); the third field is
4501:             * the builder minor version and the fourth is the UCA major version.
4502:             * @return the version object associated with this collator
4503:             * @stable ICU 2.8
4504:             */
4505:            public VersionInfo getVersion() {
4506:                final int runtimeMajor = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
4507:                final int builderMajor = m_version_.getMajor();
4508:                // Charset version: should come from the cnv files once makeconv
4509:                // populates them and ucnv.h offers an API to read it.
4510:                final int charsetVersion = 0;
4511:
4512:                // pack the three components and keep only the low 16 bits
4513:                int packed = runtimeMajor << 11;
4514:                packed |= builderMajor << 6;
4515:                packed |= charsetVersion;
4516:                packed &= 0xFFFF;
4517:
4518:                // split the packed value into bytes and append the tailoring parts
4519:                final int highByte = packed >> 8;
4520:                final int lowByte = packed & 0xFF;
4521:                final int builderMinor = m_version_.getMinor();
4522:                final int ucaMajor = UCA_.m_UCA_version_.getMajor();
4523:                return VersionInfo.getInstance(highByte, lowByte, builderMinor,
4524:                        ucaMajor);
4525:            }
4526:
4527:            /** Returns the UCA version of this collator object.
4528:             * @return the version object stored in UCA_.m_UCA_version_
4529:             * @stable ICU 2.8
4530:             */
4531:            public VersionInfo getUCAVersion() {
4532:                final VersionInfo ucaVersion = UCA_.m_UCA_version_;
4533:                return ucaVersion;
4534:            }
4535:
4536:            private transient boolean m_reallocLatinOneCEs_; // NOTE(review): presumably flags that latinOneCEs_ needs reallocating; its use is not visible here - confirm
4537:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.