Source Code Cross Referenced for UnicodeSet.java in » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        //##header
0002:        /*
0003:         *******************************************************************************
0004:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
0005:         * others. All Rights Reserved.                                                *
0006:         *******************************************************************************
0007:         */
0008:        package com.ibm.icu.text;
0009:
0010:        import java.text.*;
0011:        import com.ibm.icu.lang.*;
0012:
0013:        import java.io.IOException;
0014:
0015:        import com.ibm.icu.impl.CollectionUtilities;
0016:        import com.ibm.icu.impl.NormalizerImpl;
0017:        import com.ibm.icu.impl.Utility;
0018:        import com.ibm.icu.impl.UCharacterProperty;
0019:        import com.ibm.icu.impl.UBiDiProps;
0020:        import com.ibm.icu.impl.UCaseProps;
0021:        import com.ibm.icu.impl.UPropertyAliases;
0022:        import com.ibm.icu.impl.SortedSetRelation;
0023:        import com.ibm.icu.impl.RuleCharacterIterator;
0024:
0025:        import com.ibm.icu.util.Freezable;
0026:        import com.ibm.icu.util.ULocale;
0027:        import com.ibm.icu.util.VersionInfo;
0028:
0029:        import com.ibm.icu.text.BreakIterator;
0030:
0031:        import java.util.Map;
0032:        import java.util.HashMap;
0033:        import java.util.MissingResourceException;
0034:        import java.util.TreeSet;
0035:        import java.util.Iterator;
0036:        import java.util.Collection;
0037:
0038:        /**
0039:         * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
0040:         * represent <em>character classes</em> used in regular expressions.
0041:         * A character class specifies a subset of Unicode code points.  Legal
0042:         * code points are U+0000 to U+10FFFF, inclusive.
0043:         *
0044:         * <p>The UnicodeSet class is not designed to be subclassed.
0045:         *
0046:         * <p><code>UnicodeSet</code> supports two APIs. The first is the
0047:         * <em>operand</em> API that allows the caller to modify the value of
0048:         * a <code>UnicodeSet</code> object. It conforms to Java 2's
0049:         * <code>java.util.Set</code> interface, although
0050:         * <code>UnicodeSet</code> does not actually implement that
0051:         * interface. All methods of <code>Set</code> are supported, with the
0052:         * modification that they take a character range or single character
0053:         * instead of an <code>Object</code>, and they take a
0054:         * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
0055:         * operand API may be thought of in terms of boolean logic: a boolean
0056:         * OR is implemented by <code>add</code>, a boolean AND is implemented
0057:         * by <code>retain</code>, a boolean XOR is implemented by
0058:         * <code>complement</code> taking an argument, and a boolean NOT is
0059:         * implemented by <code>complement</code> with no argument.  In terms
0060:         * of traditional set theory function names, <code>add</code> is a
0061:         * union, <code>retain</code> is an intersection, <code>remove</code>
0062:         * is an asymmetric difference, and <code>complement</code> with no
0063:         * argument is a set complement with respect to the superset range
0064:         * <code>MIN_VALUE-MAX_VALUE</code>.
0065:         *
0066:         * <p>The second API is the
0067:         * <code>applyPattern()</code>/<code>toPattern()</code> API from the
0068:         * <code>java.text.Format</code>-derived classes.  Unlike the
0069:         * methods that add characters, add categories, and control the logic
0070:         * of the set, the method <code>applyPattern()</code> sets all
0071:         * attributes of a <code>UnicodeSet</code> at once, based on a
0072:         * string pattern.
0073:         *
0074:         * <p><b>Pattern syntax</b></p>
0075:         *
0076:         * Patterns are accepted by the constructors and the
0077:         * <code>applyPattern()</code> methods and returned by the
0078:         * <code>toPattern()</code> method.  These patterns follow a syntax
0079:         * similar to that employed by version 8 regular expression character
0080:         * classes.  Here are some simple examples:
0081:         *
0082:         * <blockquote>
0083:         *   <table>
0084:         *     <tr align="top">
0085:         *       <td nowrap valign="top" align="left"><code>[]</code></td>
0086:         *       <td valign="top">No characters</td>
0087:         *     </tr><tr align="top">
0088:         *       <td nowrap valign="top" align="left"><code>[a]</code></td>
0089:         *       <td valign="top">The character 'a'</td>
0090:         *     </tr><tr align="top">
0091:         *       <td nowrap valign="top" align="left"><code>[ae]</code></td>
0092:         *       <td valign="top">The characters 'a' and 'e'</td>
0093:         *     </tr>
0094:         *     <tr>
0095:         *       <td nowrap valign="top" align="left"><code>[a-e]</code></td>
0096:         *       <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
0097:         *       point order</td>
0098:         *     </tr>
0099:         *     <tr>
0100:         *       <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
0101:         *       <td valign="top">The character U+4E01</td>
0102:         *     </tr>
0103:         *     <tr>
0104:         *       <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
0105:         *       <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
0106:         *       &quot;ac&quot;</td>
0107:         *     </tr>
0108:         *     <tr>
0109:         *       <td nowrap valign="top" align="left"><code>[\p{Lu}]</code></td>
0110:         *       <td valign="top">All characters in the general category Uppercase Letter</td>
0111:         *     </tr>
0112:         *   </table>
0113:         * </blockquote>
0114:         *
0115:         * Any character may be preceded by a backslash in order to remove any special
0116:         * meaning.  White space characters, as defined by UCharacterProperty.isRuleWhiteSpace(), are
0117:         * ignored, unless they are escaped.
0118:         *
0119:         * <p>Property patterns specify a set of characters having a certain
0120:         * property as defined by the Unicode standard.  Both the POSIX-like
0121:         * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized.  For a
0122:         * complete list of supported property patterns, see the User's Guide
0123:         * for UnicodeSet at
0124:         * <a href="http://icu.sourceforge.net/userguide/unicodeSet.html">
0125:         * http://icu.sourceforge.net/userguide/unicodeSet.html</a>.
0126:         * Actual determination of property data is defined by the underlying
0127:         * Unicode database as implemented by UCharacter.
0128:         *
0129:         * <p>Patterns specify individual characters, ranges of characters, and
0130:         * Unicode property sets.  When elements are concatenated, they
0131:         * specify their union.  To complement a set, place a '^' immediately
0132:         * after the opening '['.  Property patterns are inverted by modifying
0133:         * their delimiters; "[:^foo:]" and "\P{foo}".  In any other location,
0134:         * '^' has no special meaning.
0135:         *
0136:         * <p>Ranges are indicated by placing a '-' between two
0137:         * characters, as in "a-z".  This specifies the range of all
0138:         * characters from the left to the right, in Unicode order.  If the
0139:         * left character is greater than or equal to the
0140:         * right character it is a syntax error.  If a '-' occurs as the first
0141:         * character after the opening '[' or '[^', or if it occurs as the
0142:         * last character before the closing ']', then it is taken as a
0143:         * literal.  Thus "[a\\-b]", "[-ab]", and "[ab-]" all indicate the same
0144:         * set of three characters, 'a', 'b', and '-'.
0145:         *
0146:         * <p>Sets may be intersected using the '&' operator or the asymmetric
0147:         * set difference may be taken using the '-' operator, for example,
0148:         * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
0149:         * with values less than 4096.  Operators ('&' and '-') have equal
0150:         * precedence and bind left-to-right.  Thus
0151:         * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
0152:         * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
0153:         * difference; intersection is commutative.
0154:         *
0155:         * <table>
0156:         * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
0157:         * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
0158:         * through 'z' and all letters in between, in Unicode order
0159:         * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
0160:         * all characters but 'a' through 'z',
0161:         * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
0162:         * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
0163:         * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
0164:         * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
0165:         * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
0166:         * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
0167:         * <td>The asymmetric difference of sets specified by <em>pat1</em> and
0168:         * <em>pat2</em>
0169:         * <tr valign=top><td nowrap><code>[:Lu:] or \p{Lu}</code>
0170:         * <td>The set of characters having the specified
0171:         * Unicode property; in
0172:         * this case, Unicode uppercase letters
0173:         * <tr valign=top><td nowrap><code>[:^Lu:] or \P{Lu}</code>
0174:         * <td>The set of characters <em>not</em> having the given
0175:         * Unicode property
0176:         * </table>
0177:         *
0178:         * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
0179:         *
0180:         * <p><b>Formal syntax</b></p>
0181:         *
0182:         * <blockquote>
0183:         *   <table>
0184:         *     <tr align="top">
0185:         *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
0186:         *       <td valign="top"><code>('[' '^'? item* ']') |
0187:         *       property</code></td>
0188:         *     </tr>
0189:         *     <tr align="top">
0190:         *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
0191:         *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
0192:         *       </code></td>
0193:         *     </tr>
0194:         *     <tr align="top">
0195:         *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
0196:         *       <td valign="top"><code>pattern | pattern-expr pattern |
0197:         *       pattern-expr op pattern<br>
0198:         *       </code></td>
0199:         *     </tr>
0200:         *     <tr align="top">
0201:         *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
0202:         *       <td valign="top"><code>'&amp;' | '-'<br>
0203:         *       </code></td>
0204:         *     </tr>
0205:         *     <tr align="top">
0206:         *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
0207:         *       <td valign="top"><code>'[' | ']' | '-'<br>
0208:         *       </code></td>
0209:         *     </tr>
0210:         *     <tr align="top">
0211:         *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
0212:         *       <td valign="top"><em>any character that is not</em><code> special<br>
0213:         *       | ('\\' </code><em>any character</em><code>)<br>
0214:         *       | ('&#92;u' hex hex hex hex)<br>
0215:         *       </code></td>
0216:         *     </tr>
0217:         *     <tr align="top">
0218:         *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
0219:         *       <td valign="top"><em>any character for which
0220:         *       </em><code>Character.digit(c, 16)</code><em>
0221:         *       returns a non-negative result</em></td>
0222:         *     </tr>
0223:         *     <tr>
0224:         *       <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
0225:         *       <td valign="top"><em>a Unicode property set pattern</em></td>
0226:         *     </tr>
0227:         *   </table>
0228:         *   <br>
0229:         *   <table border="1">
0230:         *     <tr>
0231:         *       <td>Legend: <table>
0232:         *         <tr>
0233:         *           <td nowrap valign="top"><code>a := b</code></td>
0234:         *           <td width="20" valign="top">&nbsp; </td>
0235:         *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
0236:         *         </tr>
0237:         *         <tr>
0238:         *           <td nowrap valign="top"><code>a?</code></td>
0239:         *           <td valign="top"></td>
0240:         *           <td valign="top">zero or one instance of <code>a</code><br>
0241:         *           </td>
0242:         *         </tr>
0243:         *         <tr>
0244:         *           <td nowrap valign="top"><code>a*</code></td>
0245:         *           <td valign="top"></td>
0246:         *           <td valign="top">zero or more instances of <code>a</code><br>
0247:         *           </td>
0248:         *         </tr>
0249:         *         <tr>
0250:         *           <td nowrap valign="top"><code>a | b</code></td>
0251:         *           <td valign="top"></td>
0252:         *           <td valign="top">either <code>a</code> or <code>b</code><br>
0253:         *           </td>
0254:         *         </tr>
0255:         *         <tr>
0256:         *           <td nowrap valign="top"><code>'a'</code></td>
0257:         *           <td valign="top"></td>
0258:         *           <td valign="top">the literal string between the quotes </td>
0259:         *         </tr>
0260:         *       </table>
0261:         *       </td>
0262:         *     </tr>
0263:         *   </table>
0264:         * </blockquote>
0265:         * <p>To iterate over contents of UnicodeSet, use UnicodeSetIterator class.
0266:         *
0267:         * @author Alan Liu
0268:         * @stable ICU 2.0
0269:         * @see UnicodeSetIterator
0270:         */
0271:        public class UnicodeSet extends UnicodeFilter implements  Freezable {
0272:
0273:            private static final int LOW = 0x000000; // inclusive lower bound: LOW <= all valid values (zero for code points)
0274:            private static final int HIGH = 0x110000; // exclusive upper bound: HIGH > all valid values
0275:            // (0x110000 for code points; it would be 0x10000 for code units)
0276:
0277:            /**
0278:             * Minimum value that can be stored in a UnicodeSet (equal to LOW, i.e. 0).
0279:             * @stable ICU 2.0
0280:             */
0281:            public static final int MIN_VALUE = LOW;
0282:
0283:            /**
0284:             * Maximum value that can be stored in a UnicodeSet (HIGH - 1, i.e. 0x10FFFF).
0285:             * @stable ICU 2.0
0286:             */
0287:            public static final int MAX_VALUE = HIGH - 1;
0288:
0289:            private int len; // number of entries of list in use; list may be longer to minimize reallocs
0290:            private int[] list; // MUST be terminated with HIGH (the empty set stores only HIGH)
0291:            private int[] rangeList; // internal buffer
0292:            private int[] buffer; // internal buffer
0293:
0294:            // NOTE: declared as TreeSet rather than SortedSet because SortedSet has no public clone().
0295:            // Package-private (not private) so that UnicodeSetIterator can get access.
0296:            TreeSet strings = new TreeSet();
0297:
0298:            /**
0299:             * The pattern representation of this set.  This may not be the
0300:             * most economical pattern.  It is the pattern supplied to
0301:             * applyPattern(), with variables substituted and whitespace
0302:             * removed.  For sets constructed without applyPattern(), or
0303:             * modified using the non-pattern API, this string will be null,
0304:             * indicating that toPattern() must generate a pattern
0305:             * representation from the inversion list.
0306:             */
0307:            private String pat = null;
0308:
0309:            private static final int START_EXTRA = 16; // spare slots allocated with a new list. Must be >= 0
0310:            private static final int GROW_EXTRA = START_EXTRA; // extra amount for growth. Must be >= 0
0311:
0312:            // Special property set IDs
0313:            private static final String ANY_ID = "ANY"; // [\u0000-\U0010FFFF]
0314:            private static final String ASCII_ID = "ASCII"; // [\u0000-\u007F]
0315:            private static final String ASSIGNED = "Assigned"; // [:^Cn:]
0316:
0317:            /**
0318:             * A set of all characters _except_ the second through last characters of
0319:             * certain ranges.  These ranges are ranges of characters whose
0320:             * properties are all exactly alike, e.g. CJK Ideographs from
0321:             * U+4E00 to U+9FA5.
0322:             */
0323:            private static UnicodeSet INCLUSIONS[] = null;
0324:
0325:            //----------------------------------------------------------------
0326:            // Public API
0327:            //----------------------------------------------------------------
0328:
0329:            /**
0330:             * Creates a set that contains no characters and no strings.
0331:             * @stable ICU 2.0
0332:             */
0333:            public UnicodeSet() {
0334:                list = new int[START_EXTRA + 1]; // one slot for the terminator plus spare room
0335:                list[len++] = HIGH; // len is 0 here, so the terminator lands in slot 0
0336:            }
0337:
0338:            /**
0339:             * Creates a new set holding the same contents as <code>other</code>.
0340:             * @stable ICU 2.0
0341:             */
0342:            public UnicodeSet(UnicodeSet other) {
0343:                this.set(other); // copies other's list, length, pattern and strings
0344:            }
0345:
0346:            /**
0347:             * Creates a set holding every code point of the given range.  If
0348:             * <code>start > end</code> the range is empty, so an empty set is created.
0349:             *
0350:             * @param start first character, inclusive, of range
0351:             * @param end last character, inclusive, of range
0352:             * @stable ICU 2.0
0353:             */
0354:            public UnicodeSet(int start, int end) {
0355:                this();
0356:                complement(start, end); // the set is empty here, so complementing the range adds it
0357:            }
0358:
0359:            /**
0360:             * Creates a set by parsing <code>pattern</code>; the pattern language is
0361:             * described in the class documentation.  Whitespace in the pattern is ignored.
0362:             * @param pattern a string specifying what characters are in the set
0363:             * @exception java.lang.IllegalArgumentException if the pattern contains
0364:             * a syntax error.
0365:             * @stable ICU 2.0
0366:             */
0367:            public UnicodeSet(String pattern) {
0368:                this();
0369:                applyPattern(pattern, null, null, IGNORE_SPACE); // no parse position, no symbol table
0370:            }
0371:
0372:            /**
0373:             * Constructs a set from the given pattern.  See the class description
0374:             * for the syntax of the pattern language.
0375:             * @param pattern a string specifying what characters are in the set
0376:             * @param ignoreWhitespace if true, ignore characters for which
0377:             * UCharacterProperty.isRuleWhiteSpace() returns true
0378:             * @exception java.lang.IllegalArgumentException if the pattern contains
0379:             * a syntax error.
0380:             * @stable ICU 2.0
0381:             */
0382:            public UnicodeSet(String pattern, boolean ignoreWhitespace) {
0383:                this ();
0384:                applyPattern(pattern, null, null,
0385:                        ignoreWhitespace ? IGNORE_SPACE : 0);
0386:            }
0387:
0388:            /**
0389:             * Creates a set by parsing <code>pattern</code> with the given parse
0390:             * options; the pattern language is described in the class documentation.
0391:             * @param pattern a string specifying what characters are in the set
0392:             * @param options a bitmask indicating which options to apply.
0393:             * Valid options are IGNORE_SPACE and CASE.
0394:             * @exception java.lang.IllegalArgumentException if the pattern contains
0395:             * a syntax error.
0396:             * @internal
0397:             * @deprecated This API is ICU internal only.
0398:             */
0399:            public UnicodeSet(String pattern, int options) {
0400:                this();
0401:                applyPattern(pattern, null, null, options); // no parse position, no symbol table
0402:            }
0403:
0404:            /**
0405:             * Creates a set by parsing <code>pattern</code> from a given position,
0406:             * ignoring whitespace.  See the class description for the pattern syntax.
0407:             * @param pattern a string specifying what characters are in the set
0408:             * @param pos on input, the position in pattern at which to start parsing.
0409:             * On output, the position after the last character parsed.
0410:             * @param symbols a symbol table mapping variables to char[] arrays
0411:             * and chars to UnicodeSets
0412:             * @exception java.lang.IllegalArgumentException if the pattern
0413:             * contains a syntax error.
0414:             * @stable ICU 2.0
0415:             */
0416:            public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols) {
0417:                this();
0418:                // IGNORE_SPACE is always applied by this overload.
0419:                applyPattern(pattern, pos, symbols, IGNORE_SPACE);
0420:            }
0421:
0422:            /**
0423:             * Creates a set by parsing <code>pattern</code> from a given position with
0424:             * the given parse options.  See the class description for the pattern syntax.
0425:             * @param pattern a string specifying what characters are in the set
0426:             * @param pos on input, the position in pattern at which to start parsing.
0427:             * On output, the position after the last character parsed.
0428:             * @param symbols a symbol table mapping variables to char[] arrays
0429:             * and chars to UnicodeSets
0430:             * @param options a bitmask indicating which options to apply.
0431:             * Valid options are IGNORE_SPACE and CASE.
0432:             * @exception java.lang.IllegalArgumentException if the pattern
0433:             * contains a syntax error.
0434:             * @draft ICU 3.2
0435:             * @provisional This API might change or be removed in a future release.
0436:             */
0437:            public UnicodeSet(String pattern, ParsePosition pos, SymbolTable symbols, int options) {
0438:                this();
0439:                // Every argument is forwarded unchanged to the general parser entry point.
0440:                applyPattern(pattern, pos, symbols, options);
0441:            }
0442:
0443:            /**
0444:             * Returns a new set equivalent to this one, including its frozen state.
0445:             * @stable ICU 2.0
0446:             */
0447:            public Object clone() {
0448:                UnicodeSet copy = new UnicodeSet(this);
0449:                copy.frozen = frozen; // set(UnicodeSet) does not carry the frozen flag over
0450:                return copy;
0451:            }
0452:
0453:            /**
0454:             * Replaces the contents of this set with the range
0455:             * <code>start - end</code>.  If <code>start > end</code> the range is
0456:             * empty and this object becomes an empty set.
0457:             *
0458:             * @param start first character in the set, inclusive
0459:             * @param end last character in the set, inclusive
0460:             * @stable ICU 2.0
0461:             */
0462:            public UnicodeSet set(int start, int end) {
0463:                checkFrozen();
0464:                clear(); // drop the current contents first ...
0465:                complement(start, end); // ... so that only the requested range remains
0466:                return this;
0467:            }
0468:
0469:            /**
0470:             * Makes this object represent the same set as <code>other</code> by copying
0471:             * its list, used length, pattern string and string set.
0472:             * @param other a <code>UnicodeSet</code> whose value will be copied
0473:             * @stable ICU 2.0
0474:             */
0475:            public UnicodeSet set(UnicodeSet other) {
0476:                checkFrozen();
0477:                this.list = (int[]) other.list.clone(); // private copy of the array
0478:                this.len = other.len;
0479:                this.pat = other.pat; // String is immutable, so the reference can be shared
0480:                this.strings = (TreeSet) other.strings.clone(); // private copy of the string set
0481:                return this;
0482:            }
0483:
0484:            /**
0485:             * Changes this set so that it represents the set described by
0486:             * <code>pattern</code>, ignoring whitespace in the pattern.
0487:             * See the class description for the syntax of the pattern language.
0488:             * @param pattern a string specifying what characters are in the set
0489:             * @exception java.lang.IllegalArgumentException if the pattern
0490:             * contains a syntax error.
0491:             * @stable ICU 2.0
0492:             */
0493:            public final UnicodeSet applyPattern(String pattern) {
0494:                checkFrozen(); // checked before any parsing is attempted
0495:                return applyPattern(pattern, null, null, IGNORE_SPACE);
0496:            }
0497:
0498:            /**
0499:             * Modifies this set to represent the set specified by the given pattern,
0500:             * optionally ignoring whitespace.
0501:             * See the class description for the syntax of the pattern language.
0502:             * @param pattern a string specifying what characters are in the set
0503:             * @param ignoreWhitespace if true then characters for which
0504:             * UCharacterProperty.isRuleWhiteSpace() returns true are ignored
0505:             * @exception java.lang.IllegalArgumentException if the pattern
0506:             * contains a syntax error.
0507:             * @stable ICU 2.0
0508:             */
0509:            public UnicodeSet applyPattern(String pattern,
0510:                    boolean ignoreWhitespace) {
0511:                checkFrozen();
0512:                return applyPattern(pattern, null, null,
0513:                        ignoreWhitespace ? IGNORE_SPACE : 0);
0514:            }
0515:
0516:            /**
0517:             * Changes this set so that it represents the set described by
0518:             * <code>pattern</code>, parsed with the given options.
0519:             * See the class description for the syntax of the pattern language.
0520:             * @param pattern a string specifying what characters are in the set
0521:             * @param options a bitmask indicating which options to apply.
0522:             * Valid options are IGNORE_SPACE and CASE.
0523:             * @exception java.lang.IllegalArgumentException if the pattern
0524:             * contains a syntax error.
0525:             * @internal
0526:             * @deprecated This API is ICU internal only.
0527:             */
0528:            public UnicodeSet applyPattern(String pattern, int options) {
0529:                checkFrozen(); // checked before any parsing is attempted
0530:                return applyPattern(pattern, null, null, options);
0531:            }
0532:
0533:            /**
0534:             * Reports whether <code>pattern</code> looks like a UnicodeSet pattern at
0535:             * <code>pos</code>: a '[' that is not the last character, or a property pattern.
0536:             * @stable ICU 2.0
0537:             */
0538:            public static boolean resemblesPattern(String pattern, int pos) {
0539:                boolean opensSet = pos + 1 < pattern.length() && pattern.charAt(pos) == '[';
0540:                return opensSet || resemblesPropertyPattern(pattern, pos);
0541:            }
0542:
0543:            /**
0544:             * Appends the <code>toPattern()</code> representation of every code point of
0545:             * <code>s</code> to <code>buf</code>, visiting each code point exactly once.
0546:             */
0547:            private static void _appendToPat(StringBuffer buf, String s, boolean escapeUnprintable) {
0548:                for (int i = 0, cp; i < s.length(); i += UTF16.getCharCount(cp)) {
0549:                    cp = UTF16.charAt(s, i); // advance by this code point's width, not by the index value
0550:                    _appendToPat(buf, cp, escapeUnprintable);
0551:                }
0552:            }
0553:
0554:            /**
0555:             * Append the <code>toPattern()</code> representation of a single
0556:             * code point to the given <code>StringBuffer</code>.
0557:             */
0558:            private static void _appendToPat(StringBuffer buf, int c, boolean escapeUnprintable) {
0559:                // Prefer hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for
0560:                // anything unprintable; stop here only if Utility.escapeUnprintable returns true.
0561:                if (escapeUnprintable && Utility.isUnprintable(c)
0562:                        && Utility.escapeUnprintable(buf, c)) {
0563:                    return;
0564:                }
0565:                // Decide whether the character needs a backslash in front of it.
0566:                boolean needsBackslash;
0567:                switch (c) {
0568:                case '[': // SET_OPEN
0569:                case ']': // SET_CLOSE
0570:                case '-': // HYPHEN
0571:                case '^': // COMPLEMENT
0572:                case '&': // INTERSECTION
0573:                case '\\': // BACKSLASH
0574:                case '{':
0575:                case '}':
0576:                case '$':
0577:                case ':':
0578:                    needsBackslash = true;
0579:                    break;
0580:                default:
0581:                    // Whitespace is escaped as well
0582:                    needsBackslash = UCharacterProperty.isRuleWhiteSpace(c);
0583:                    break;
0584:                }
0585:                if (needsBackslash) {
0586:                    buf.append('\\');
0587:                }
0588:                UTF16.append(buf, c);
0589:            }
0590:
0591:            /**
0592:             * Returns a string representation of this set.  Passing the result to a
0593:             * UnicodeSet constructor will produce another set that is equal to this one.
0594:             * @param escapeUnprintable escape unprintable characters if true
0595:             * @stable ICU 2.0
0596:             */
0597:            public String toPattern(boolean escapeUnprintable) {
0598:                StringBuffer pattern = _toPattern(new StringBuffer(), escapeUnprintable);
0599:                return pattern.toString();
0600:            }
0601:
0602:            /**
0603:             * Append a string representation of this set to result.  If a cleaned
0604:             * version of the string passed to applyPattern() is on hand (pat), it
0605:             * is copied, escaping unprintable characters when asked to.  Otherwise
0606:             * the pattern is generated.
0607:             */
0608:            private StringBuffer _toPattern(StringBuffer result,
0609:                    boolean escapeUnprintable) {
0610:                if (pat == null) {
0611:                    return _generatePattern(result, escapeUnprintable, true);
0612:                }
0613:
0614:                // Length of the run of backslashes copied just before the
0615:                // current position.
0616:                int backslashRun = 0;
0617:                int pos = 0;
0618:
0619:                while (pos < pat.length()) {
0620:                    int cp = UTF16.charAt(pat, pos);
0621:                    pos += UTF16.getCharCount(cp);
0622:                    if (!(escapeUnprintable && Utility.isUnprintable(cp))) {
0623:                        // Copy verbatim and keep the backslash run up to date.
0624:                        UTF16.append(result, cp);
0625:                        backslashRun = (cp == '\\') ? backslashRun + 1 : 0;
0626:                        continue;
0627:                    }
0628:                    // An odd number of preceding backslashes means the
0629:                    // unprintable character has been escaped; delete the final
0630:                    // backslash before writing the escape sequence.
0631:                    if ((backslashRun & 1) != 0) {
0632:                        result.setLength(result.length() - 1);
0633:                    }
0634:                    Utility.escapeUnprintable(result, cp);
0635:                    backslashRun = 0;
0636:                }
0637:                return result;
0638:            }
0639:
0640:            /**
0641:             * Generate and append a string representation of this set to result.
0642:             * This does not use this.pat, the cleaned up copy of the string
0643:             * passed to applyPattern().
0644:             * @param result the buffer into which to generate the pattern
0645:             * @param escapeUnprintable escape unprintable characters if true
0646:             * @stable ICU 2.0
0647:             */
0648:            public StringBuffer _generatePattern(StringBuffer result,
0649:                    boolean escapeUnprintable) {
0650:                return _generatePattern(result, escapeUnprintable, true);
0651:            }
0652:
0653:            /**
0654:             * Append a newly generated pattern for this set to result.  this.pat,
0655:             * the cleaned up copy of the string passed to applyPattern(), is not
0656:             * used.
0657:             * @param result the buffer into which to generate the pattern
0658:             * @param escapeUnprintable escape unprintable characters if true
0659:             * @param includeStrings if false, doesn't include the strings.
0660:             * @return result, with the pattern appended
0661:             * @internal
0662:             * @deprecated This API is ICU internal only.
0663:             */
0664:            public StringBuffer _generatePattern(StringBuffer result,
0665:                    boolean escapeUnprintable, boolean includeStrings) {
0666:                result.append('[');
0667:
0668:                //      // Check against the predefined categories.  We implicitly build
0669:                //      // up ALL category sets the first time toPattern() is called.
0670:                //      for (int cat=0; cat<CATEGORY_COUNT; ++cat) {
0671:                //          if (this.equals(getCategorySet(cat))) {
0672:                //              result.append(':');
0673:                //              result.append(CATEGORY_NAMES.substring(cat*2, cat*2+2));
0674:                //              return result.append(":]");
0675:                //          }
0676:                //      }
0677:
0678:                int count = getRangeCount();
0679:
0680:                // If the set contains at least 2 intervals and includes both
0681:                // MIN_VALUE and MAX_VALUE, the inverse representation is more
0682:                // economical: '^' followed by the gaps between neighbouring
0683:                // ranges.
0684:                boolean inverse = count > 1 && getRangeStart(0) == MIN_VALUE
0685:                        && getRangeEnd(count - 1) == MAX_VALUE;
0686:                if (inverse) {
0687:                    result.append('^');
0688:                }
0689:
0690:                // In the inverse form entry i is the gap between range i-1 and
0691:                // range i, so the walk starts at 1.
0692:                for (int i = inverse ? 1 : 0; i < count; ++i) {
0693:                    int first;
0694:                    int last;
0695:                    if (inverse) {
0696:                        first = getRangeEnd(i - 1) + 1;
0697:                        last = getRangeStart(i) - 1;
0698:                    } else {
0699:                        first = getRangeStart(i);
0700:                        last = getRangeEnd(i);
0701:                    }
0702:
0703:                    _appendToPat(result, first, escapeUnprintable);
0704:                    if (first == last) {
0705:                        continue; // a single code point
0706:                    }
0707:                    // Two neighbouring code points are written without a hyphen.
0708:                    if (first + 1 != last) {
0709:                        result.append('-');
0710:                    }
0711:                    _appendToPat(result, last, escapeUnprintable);
0712:                }
0713:
0714:                // Strings follow the ranges, each wrapped in braces.
0715:                if (includeStrings && strings.size() > 0) {
0716:                    for (Iterator it = strings.iterator(); it.hasNext();) {
0717:                        result.append('{');
0718:                        _appendToPat(result, (String) it.next(),
0719:                                escapeUnprintable);
0720:                        result.append('}');
0721:                    }
0722:                }
0723:                return result.append(']');
0724:            }
0725:
0726:            /**
0727:             * Returns the number of elements in this set (its cardinality).
0728:             * Note that the elements of a set may include both individual
0729:             * code points and strings.
0730:             *
0731:             * @return the number of elements in this set (its cardinality).
0732:             * @stable ICU 2.0
0733:             */
0734:            public int size() {
0735:                // Every string counts once; every range counts once per code point.
0736:                int total = strings.size();
0737:                for (int i = getRangeCount() - 1; i >= 0; --i) {
0738:                    total += getRangeEnd(i) - getRangeStart(i) + 1;
0739:                }
0740:                return total;
0741:            }
0742:
0743:            /**
0744:             * Returns <tt>true</tt> if this set contains no elements.
0745:             * @return <tt>true</tt> if this set has neither code points nor strings.
0746:             * @stable ICU 2.0
0747:             */
0748:            public boolean isEmpty() {
0749:                boolean noRanges = (len == 1); // the list holds nothing but its HIGH terminator
0750:                return noRanges && strings.size() == 0;
0751:            }
0752:
0753:            /**
0754:             * Implementation of UnicodeMatcher API.  Returns <tt>true</tt> if
0755:             * this set contains any character whose low byte is the given
0756:             * value.  This is used by <tt>RuleBasedTransliterator</tt> for
0757:             * indexing.
0758:             * @param v the index value, in the range [0,255]
0759:             * @stable ICU 2.0
0760:             */
0761:            public boolean matchesIndexValue(int v) {
0762:                /* A range (pair) of this set either has equal high bytes at both
0763:                 * ends, aaxx..aayy, in which case v must satisfy xx <= v <= yy, or
0764:                 * unequal ones, aaxx..bbyy with bb>aa, in which case
0765:                 * xx <= v || v <= yy is enough.  (This is identical to the time
0766:                 * zone month containment logic.)
0767:                 */
0768:                int rangeCount = getRangeCount();
0769:                for (int i = 0; i < rangeCount; ++i) {
0770:                    int first = getRangeStart(i);
0771:                    int last = getRangeEnd(i);
0772:                    boolean sameHighBytes = ((first ^ last) & ~0xFF) == 0;
0773:                    boolean notBelowFirst = (first & 0xFF) <= v;
0774:                    boolean notAboveLast = v <= (last & 0xFF);
0775:                    boolean hit = sameHighBytes
0776:                            ? (notBelowFirst && notAboveLast)
0777:                            : (notBelowFirst || notAboveLast);
0778:                    if (hit) {
0779:                        return true;
0780:                    }
0781:                }
0782:
0783:                if (strings.size() != 0) {
0784:                    // A string is matched through the low byte of its first code
0785:                    // point.  Strings are never empty (enforced elsewhere), so
0786:                    // reading position 0 is safe.
0787:                    for (Iterator it = strings.iterator(); it.hasNext();) {
0788:                        int lead = UTF16.charAt((String) it.next(), 0);
0789:                        if ((lead & 0xFF) == v) {
0790:                            return true;
0791:                        }
0792:                    }
0793:                }
0794:
0795:                return false;
0796:            }
0797:
0798:            /**
0799:             * Implementation of UnicodeMatcher.matches().  Always matches the
0800:             * longest possible multichar string, moving offset[0] past it; if no string matches in full or in part, defers to super.matches().
0801:             * @stable ICU 2.0
0802:             */
0803:            public int matches(Replaceable text, int[] offset, int limit,
0804:                    boolean incremental) {
0805:
0806:                if (offset[0] == limit) {
0807:                    // Strings, if any, have length != 0, so we don't worry
0808:                    // about them here.  If we ever allow zero-length strings
0809:                    // we must check for them here.
0810:                    if (contains(UnicodeMatcher.ETHER)) {
0811:                        return incremental ? U_PARTIAL_MATCH : U_MATCH;
0812:                    } else {
0813:                        return U_MISMATCH;
0814:                    }
0815:                } else {
0816:                    if (strings.size() != 0) { // try strings first
0817:
0818:                        // The forward and backward loops might be separated
0819:                        // later; for now they are combined.
0820:
0821:                        // TODO Improve efficiency of this, at least in the forward
0822:                        // direction, if not in both.  In the forward direction we
0823:                        // can assume the strings are sorted.
0824:
0825:                        Iterator it = strings.iterator();
0826:                        boolean forward = offset[0] < limit; // a limit below offset[0] selects the reverse direction
0827:
0828:                        // firstChar is the leftmost char to match in the
0829:                        // forward direction or the rightmost char to match in
0830:                        // the reverse direction.
0831:                        char firstChar = text.charAt(offset[0]);
0832:
0833:                        // If there are multiple strings that can match we
0834:                        // return the longest match; this tracks its length.
0835:                        int highWaterLength = 0;
0836:
0837:                        while (it.hasNext()) {
0838:                            String trial = (String) it.next();
0839:
0840:                            //if (trial.length() == 0) {
0841:                            //    return U_MATCH; // null-string always matches
0842:                            //}
0843:                            // assert(trial.length() != 0); // We ensure this elsewhere
0844:
0845:                            char c = trial.charAt(forward ? 0
0846:                                    : trial.length() - 1);
0847:
0848:                            // Strings are sorted, so in the forward direction a
0849:                            // larger first char means no later string can match.
0850:                            if (forward && c > firstChar)
0851:                                break;
0852:                            if (c != firstChar)
0853:                                continue;
0854:
0855:                            int len = matchRest(text, offset[0], limit, trial); // 0 on mismatch, else the number of chars matched
0856:
0857:                            if (incremental) {
0858:                                int maxLen = forward ? limit - offset[0]
0859:                                        : offset[0] - limit;
0860:                                if (len == maxLen) {
0861:                                    // We have successfully matched but only up to limit.  NOTE(review): also taken when trial ends exactly at limit.
0862:                                    return U_PARTIAL_MATCH;
0863:                                }
0864:                            }
0865:
0866:                            if (len == trial.length()) {
0867:                                // We have successfully matched the whole string.
0868:                                if (len > highWaterLength) {
0869:                                    highWaterLength = len;
0870:                                }
0871:                                // In the forward direction we know strings
0872:                                // are sorted so we can bail early.
0873:                                if (forward && len < highWaterLength) {
0874:                                    break;
0875:                                }
0876:                                continue;
0877:                            }
0878:                        }
0879:
0880:                        // We've checked all strings without a partial match.
0881:                        // If we have full matches, report the longest one.
0882:                        if (highWaterLength != 0) {
0883:                            offset[0] += forward ? highWaterLength // step past the match in the direction of travel
0884:                                    : -highWaterLength;
0885:                            return U_MATCH;
0886:                        }
0887:                    }
0888:                    return super .matches(text, offset, limit, incremental);
0889:                }
0890:            }
0891:
0892:            /**
0893:             * Returns the longest match for s in text at the given position.
0894:             * If limit > start, matching runs forward from start+1 towards
0895:             * limit and covers every character of s except s.charAt(0).  If
0896:             * limit < start, it runs backward from start-1 and covers every
0897:             * character except s.charAt(s.length()-1).  The first character,
0898:             * text.charAt(start), is assumed to match s already and is not
0899:             * checked.
0900:             * @param text the text to match
0901:             * @param start the first character to match.  In the forward
0902:             * direction, text.charAt(start) is matched against s.charAt(0).
0903:             * In the reverse direction, it is matched against
0904:             * s.charAt(s.length()-1).
0905:             * @param limit the limit offset for matching, either last+1 in
0906:             * the forward direction, or last-1 in the reverse direction,
0907:             * where last is the index of the last character to match.
0908:             * @param s the string to match against text
0909:             * @return If part of s matches up to the limit, return |limit -
0910:             * start|.  If all of s matches before reaching the limit, return
0911:             * s.length().  If there is a mismatch between s and text, return 0
0912:             */
0913:            private static int matchRest(Replaceable text, int start,
0914:                    int limit, String s) {
0915:                boolean forward = start < limit;
0916:
0917:                // Never look at more chars than lie between start and limit,
0918:                // nor at more than s has.
0919:                int span = Math.min(forward ? limit - start : start - limit,
0920:                        s.length());
0921:
0922:                // Going forward, text and s are both walked upward from their
0923:                // first char; going backward, both are walked downward, s from
0924:                // its last char.
0925:                int step = forward ? 1 : -1;
0926:                int sAnchor = forward ? 0 : s.length() - 1;
0927:
0928:                // Offset 0 is skipped: the caller has already matched it.
0929:                for (int i = 1; i < span; ++i) {
0930:                    int delta = step * i;
0931:                    if (text.charAt(start + delta) != s.charAt(sAnchor + delta)) {
0932:                        return 0;
0933:                    }
0934:                }
0935:                return span;
0936:            }
0937:
0938:            //#ifndef FOUNDATION
0939:            /**
0940:             * Tests whether the text matches at the offset, trying the multi-character
0941:             * strings first and then the single code point at offset.  Returns the end
0942:             * of the longest substring matched.  For now, an internal routine.
0943:             * NOTE(review): on failure this returns offset - 1, which is the documented
0944:             * -1 only when offset is 0.
0945:             * @internal
0946:             * @deprecated This API is ICU internal only.
0947:             */
0948:            public int matchesAt(CharSequence text, int offset) {
0949:                int len = -1;
0950:                strings: if (strings.size() != 0) {
0951:                    char firstChar = text.charAt(offset);
0952:                    String trial = null;
0953:                    Iterator it = strings.iterator();
0954:                    // Find the first string starting with firstChar.  The strings are
0955:                    // taken to be sorted, so a larger first char means none can match.
0956:                    while (it.hasNext()) {
0957:                        trial = (String) it.next();
0958:                        char firstStringChar = trial.charAt(0);
0959:                        if (firstStringChar > firstChar) break strings;
0960:                        if (firstStringChar == firstChar) break; // candidate found: stop skipping ahead
0961:                    }
0962:                    // now keep checking strings until we get the longest one
0963:                    while (true) {
0964:                        int tempLen = CollectionUtilities.matchesAt(text, offset, trial);
0965:                        if (len > tempLen) break strings;
0966:                        len = tempLen;
0967:                        if (!it.hasNext()) break;
0968:                        trial = (String) it.next();
0969:                    }
0970:                }
0971:                if (len < 2) { // nothing longer than one char matched: try the code point itself
0972:                    int cp = UTF16.charAt(text, offset);
0973:                    if (contains(cp)) {
0974:                        len = UTF16.getCharCount(cp);
0975:                    }
0976:                }
0977:                return offset + len;
0978:            }
0979:
0980:            //#endif
0981:
0982:            /**
0983:             * Implementation of UnicodeMatcher API.  Adds the contents of this
0984:             * set, i.e. everything this matcher may match, to the given
0985:             * set.
0986:             * @param toUnionTo the set into which to union the source characters
0987:             * @stable ICU 2.2
0988:             */
0989:            public void addMatchSetTo(UnicodeSet toUnionTo) {
0990:                toUnionTo.addAll(this);
0991:            }
0992:
0993:            /**
0994:             * Returns the index of the given character within this set, where
0995:             * the set is ordered by ascending code point.  If the character
0996:             * is not in this set, return -1.  The inverse of this method is
0997:             * <code>charAt()</code>.
0998:             * @param c the code point to look up
0999:             * @return an index from 0..size()-1, or -1
1000:             * @throws IllegalArgumentException if c is outside MIN_VALUE..MAX_VALUE
1001:             * @stable ICU 2.0
1002:             */
1003:            public int indexOf(int c) {
1004:                if (!(MIN_VALUE <= c && c <= MAX_VALUE)) {
1005:                    throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
1006:                }
1007:                int preceding = 0; // code points in the ranges that end below c
1008:                for (int i = 0;; i += 2) { // presumably always returns: the list ends with HIGH, above any valid c
1009:                    int start = list[i];
1010:                    if (c < start) {
1011:                        return -1; // c falls into the gap before this range
1012:                    }
1013:                    int limit = list[i + 1];
1014:                    if (c < limit) {
1015:                        return preceding + (c - start);
1016:                    }
1017:                    preceding += limit - start;
1018:                }
1019:            }
1020:
1021:            /**
1022:             * Returns the character at the given index within this set, where the set
1023:             * is ordered by ascending code point.  If the index is out of range, return
1024:             * -1.  The inverse of this method is <code>indexOf()</code>.
1025:             * @param index an index from 0..size()-1
1026:             * @return the character at the given index, or -1.
1027:             * @stable ICU 2.0
1028:             */
1029:            public int charAt(int index) {
1030:                if (index < 0) {
1031:                    return -1;
1032:                }
1033:                // Walk complete [start, limit) pairs only: for odd len the last
1034:                // entry is UNICODESET_HIGH, which clearing the low bit leaves out.
1035:                int pairedLen = len & ~1;
1036:                int remaining = index;
1037:                for (int i = 0; i < pairedLen; i += 2) {
1038:                    int start = list[i];
1039:                    int span = list[i + 1] - start;
1040:                    if (remaining < span) {
1041:                        return start + remaining;
1042:                    }
1043:                    remaining -= span;
1044:                }
1045:                return -1;
1046:            }
1047:
1048:            /**
1049:             * Adds the specified range to this set if it is not already
1050:             * present.  If this set already contains the specified range,
1051:             * the call leaves this set unchanged.  If <code>start > end</code>
1052:             * then an empty range is added, leaving the set unchanged.
1053:             * @param start first character, inclusive, of range to be added to this set.
1054:             * @param end last character, inclusive, of range to be added to this set.
1055:             * @return this object, for chaining
1056:             * @throws IllegalArgumentException if start or end is not within MIN_VALUE..MAX_VALUE
1057:             * @stable ICU 2.0
1058:             */
1059:            public UnicodeSet add(int start, int end) {
1060:                checkFrozen();
1061:                add_unchecked(start, end);
1062:                return this;
1063:            }
1064:
1065:            // For internal use, after checkFrozen has been called: adds the inclusive range
1066:            // start..end (a single code point when start == end; nothing when start > end).
1067:            private UnicodeSet add_unchecked(int start, int end) {
1068:                if (start < MIN_VALUE || start > MAX_VALUE) {
1069:                    throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
1070:                }
1071:                if (end < MIN_VALUE || end > MAX_VALUE) {
1072:                    throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
1073:                }
1074:                if (start < end) {
1075:                    add(range(start, end), 2, 0);
1076:                } else if (start == end) {
1077:                    // Stay on the unchecked path; the public add(int) would repeat checkFrozen().
1078:                    add_unchecked(start);
1079:                }
1080:                return this;
1081:            }
1082:
1083:            //    /**
1084:            //     * Format out the inversion list as a string, for debugging.  Uncomment when
1085:            //     * needed.
1086:            //     */
1087:            //    public final String dump() {
1088:            //        StringBuffer buf = new StringBuffer("[");
1089:            //        for (int i=0; i<len; ++i) {
1090:            //            if (i != 0) buf.append(", ");
1091:            //            int c = list[i];
1092:            //            //if (c <= 0x7F && c != '\n' && c != '\r' && c != '\t' && c != ' ') {
1093:            //            //    buf.append((char) c);
1094:            //            //} else {
1095:            //                buf.append("U+").append(Utility.hex(c, (c<0x10000)?4:6));
1096:            //            //}
1097:            //        }
1098:            //        buf.append("]");
1099:            //        return buf.toString();
1100:            //    }
1101:
1102:            /**
1103:             * Adds the specified character to this set if it is not already present.
1104:             * If this set already contains the specified character, the call leaves this set unchanged.
1105:             * @stable ICU 2.0
1106:             */
1107:            public final UnicodeSet add(int c) {
1108:                checkFrozen();
1109:                add_unchecked(c);
1110:                return this;
1111:            }
1112:
1113:            // For internal use only, after checkFrozen has been called: inserts the single code point c into the inversion list.
1114:            private final UnicodeSet add_unchecked(int c) {
1115:                if (c < MIN_VALUE || c > MAX_VALUE) {
1116:                    throw new IllegalArgumentException("Invalid code point U+"
1117:                            + Utility.hex(c, 6));
1118:                }
1119:
1120:                // find smallest i such that c < list[i]
1121:                // if odd, then it is IN the set
1122:                // if even, then it is OUT of the set
1123:                int i = findCodePoint(c);
1124:
1125:                // already in set? then there is nothing to change
1126:                if ((i & 1) != 0)
1127:                    return this ;
1128:
1129:                // HIGH is 0x110000
1130:                // assert(list[len-1] == HIGH);
1131:
1132:                // empty = [HIGH]
1133:                // [start_0, limit_0, start_1, limit_1, HIGH]
1134:
1135:                // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
1136:                //                             ^
1137:                //                             list[i]
1138:
1139:                // i == 0 means c is before the first range
1140:
1141:                if (c == list[i] - 1) {
1142:                    // c is directly before the start of the next range: pull that start down to c
1143:                    list[i] = c;
1144:                    // if we touched the HIGH mark, then add a new one
1145:                    if (c == MAX_VALUE) {
1146:                        ensureCapacity(len + 1);
1147:                        list[len++] = HIGH;
1148:                    }
1149:                    if (i > 0 && c == list[i - 1]) {
1150:                        // c also equals the limit of the prior range: collapse adjacent ranges
1151:
1152:                        // [..., start_k-1, c, c, limit_k, ..., HIGH]
1153:                        //                     ^
1154:                        //                     list[i]
1155:                        System.arraycopy(list, i + 1, list, i - 1, len - i - 1);
1156:                        len -= 2;
1157:                    }
1158:                }
1159:
1160:                else if (i > 0 && c == list[i - 1]) {
1161:                    // c is directly after the end of the prior range: push that limit up by one
1162:                    list[i - 1]++;
1163:                    // no need to check for collapse here: c == list[i] - 1 was handled by the branch above
1164:                }
1165:
1166:                else {
1167:                    // At this point we know the new char is not adjacent to
1168:                    // any existing ranges, and it is not 10FFFF.
1169:
1170:                    // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
1171:                    //                             ^
1172:                    //                             list[i]
1173:
1174:                    // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
1175:                    //                             ^
1176:                    //                             list[i]
1177:
1178:                    // Don't use ensureCapacity() to save on copying.
1179:                    // NOTE: This has no measurable impact on performance,
1180:                    // but it might help in some usage patterns.
1181:                    if (len + 2 > list.length) {
1182:                        int[] temp = new int[len + 2 + GROW_EXTRA];
1183:                        if (i != 0)
1184:                            System.arraycopy(list, 0, temp, 0, i);
1185:                        System.arraycopy(list, i, temp, i + 2, len - i);
1186:                        list = temp;
1187:                    } else {
1188:                        System.arraycopy(list, i, list, i + 2, len - i);
1189:                    }
1190:
1191:                    list[i] = c;
1192:                    list[i + 1] = c + 1;
1193:                    len += 2;
1194:                }
1195:
1196:                pat = null; // drop the retained pattern so that _toPattern() generates a new one
1197:                return this ;
1198:            }
1199:
1200:            /**
1201:             * Adds the specified multicharacter to this set if it is not already present.
1202:             * If this set already contains the multicharacter, the call leaves this set unchanged.
1203:             * Thus "ch" => {"ch"}
1204:             * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1205:             * @param s the source string
1206:             * @return this object, for chaining
1207:             * @stable ICU 2.0
1208:             */
1209:            public final UnicodeSet add(String s) {
1210:                checkFrozen();
1211:                int cp = getSingleCP(s);
1212:                if (cp >= 0) {
1213:                    // s is exactly one code point, so it is stored as a character.
1214:                    add_unchecked(cp, cp);
1215:                    return this;
1216:                }
1217:                strings.add(s);
1218:                pat = null;
1219:                return this;
1220:            }
1221:
1222:            /**
1223:             * Returns the code point of a string that consists of exactly one code point.
1224:             * @param s the string to test
1225:             * @return the code point, or -1 if s holds more than a single code point
1226:             * @throws IllegalArgumentException if s is empty
1227:             */
1228:            private static int getSingleCP(String s) {
1229:                switch (s.length()) {
1230:                case 0:
1231:                    throw new IllegalArgumentException(
1232:                            "Can't use zero-length strings in UnicodeSet");
1233:                case 1:
1234:                    return s.charAt(0);
1235:                case 2:
1236:                    // Two chars are a single code point only as a surrogate pair.
1237:                    int cp = UTF16.charAt(s, 0);
1238:                    return (cp > 0xFFFF) ? cp : -1;
1239:                default:
1240:                    // three or more chars
1241:                    return -1;
1242:                }
1243:            }
1244:
1245:            /**
1246:             * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
1247:             * If this set already any particular character, it has no effect on that character.
1248:             * @param s the source string
1249:             * @return this object, for chaining
1250:             * @stable ICU 2.0
1251:             */
1252:            public final UnicodeSet addAll(String s) {
1253:                checkFrozen();
1254:                int cp;
1255:                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
1256:                    cp = UTF16.charAt(s, i);
1257:                    add_unchecked(cp, cp);
1258:                }
1259:                return this ;
1260:            }
1261:
1262:            /**
1263:             * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
1264:             * If this set already any particular character, it has no effect on that character.
1265:             * @param s the source string
1266:             * @return this object, for chaining
1267:             * @stable ICU 2.0
1268:             */
1269:            public final UnicodeSet retainAll(String s) {
1270:                return retainAll(fromAll(s));
1271:            }
1272:
1273:            /**
1274:             * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
1275:             * If this set already any particular character, it has no effect on that character.
1276:             * @param s the source string
1277:             * @return this object, for chaining
1278:             * @stable ICU 2.0
1279:             */
1280:            public final UnicodeSet complementAll(String s) {
1281:                return complementAll(fromAll(s));
1282:            }
1283:
1284:            /**
1285:             * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
1286:             * If this set already any particular character, it has no effect on that character.
1287:             * @param s the source string
1288:             * @return this object, for chaining
1289:             * @stable ICU 2.0
1290:             */
1291:            public final UnicodeSet removeAll(String s) {
1292:                return removeAll(fromAll(s));
1293:            }
1294:
1295:            /**
1296:             * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
1297:             * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1298:             * @param s the source string
1299:             * @return a newly created set containing the given string
1300:             * @stable ICU 2.0
1301:             */
1302:            public static UnicodeSet from(String s) {
1303:                return new UnicodeSet().add(s);
1304:            }
1305:
1306:            /**
1307:             * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
1308:             * @param s the source string
1309:             * @return a newly created set containing the given characters
1310:             * @stable ICU 2.0
1311:             */
1312:            public static UnicodeSet fromAll(String s) {
1313:                return new UnicodeSet().addAll(s);
1314:            }
1315:
1316:            /**
1317:             * Retain only the elements in this set that are contained in the
1318:             * specified range.  If <code>end > start</code> then an empty range is
1319:             * retained, leaving the set empty.
1320:             *
1321:             * @param start first character, inclusive, of range to be retained
1322:             * to this set.
1323:             * @param end last character, inclusive, of range to be retained
1324:             * to this set.
1325:             * @stable ICU 2.0
1326:             */
1327:            public UnicodeSet retain(int start, int end) {
1328:                checkFrozen();
1329:                if (start < MIN_VALUE || start > MAX_VALUE) {
1330:                    throw new IllegalArgumentException("Invalid code point U+"
1331:                            + Utility.hex(start, 6));
1332:                }
1333:                if (end < MIN_VALUE || end > MAX_VALUE) {
1334:                    throw new IllegalArgumentException("Invalid code point U+"
1335:                            + Utility.hex(end, 6));
1336:                }
1337:                if (start <= end) {
1338:                    retain(range(start, end), 2, 0);
1339:                } else {
1340:                    clear();
1341:                }
1342:                return this ;
1343:            }
1344:
1345:            /**
1346:             * Retain the specified character from this set if it is present.
1347:             * Upon return this set will be empty if it did not contain c, or
1348:             * will only contain c if it did contain c.
1349:             * @param c the character to be retained
1350:             * @return this object, for chaining
1351:             * @stable ICU 2.0
1352:             */
1353:            public final UnicodeSet retain(int c) {
1354:                return retain(c, c);
1355:            }
1356:
1357:            /**
1358:             * Retain the specified string in this set if it is present.
1359:             * Upon return this set will be empty if it did not contain s, or
1360:             * will only contain s if it did contain s.
1361:             * @param s the string to be retained
1362:             * @return this object, for chaining
1363:             * @stable ICU 2.0
1364:             */
1365:            public final UnicodeSet retain(String s) {
1366:                int cp = getSingleCP(s);
1367:                if (cp < 0) {
1368:                    boolean isIn = strings.contains(s);
1369:                    if (isIn && size() == 1) {
1370:                        return this ;
1371:                    }
1372:                    clear();
1373:                    strings.add(s);
1374:                    pat = null;
1375:                } else {
1376:                    retain(cp, cp);
1377:                }
1378:                return this ;
1379:            }
1380:
1381:            /**
1382:             * Removes the specified range from this set if it is present.
1383:             * The set will not contain the specified range once the call
1384:             * returns.  If <code>end > start</code> then an empty range is
1385:             * removed, leaving the set unchanged.
1386:             *
1387:             * @param start first character, inclusive, of range to be removed
1388:             * from this set.
1389:             * @param end last character, inclusive, of range to be removed
1390:             * from this set.
1391:             * @stable ICU 2.0
1392:             */
1393:            public UnicodeSet remove(int start, int end) {
1394:                checkFrozen();
1395:                if (start < MIN_VALUE || start > MAX_VALUE) {
1396:                    throw new IllegalArgumentException("Invalid code point U+"
1397:                            + Utility.hex(start, 6));
1398:                }
1399:                if (end < MIN_VALUE || end > MAX_VALUE) {
1400:                    throw new IllegalArgumentException("Invalid code point U+"
1401:                            + Utility.hex(end, 6));
1402:                }
1403:                if (start <= end) {
1404:                    retain(range(start, end), 2, 2);
1405:                }
1406:                return this ;
1407:            }
1408:
1409:            /**
1410:             * Removes the specified character from this set if it is present.
1411:             * The set will not contain the specified character once the call
1412:             * returns.
1413:             * @param c the character to be removed
1414:             * @return this object, for chaining
1415:             * @stable ICU 2.0
1416:             */
1417:            public final UnicodeSet remove(int c) {
1418:                return remove(c, c);
1419:            }
1420:
1421:            /**
1422:             * Removes the specified string from this set if it is present.
1423:             * The set will not contain the specified string once the call
1424:             * returns.
1425:             * @param s the string to be removed
1426:             * @return this object, for chaining
1427:             * @stable ICU 2.0
1428:             */
1429:            public final UnicodeSet remove(String s) {
1430:                int cp = getSingleCP(s);
1431:                if (cp < 0) {
1432:                    strings.remove(s);
1433:                    pat = null;
1434:                } else {
1435:                    remove(cp, cp);
1436:                }
1437:                return this ;
1438:            }
1439:
1440:            /**
1441:             * Complements the specified range in this set.  Any character in
1442:             * the range will be removed if it is in this set, or will be
1443:             * added if it is not in this set.  If <code>end > start</code>
1444:             * then an empty range is complemented, leaving the set unchanged.
1445:             *
1446:             * @param start first character, inclusive, of range to be removed
1447:             * from this set.
1448:             * @param end last character, inclusive, of range to be removed
1449:             * from this set.
1450:             * @stable ICU 2.0
1451:             */
1452:            public UnicodeSet complement(int start, int end) {
1453:                checkFrozen();
1454:                if (start < MIN_VALUE || start > MAX_VALUE) {
1455:                    throw new IllegalArgumentException("Invalid code point U+"
1456:                            + Utility.hex(start, 6));
1457:                }
1458:                if (end < MIN_VALUE || end > MAX_VALUE) {
1459:                    throw new IllegalArgumentException("Invalid code point U+"
1460:                            + Utility.hex(end, 6));
1461:                }
1462:                if (start <= end) {
1463:                    xor(range(start, end), 2, 0);
1464:                }
1465:                pat = null;
1466:                return this ;
1467:            }
1468:
1469:            /**
1470:             * Complements the specified character in this set.  The character
1471:             * will be removed if it is in this set, or will be added if it is
1472:             * not in this set.
1473:             * @stable ICU 2.0
1474:             */
1475:            public final UnicodeSet complement(int c) {
1476:                return complement(c, c);
1477:            }
1478:
1479:            /**
1480:             * This is equivalent to
1481:             * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
1482:             * @stable ICU 2.0
1483:             */
1484:            public UnicodeSet complement() {
1485:                checkFrozen();
1486:                if (list[0] == LOW) {
1487:                    System.arraycopy(list, 1, list, 0, len - 1);
1488:                    --len;
1489:                } else {
1490:                    ensureCapacity(len + 1);
1491:                    System.arraycopy(list, 0, list, 1, len);
1492:                    list[0] = LOW;
1493:                    ++len;
1494:                }
1495:                pat = null;
1496:                return this ;
1497:            }
1498:
1499:            /**
1500:             * Complement the specified string in this set.
1501:             * The set will not contain the specified string once the call
1502:             * returns.
1503:             * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1504:             * @param s the string to complement
1505:             * @return this object, for chaining
1506:             * @stable ICU 2.0
1507:             */
1508:            public final UnicodeSet complement(String s) {
1509:                checkFrozen();
1510:                int cp = getSingleCP(s);
1511:                if (cp < 0) {
1512:                    if (strings.contains(s))
1513:                        strings.remove(s);
1514:                    else
1515:                        strings.add(s);
1516:                    pat = null;
1517:                } else {
1518:                    complement(cp, cp);
1519:                }
1520:                return this ;
1521:            }
1522:
1523:            /**
1524:             * Returns true if this set contains the given character.
1525:             * @param c character to be checked for containment
1526:             * @return true if the test condition is met
1527:             * @stable ICU 2.0
1528:             */
1529:            public boolean contains(int c) {
1530:                if (c < MIN_VALUE || c > MAX_VALUE) {
1531:                    throw new IllegalArgumentException("Invalid code point U+"
1532:                            + Utility.hex(c, 6));
1533:                }
1534:
1535:                /*
1536:                // Set i to the index of the start item greater than ch
1537:                // We know we will terminate without length test!
1538:                int i = -1;
1539:                while (true) {
1540:                    if (c < list[++i]) break;
1541:                }
1542:                 */
1543:
1544:                int i = findCodePoint(c);
1545:
1546:                return ((i & 1) != 0); // return true if odd
1547:            }
1548:
1549:            /**
1550:             * Returns the smallest value i such that c < list[i].  Caller
1551:             * must ensure that c is a legal value or this method will enter
1552:             * an infinite loop.  This method performs a binary search.
1553:             * @param c a character in the range MIN_VALUE..MAX_VALUE
1554:             * inclusive
1555:             * @return the smallest integer i in the range 0..len-1,
1556:             * inclusive, such that c < list[i]
1557:             */
1558:            private final int findCodePoint(int c) {
1559:                /* Examples:
1560:                                                   findCodePoint(c)
1561:                   set              list[]         c=0 1 3 4 7 8
1562:                   ===              ==============   ===========
1563:                   []               [110000]         0 0 0 0 0 0
1564:                   [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
1565:                   [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
1566:                   [:all:]          [0, 110000]      1 1 1 1 1 1
1567:                 */
1568:
1569:                // Return the smallest i such that c < list[i].  Assume
1570:                // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
1571:                if (c < list[0])
1572:                    return 0;
1573:                // High runner test.  c is often after the last range, so an
1574:                // initial check for this condition pays off.
1575:                if (len >= 2 && c >= list[len - 2])
1576:                    return len - 1;
1577:                int lo = 0;
1578:                int hi = len - 1;
1579:                // invariant: c >= list[lo]
1580:                // invariant: c < list[hi]
1581:                for (;;) {
1582:                    int i = (lo + hi) >>> 1;
1583:                    if (i == lo)
1584:                        return hi;
1585:                    if (c < list[i]) {
1586:                        hi = i;
1587:                    } else {
1588:                        lo = i;
1589:                    }
1590:                }
1591:            }
1592:
1593:            //    //----------------------------------------------------------------
1594:            //    // Unrolled binary search
1595:            //    //----------------------------------------------------------------
1596:            //
1597:            //    private int validLen = -1; // validated value of len
1598:            //    private int topOfLow;
1599:            //    private int topOfHigh;
1600:            //    private int power;
1601:            //    private int deltaStart;
1602:            //
1603:            //    private void validate() {
1604:            //        if (len <= 1) {
1605:            //            throw new IllegalArgumentException("list.len==" + len + "; must be >1");
1606:            //        }
1607:            //
1608:            //        // find greatest power of 2 less than or equal to len
1609:            //        for (power = exp2.length-1; power > 0 && exp2[power] > len; power--) {}
1610:            //
1611:            //        // assert(exp2[power] <= len);
1612:            //
1613:            //        // determine the starting points
1614:            //        topOfLow = exp2[power] - 1;
1615:            //        topOfHigh = len - 1;
1616:            //        deltaStart = exp2[power-1];
1617:            //        validLen = len;
1618:            //    }
1619:            //
1620:            //    private static final int exp2[] = {
1621:            //        0x1, 0x2, 0x4, 0x8,
1622:            //        0x10, 0x20, 0x40, 0x80,
1623:            //        0x100, 0x200, 0x400, 0x800,
1624:            //        0x1000, 0x2000, 0x4000, 0x8000,
1625:            //        0x10000, 0x20000, 0x40000, 0x80000,
1626:            //        0x100000, 0x200000, 0x400000, 0x800000,
1627:            //        0x1000000, 0x2000000, 0x4000000, 0x8000000,
1628:            //        0x10000000, 0x20000000 // , 0x40000000 // no unsigned int in Java
1629:            //    };
1630:            //
1631:            //    /**
1632:            //     * Unrolled lowest index GT.
1633:            //     */
1634:            //    private final int leastIndexGT(int searchValue) {
1635:            //
1636:            //        if (len != validLen) {
1637:            //            if (len == 1) return 0;
1638:            //            validate();
1639:            //        }
1640:            //        int temp;
1641:            //
1642:            //        // set up initial range to search. Each subrange is a power of two in length
1643:            //        int high = searchValue < list[topOfLow] ? topOfLow : topOfHigh;
1644:            //
1645:            //        // Completely unrolled binary search, following "Programming Pearls"
1646:            //        // Each case deliberately falls through to the next
1647:            //        // Logically, list[-1] < all_search_values && list[count] > all_search_values
1648:            //        // although the values -1 and count are never actually touched.
1649:            //
1650:            //        // The bounds at each point are low & high,
1651:            //        // where low == high - delta*2
1652:            //        // so high - delta is the midpoint
1653:            //
1654:            //        // The invariant AFTER each line is that list[low] < searchValue <= list[high]
1655:            //
1656:            //        switch (power) {
1657:            //        //case 31: if (searchValue < list[temp = high-0x40000000]) high = temp; // no unsigned int in Java
1658:            //        case 30: if (searchValue < list[temp = high-0x20000000]) high = temp;
1659:            //        case 29: if (searchValue < list[temp = high-0x10000000]) high = temp;
1660:            //
1661:            //        case 28: if (searchValue < list[temp = high- 0x8000000]) high = temp;
1662:            //        case 27: if (searchValue < list[temp = high- 0x4000000]) high = temp;
1663:            //        case 26: if (searchValue < list[temp = high- 0x2000000]) high = temp;
1664:            //        case 25: if (searchValue < list[temp = high- 0x1000000]) high = temp;
1665:            //
1666:            //        case 24: if (searchValue < list[temp = high-  0x800000]) high = temp;
1667:            //        case 23: if (searchValue < list[temp = high-  0x400000]) high = temp;
1668:            //        case 22: if (searchValue < list[temp = high-  0x200000]) high = temp;
1669:            //        case 21: if (searchValue < list[temp = high-  0x100000]) high = temp;
1670:            //
1671:            //        case 20: if (searchValue < list[temp = high-   0x80000]) high = temp;
1672:            //        case 19: if (searchValue < list[temp = high-   0x40000]) high = temp;
1673:            //        case 18: if (searchValue < list[temp = high-   0x20000]) high = temp;
1674:            //        case 17: if (searchValue < list[temp = high-   0x10000]) high = temp;
1675:            //
1676:            //        case 16: if (searchValue < list[temp = high-    0x8000]) high = temp;
1677:            //        case 15: if (searchValue < list[temp = high-    0x4000]) high = temp;
1678:            //        case 14: if (searchValue < list[temp = high-    0x2000]) high = temp;
1679:            //        case 13: if (searchValue < list[temp = high-    0x1000]) high = temp;
1680:            //
1681:            //        case 12: if (searchValue < list[temp = high-     0x800]) high = temp;
1682:            //        case 11: if (searchValue < list[temp = high-     0x400]) high = temp;
1683:            //        case 10: if (searchValue < list[temp = high-     0x200]) high = temp;
1684:            //        case  9: if (searchValue < list[temp = high-     0x100]) high = temp;
1685:            //
1686:            //        case  8: if (searchValue < list[temp = high-      0x80]) high = temp;
1687:            //        case  7: if (searchValue < list[temp = high-      0x40]) high = temp;
1688:            //        case  6: if (searchValue < list[temp = high-      0x20]) high = temp;
1689:            //        case  5: if (searchValue < list[temp = high-      0x10]) high = temp;
1690:            //
1691:            //        case  4: if (searchValue < list[temp = high-       0x8]) high = temp;
1692:            //        case  3: if (searchValue < list[temp = high-       0x4]) high = temp;
1693:            //        case  2: if (searchValue < list[temp = high-       0x2]) high = temp;
1694:            //        case  1: if (searchValue < list[temp = high-       0x1]) high = temp;
1695:            //        }
1696:            //
1697:            //        return high;
1698:            //    }
1699:            //
1700:            //    // For debugging only
1701:            //    public int len() {
1702:            //        return len;
1703:            //    }
1704:            //
1705:            //    //----------------------------------------------------------------
1706:            //    //----------------------------------------------------------------
1707:
1708:            /**
1709:             * Returns true if this set contains every character
1710:             * of the given range.
1711:             * @param start first character, inclusive, of the range
1712:             * @param end last character, inclusive, of the range
1713:             * @return true if the test condition is met
1714:             * @stable ICU 2.0
1715:             */
1716:            public boolean contains(int start, int end) {
1717:                if (start < MIN_VALUE || start > MAX_VALUE) {
1718:                    throw new IllegalArgumentException("Invalid code point U+"
1719:                            + Utility.hex(start, 6));
1720:                }
1721:                if (end < MIN_VALUE || end > MAX_VALUE) {
1722:                    throw new IllegalArgumentException("Invalid code point U+"
1723:                            + Utility.hex(end, 6));
1724:                }
1725:                //int i = -1;
1726:                //while (true) {
1727:                //    if (start < list[++i]) break;
1728:                //}
1729:                int i = findCodePoint(start);
1730:                return ((i & 1) != 0 && end < list[i]);
1731:            }
1732:
1733:            /**
1734:             * Returns <tt>true</tt> if this set contains the given
1735:             * multicharacter string.
1736:             * @param s string to be checked for containment
1737:             * @return <tt>true</tt> if this set contains the specified string
1738:             * @stable ICU 2.0
1739:             */
1740:            public final boolean contains(String s) {
1741:
1742:                int cp = getSingleCP(s);
1743:                if (cp < 0) {
1744:                    return strings.contains(s);
1745:                } else {
1746:                    return contains(cp);
1747:                }
1748:            }
1749:
1750:            /**
1751:             * Returns true if this set contains all the characters and strings
1752:             * of the given set.
1753:             * @param c set to be checked for containment
1754:             * @return true if the test condition is met
1755:             * @stable ICU 2.0
1756:             */
1757:            public boolean containsAll(UnicodeSet c) {
1758:                // The specified set is a subset if all of its pairs are contained in
1759:                // this set.  It's possible to code this more efficiently in terms of
1760:                // direct manipulation of the inversion lists if the need arises.
1761:                int n = c.getRangeCount();
1762:                for (int i = 0; i < n; ++i) {
1763:                    if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) {
1764:                        return false;
1765:                    }
1766:                }
1767:                if (!strings.containsAll(c.strings))
1768:                    return false;
1769:                return true;
1770:            }
1771:
1772:            /**
1773:             * Returns true if there is a partition of the string such that this set contains each of the partitioned strings.
1774:             * For example, for the Unicode set [a{bc}{cd}]<br>
1775:             * containsAll is true for each of: "a", "bc", ""cdbca"<br>
1776:             * containsAll is false for each of: "acb", "bcda", "bcx"<br>
1777:             * @param s string containing characters to be checked for containment
1778:             * @return true if the test condition is met
1779:             * @stable ICU 2.0
1780:             */
1781:            public boolean containsAll(String s) {
1782:                int cp;
1783:                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
1784:                    cp = UTF16.charAt(s, i);
1785:                    if (!contains(cp)) {
1786:                        if (strings.size() == 0) {
1787:                            return false;
1788:                        }
1789:                        return containsAll(s, 0);
1790:                    }
1791:                }
1792:                return true;
1793:            }
1794:
1795:            /**
1796:             * Recursive routine called if we fail to find a match in containsAll, and there are strings
1797:             * @param s source string
1798:             * @param i point to match to the end on
1799:             * @return true if ok
1800:             */
1801:            private boolean containsAll(String s, int i) {
1802:                if (i >= s.length()) {
1803:                    return true;
1804:                }
1805:                int cp = UTF16.charAt(s, i);
1806:                if (contains(cp) && containsAll(s, i + UTF16.getCharCount(cp))) {
1807:                    return true;
1808:                }
1809:
1810:                Iterator it = strings.iterator();
1811:                while (it.hasNext()) {
1812:                    String setStr = (String) it.next();
1813:                    if (s.startsWith(setStr, i)
1814:                            && containsAll(s, i + setStr.length())) {
1815:                        return true;
1816:                    }
1817:                }
1818:                return false;
1819:
1820:            }
1821:
1822:            /**
1823:             * @return regex pattern equivalent to this UnicodeSet
1824:             * @internal
1825:             * @deprecated This API is ICU internal only.
1826:             */
1827:            public String getRegexEquivalent() {
1828:                if (strings.size() == 0)
1829:                    return toString();
1830:                StringBuffer result = new StringBuffer("(?:");
1831:                _generatePattern(result, true, false);
1832:                Iterator it = strings.iterator();
1833:                while (it.hasNext()) {
1834:                    result.append('|');
1835:                    _appendToPat(result, (String) it.next(), true);
1836:                }
1837:                return result.append(")").toString();
1838:            }
1839:
1840:            /**
1841:             * Returns true if this set contains none of the characters
1842:             * of the given range.
1843:             * @param start first character, inclusive, of the range
1844:             * @param end last character, inclusive, of the range
1845:             * @return true if the test condition is met
1846:             * @stable ICU 2.0
1847:             */
1848:            public boolean containsNone(int start, int end) {
1849:                if (start < MIN_VALUE || start > MAX_VALUE) {
1850:                    throw new IllegalArgumentException("Invalid code point U+"
1851:                            + Utility.hex(start, 6));
1852:                }
1853:                if (end < MIN_VALUE || end > MAX_VALUE) {
1854:                    throw new IllegalArgumentException("Invalid code point U+"
1855:                            + Utility.hex(end, 6));
1856:                }
1857:                int i = -1;
1858:                while (true) {
1859:                    if (start < list[++i])
1860:                        break;
1861:                }
1862:                return ((i & 1) == 0 && end < list[i]);
1863:            }
1864:
1865:            /**
1866:             * Returns true if none of the characters or strings in this UnicodeSet appears in the string.
1867:             * For example, for the Unicode set [a{bc}{cd}]<br>
1868:             * containsNone is true for: "xy", "cb"<br>
1869:             * containsNone is false for: "a", "bc", "bcd"<br>
1870:             * @param c set to be checked for containment
1871:             * @return true if the test condition is met
1872:             * @stable ICU 2.0
1873:             */
1874:            public boolean containsNone(UnicodeSet c) {
1875:                // The specified set is a subset if all of its pairs are contained in
1876:                // this set.  It's possible to code this more efficiently in terms of
1877:                // direct manipulation of the inversion lists if the need arises.
1878:                int n = c.getRangeCount();
1879:                for (int i = 0; i < n; ++i) {
1880:                    if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
1881:                        return false;
1882:                    }
1883:                }
1884:                if (!SortedSetRelation.hasRelation(strings,
1885:                        SortedSetRelation.DISJOINT, c.strings))
1886:                    return false;
1887:                return true;
1888:            }
1889:
1890:            /**
1891:             * Returns true if this set contains none of the characters
1892:             * of the given string.
1893:             * @param s string containing characters to be checked for containment
1894:             * @return true if the test condition is met
1895:             * @stable ICU 2.0
1896:             */
1897:            public boolean containsNone(String s) {
1898:                int cp;
1899:                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
1900:                    cp = UTF16.charAt(s, i);
1901:                    if (contains(cp))
1902:                        return false;
1903:                }
1904:                if (strings.size() == 0)
1905:                    return true;
1906:                // do a last check to make sure no strings are in.
1907:                for (Iterator it = strings.iterator(); it.hasNext();) {
1908:                    String item = (String) it.next();
1909:                    if (s.indexOf(item) >= 0)
1910:                        return false;
1911:                }
1912:                return true;
1913:            }
1914:
1915:            /**
1916:             * Returns true if this set contains one or more of the characters
1917:             * in the given range.
1918:             * @param start first character, inclusive, of the range
1919:             * @param end last character, inclusive, of the range
1920:             * @return true if the condition is met
1921:             * @stable ICU 2.0
1922:             */
1923:            public final boolean containsSome(int start, int end) {
1924:                return !containsNone(start, end);
1925:            }
1926:
1927:            /**
1928:             * Returns true if this set contains one or more of the characters
1929:             * and strings of the given set.
1930:             * @param s set to be checked for containment
1931:             * @return true if the condition is met
1932:             * @stable ICU 2.0
1933:             */
1934:            public final boolean containsSome(UnicodeSet s) {
1935:                return !containsNone(s);
1936:            }
1937:
1938:            /**
1939:             * Returns true if this set contains one or more of the characters
1940:             * of the given string.
1941:             * @param s string containing characters to be checked for containment
1942:             * @return true if the condition is met
1943:             * @stable ICU 2.0
1944:             */
1945:            public final boolean containsSome(String s) {
1946:                return !containsNone(s);
1947:            }
1948:
1949:            /**
1950:             * Adds all of the elements in the specified set to this set if
1951:             * they're not already present.  This operation effectively
1952:             * modifies this set so that its value is the <i>union</i> of the two
1953:             * sets.  The behavior of this operation is unspecified if the specified
1954:             * collection is modified while the operation is in progress.
1955:             *
1956:             * @param c set whose elements are to be added to this set.
1957:             * @stable ICU 2.0
1958:             */
1959:            public UnicodeSet addAll(UnicodeSet c) {
1960:                checkFrozen();
1961:                add(c.list, c.len, 0);
1962:                strings.addAll(c.strings);
1963:                return this ;
1964:            }
1965:
1966:            /**
1967:             * Retains only the elements in this set that are contained in the
1968:             * specified set.  In other words, removes from this set all of
1969:             * its elements that are not contained in the specified set.  This
1970:             * operation effectively modifies this set so that its value is
1971:             * the <i>intersection</i> of the two sets.
1972:             *
1973:             * @param c set that defines which elements this set will retain.
1974:             * @stable ICU 2.0
1975:             */
1976:            public UnicodeSet retainAll(UnicodeSet c) {
1977:                checkFrozen();
1978:                retain(c.list, c.len, 0);
1979:                strings.retainAll(c.strings);
1980:                return this ;
1981:            }
1982:
1983:            /**
1984:             * Removes from this set all of its elements that are contained in the
1985:             * specified set.  This operation effectively modifies this
1986:             * set so that its value is the <i>asymmetric set difference</i> of
1987:             * the two sets.
1988:             *
1989:             * @param c set that defines which elements will be removed from
1990:             *          this set.
1991:             * @stable ICU 2.0
1992:             */
1993:            public UnicodeSet removeAll(UnicodeSet c) {
1994:                checkFrozen();
1995:                retain(c.list, c.len, 2);
1996:                strings.removeAll(c.strings);
1997:                return this ;
1998:            }
1999:
2000:            /**
2001:             * Complements in this set all elements contained in the specified
2002:             * set.  Any character in the other set will be removed if it is
2003:             * in this set, or will be added if it is not in this set.
2004:             *
2005:             * @param c set that defines which elements will be complemented from
2006:             *          this set.
2007:             * @stable ICU 2.0
2008:             */
2009:            public UnicodeSet complementAll(UnicodeSet c) {
2010:                checkFrozen();
2011:                xor(c.list, c.len, 0);
2012:                SortedSetRelation.doOperation(strings,
2013:                        SortedSetRelation.COMPLEMENTALL, c.strings);
2014:                return this ;
2015:            }
2016:
2017:            /**
2018:             * Removes all of the elements from this set.  This set will be
2019:             * empty after this call returns.
2020:             * @stable ICU 2.0
2021:             */
2022:            public UnicodeSet clear() {
2023:                checkFrozen();
2024:                list[0] = HIGH;
2025:                len = 1;
2026:                pat = null;
2027:                strings.clear();
2028:                return this ;
2029:            }
2030:
2031:            /**
2032:             * Iteration method that returns the number of ranges contained in
2033:             * this set.
2034:             * @see #getRangeStart
2035:             * @see #getRangeEnd
2036:             * @stable ICU 2.0
2037:             */
2038:            public int getRangeCount() {
2039:                return len / 2;
2040:            }
2041:
2042:            /**
2043:             * Iteration method that returns the first character in the
2044:             * specified range of this set.
2045:             * @exception ArrayIndexOutOfBoundsException if index is outside
2046:             * the range <code>0..getRangeCount()-1</code>
2047:             * @see #getRangeCount
2048:             * @see #getRangeEnd
2049:             * @stable ICU 2.0
2050:             */
2051:            public int getRangeStart(int index) {
2052:                return list[index * 2];
2053:            }
2054:
2055:            /**
2056:             * Iteration method that returns the last character in the
2057:             * specified range of this set.
2058:             * @exception ArrayIndexOutOfBoundsException if index is outside
2059:             * the range <code>0..getRangeCount()-1</code>
2060:             * @see #getRangeStart
2061:             * @see #getRangeEnd
2062:             * @stable ICU 2.0
2063:             */
2064:            public int getRangeEnd(int index) {
2065:                return (list[index * 2 + 1] - 1);
2066:            }
2067:
2068:            /**
2069:             * Reallocate this objects internal structures to take up the least
2070:             * possible space, without changing this object's value.
2071:             * @stable ICU 2.0
2072:             */
2073:            public UnicodeSet compact() {
2074:                checkFrozen();
2075:                if (len != list.length) {
2076:                    int[] temp = new int[len];
2077:                    System.arraycopy(list, 0, temp, 0, len);
2078:                    list = temp;
2079:                }
2080:                rangeList = null;
2081:                buffer = null;
2082:                return this ;
2083:            }
2084:
2085:            /**
2086:             * Compares the specified object with this set for equality.  Returns
2087:             * <tt>true</tt> if the specified object is also a set, the two sets
2088:             * have the same size, and every member of the specified set is
2089:             * contained in this set (or equivalently, every member of this set is
2090:             * contained in the specified set).
2091:             *
2092:             * @param o Object to be compared for equality with this set.
2093:             * @return <tt>true</tt> if the specified Object is equal to this set.
2094:             * @stable ICU 2.0
2095:             */
2096:            public boolean equals(Object o) {
2097:                try {
2098:                    UnicodeSet that = (UnicodeSet) o;
2099:                    if (len != that.len)
2100:                        return false;
2101:                    for (int i = 0; i < len; ++i) {
2102:                        if (list[i] != that.list[i])
2103:                            return false;
2104:                    }
2105:                    if (!strings.equals(that.strings))
2106:                        return false;
2107:                } catch (Exception e) {
2108:                    return false;
2109:                }
2110:                return true;
2111:            }
2112:
2113:            /**
2114:             * Returns the hash code value for this set.
2115:             *
2116:             * @return the hash code value for this set.
2117:             * @see java.lang.Object#hashCode()
2118:             * @stable ICU 2.0
2119:             */
2120:            public int hashCode() {
2121:                int result = len;
2122:                for (int i = 0; i < len; ++i) {
2123:                    result *= 1000003;
2124:                    result += list[i];
2125:                }
2126:                return result;
2127:            }
2128:
2129:            /**
2130:             * Return a programmer-readable string representation of this object.
2131:             * @stable ICU 2.0
2132:             */
2133:            public String toString() {
2134:                return toPattern(true);
2135:            }
2136:
2137:            //----------------------------------------------------------------
2138:            // Implementation: Pattern parsing
2139:            //----------------------------------------------------------------
2140:
2141:            /**
2142:             * Parses the given pattern, starting at the given position.  The character
2143:             * at pattern.charAt(pos.getIndex()) must be '[', or the parse fails.
2144:             * Parsing continues until the corresponding closing ']'.  If a syntax error
2145:             * is encountered between the opening and closing brace, the parse fails.
2146:             * Upon return from a successful parse, the ParsePosition is updated to
2147:             * point to the character following the closing ']', and an inversion
2148:             * list for the parsed pattern is returned.  This method
2149:             * calls itself recursively to parse embedded subpatterns.
2150:             *
2151:             * @param pattern the string containing the pattern to be parsed.  The
2152:             * portion of the string from pos.getIndex(), which must be a '[', to the
2153:             * corresponding closing ']', is parsed.
2154:             * @param pos upon entry, the position at which to being parsing.  The
2155:             * character at pattern.charAt(pos.getIndex()) must be a '['.  Upon return
2156:             * from a successful parse, pos.getIndex() is either the character after the
2157:             * closing ']' of the parsed pattern, or pattern.length() if the closing ']'
2158:             * is the last character of the pattern string.
2159:             * @return an inversion list for the parsed substring
2160:             * of <code>pattern</code>
2161:             * @exception java.lang.IllegalArgumentException if the parse fails.
2162:             */
2163:            UnicodeSet applyPattern(String pattern, ParsePosition pos,
2164:                    SymbolTable symbols, int options) {
2165:
2166:                // Need to build the pattern in a temporary string because
2167:                // _applyPattern calls add() etc., which set pat to empty.
2168:                boolean parsePositionWasNull = pos == null;
2169:                if (parsePositionWasNull) {
2170:                    pos = new ParsePosition(0);
2171:                }
2172:
2173:                StringBuffer rebuiltPat = new StringBuffer();
2174:                RuleCharacterIterator chars = new RuleCharacterIterator(
2175:                        pattern, symbols, pos);
2176:                applyPattern(chars, symbols, rebuiltPat, options);
2177:                if (chars.inVariable()) {
2178:                    syntaxError(chars, "Extra chars in variable value");
2179:                }
2180:                pat = rebuiltPat.toString();
2181:                if (parsePositionWasNull) {
2182:                    int i = pos.getIndex();
2183:
2184:                    // Skip over trailing whitespace
2185:                    if ((options & IGNORE_SPACE) != 0) {
2186:                        i = Utility.skipWhitespace(pattern, i);
2187:                    }
2188:
2189:                    if (i != pattern.length()) {
2190:                        throw new IllegalArgumentException("Parse of \""
2191:                                + pattern + "\" failed at " + i);
2192:                    }
2193:                }
2194:                return this ;
2195:            }
2196:
2197:            /**
2198:             * Parse the pattern from the given RuleCharacterIterator.  The
2199:             * iterator is advanced over the parsed pattern.
2200:             * @param chars iterator over the pattern characters.  Upon return
2201:             * it will be advanced to the first character after the parsed
2202:             * pattern, or the end of the iteration if all characters are
2203:             * parsed.
2204:             * @param symbols symbol table to use to parse and dereference
2205:             * variables, or null if none.
2206:             * @param rebuiltPat the pattern that was parsed, rebuilt or
2207:             * copied from the input pattern, as appropriate.
2208:             * @param options a bit mask of zero or more of the following:
2209:             * IGNORE_SPACE, CASE.
2210:             */
2211:            void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
2212:                    StringBuffer rebuiltPat, int options) {
2213:
2214:                // Syntax characters: [ ] ^ - & { }
2215:
2216:                // Recognized special forms for chars, sets: c-c s-s s&s
2217:
2218:                int opts = RuleCharacterIterator.PARSE_VARIABLES
2219:                        | RuleCharacterIterator.PARSE_ESCAPES;
2220:                if ((options & IGNORE_SPACE) != 0) {
2221:                    opts |= RuleCharacterIterator.SKIP_WHITESPACE;
2222:                }
2223:
2224:                StringBuffer pat = new StringBuffer(), buf = null;
2225:                boolean usePat = false;
2226:                UnicodeSet scratch = null;
2227:                Object backup = null;
2228:
2229:                // mode: 0=before [, 1=between [...], 2=after ]
2230:                // lastItem: 0=none, 1=char, 2=set
2231:                int lastItem = 0, lastChar = 0, mode = 0;
2232:                char op = 0;
2233:
2234:                boolean invert = false;
2235:
2236:                clear();
2237:
2238:                while (mode != 2 && !chars.atEnd()) {
2239:                    if (false) {
2240:                        // Debugging assertion
2241:                        if (!((lastItem == 0 && op == 0)
2242:                                || (lastItem == 1 && (op == 0 || op == '-')) || (lastItem == 2 && (op == 0
2243:                                || op == '-' || op == '&')))) {
2244:                            throw new IllegalArgumentException();
2245:                        }
2246:                    }
2247:
2248:                    int c = 0;
2249:                    boolean literal = false;
2250:                    UnicodeSet nested = null;
2251:
2252:                    // -------- Check for property pattern
2253:
2254:                    // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
2255:                    int setMode = 0;
2256:                    if (resemblesPropertyPattern(chars, opts)) {
2257:                        setMode = 2;
2258:                    }
2259:
2260:                    // -------- Parse '[' of opening delimiter OR nested set.
2261:                    // If there is a nested set, use `setMode' to define how
2262:                    // the set should be parsed.  If the '[' is part of the
2263:                    // opening delimiter for this pattern, parse special
2264:                    // strings "[", "[^", "[-", and "[^-".  Check for stand-in
2265:                    // characters representing a nested set in the symbol
2266:                    // table.
2267:
2268:                    else {
2269:                        // Prepare to backup if necessary
2270:                        backup = chars.getPos(backup);
2271:                        c = chars.next(opts);
2272:                        literal = chars.isEscaped();
2273:
2274:                        if (c == '[' && !literal) {
2275:                            if (mode == 1) {
2276:                                chars.setPos(backup); // backup
2277:                                setMode = 1;
2278:                            } else {
2279:                                // Handle opening '[' delimiter
2280:                                mode = 1;
2281:                                pat.append('[');
2282:                                backup = chars.getPos(backup); // prepare to backup
2283:                                c = chars.next(opts);
2284:                                literal = chars.isEscaped();
2285:                                if (c == '^' && !literal) {
2286:                                    invert = true;
2287:                                    pat.append('^');
2288:                                    backup = chars.getPos(backup); // prepare to backup
2289:                                    c = chars.next(opts);
2290:                                    literal = chars.isEscaped();
2291:                                }
2292:                                // Fall through to handle special leading '-';
2293:                                // otherwise restart loop for nested [], \p{}, etc.
2294:                                if (c == '-') {
2295:                                    literal = true;
2296:                                    // Fall through to handle literal '-' below
2297:                                } else {
2298:                                    chars.setPos(backup); // backup
2299:                                    continue;
2300:                                }
2301:                            }
2302:                        } else if (symbols != null) {
2303:                            UnicodeMatcher m = symbols.lookupMatcher(c); // may be null
2304:                            if (m != null) {
2305:                                try {
2306:                                    nested = (UnicodeSet) m;
2307:                                    setMode = 3;
2308:                                } catch (ClassCastException e) {
2309:                                    syntaxError(chars, "Syntax error");
2310:                                }
2311:                            }
2312:                        }
2313:                    }
2314:
2315:                    // -------- Handle a nested set.  This either is inline in
2316:                    // the pattern or represented by a stand-in that has
2317:                    // previously been parsed and was looked up in the symbol
2318:                    // table.
2319:
2320:                    if (setMode != 0) {
2321:                        if (lastItem == 1) {
2322:                            if (op != 0) {
2323:                                syntaxError(chars,
2324:                                        "Char expected after operator");
2325:                            }
2326:                            add_unchecked(lastChar, lastChar);
2327:                            _appendToPat(pat, lastChar, false);
2328:                            lastItem = op = 0;
2329:                        }
2330:
2331:                        if (op == '-' || op == '&') {
2332:                            pat.append(op);
2333:                        }
2334:
2335:                        if (nested == null) {
2336:                            if (scratch == null)
2337:                                scratch = new UnicodeSet();
2338:                            nested = scratch;
2339:                        }
2340:                        switch (setMode) {
2341:                        case 1:
2342:                            nested.applyPattern(chars, symbols, pat, options);
2343:                            break;
2344:                        case 2:
2345:                            chars.skipIgnored(opts);
2346:                            nested.applyPropertyPattern(chars, pat, symbols);
2347:                            break;
2348:                        case 3: // `nested' already parsed
2349:                            nested._toPattern(pat, false);
2350:                            break;
2351:                        }
2352:
2353:                        usePat = true;
2354:
2355:                        if (mode == 0) {
2356:                            // Entire pattern is a category; leave parse loop
2357:                            set(nested);
2358:                            mode = 2;
2359:                            break;
2360:                        }
2361:
2362:                        switch (op) {
2363:                        case '-':
2364:                            removeAll(nested);
2365:                            break;
2366:                        case '&':
2367:                            retainAll(nested);
2368:                            break;
2369:                        case 0:
2370:                            addAll(nested);
2371:                            break;
2372:                        }
2373:
2374:                        op = 0;
2375:                        lastItem = 2;
2376:
2377:                        continue;
2378:                    }
2379:
2380:                    if (mode == 0) {
2381:                        syntaxError(chars, "Missing '['");
2382:                    }
2383:
2384:                    // -------- Parse special (syntax) characters.  If the
2385:                    // current character is not special, or if it is escaped,
2386:                    // then fall through and handle it below.
2387:
2388:                    if (!literal) {
2389:                        switch (c) {
2390:                        case ']':
2391:                            if (lastItem == 1) {
2392:                                add_unchecked(lastChar, lastChar);
2393:                                _appendToPat(pat, lastChar, false);
2394:                            }
2395:                            // Treat final trailing '-' as a literal
2396:                            if (op == '-') {
2397:                                add_unchecked(op, op);
2398:                                pat.append(op);
2399:                            } else if (op == '&') {
2400:                                syntaxError(chars, "Trailing '&'");
2401:                            }
2402:                            pat.append(']');
2403:                            mode = 2;
2404:                            continue;
2405:                        case '-':
2406:                            if (op == 0) {
2407:                                if (lastItem != 0) {
2408:                                    op = (char) c;
2409:                                    continue;
2410:                                } else {
2411:                                    // Treat final trailing '-' as a literal
2412:                                    add_unchecked(c, c);
2413:                                    c = chars.next(opts);
2414:                                    literal = chars.isEscaped();
2415:                                    if (c == ']' && !literal) {
2416:                                        pat.append("-]");
2417:                                        mode = 2;
2418:                                        continue;
2419:                                    }
2420:                                }
2421:                            }
2422:                            syntaxError(chars, "'-' not after char or set");
2423:                        case '&':
2424:                            if (lastItem == 2 && op == 0) {
2425:                                op = (char) c;
2426:                                continue;
2427:                            }
2428:                            syntaxError(chars, "'&' not after set");
2429:                        case '^':
2430:                            syntaxError(chars, "'^' not after '['");
2431:                        case '{':
2432:                            if (op != 0) {
2433:                                syntaxError(chars,
2434:                                        "Missing operand after operator");
2435:                            }
2436:                            if (lastItem == 1) {
2437:                                add_unchecked(lastChar, lastChar);
2438:                                _appendToPat(pat, lastChar, false);
2439:                            }
2440:                            lastItem = 0;
2441:                            if (buf == null) {
2442:                                buf = new StringBuffer();
2443:                            } else {
2444:                                buf.setLength(0);
2445:                            }
2446:                            boolean ok = false;
2447:                            while (!chars.atEnd()) {
2448:                                c = chars.next(opts);
2449:                                literal = chars.isEscaped();
2450:                                if (c == '}' && !literal) {
2451:                                    ok = true;
2452:                                    break;
2453:                                }
2454:                                UTF16.append(buf, c);
2455:                            }
2456:                            if (buf.length() < 1 || !ok) {
2457:                                syntaxError(chars,
2458:                                        "Invalid multicharacter string");
2459:                            }
2460:                            // We have new string. Add it to set and continue;
2461:                            // we don't need to drop through to the further
2462:                            // processing
2463:                            add(buf.toString());
2464:                            pat.append('{');
2465:                            _appendToPat(pat, buf.toString(), false);
2466:                            pat.append('}');
2467:                            continue;
2468:                        case SymbolTable.SYMBOL_REF:
2469:                            //         symbols  nosymbols
2470:                            // [a-$]   error    error (ambiguous)
2471:                            // [a$]    anchor   anchor
2472:                            // [a-$x]  var "x"* literal '$'
2473:                            // [a-$.]  error    literal '$'
2474:                            // *We won't get here in the case of var "x"
2475:                            backup = chars.getPos(backup);
2476:                            c = chars.next(opts);
2477:                            literal = chars.isEscaped();
2478:                            boolean anchor = (c == ']' && !literal);
2479:                            if (symbols == null && !anchor) {
2480:                                c = SymbolTable.SYMBOL_REF;
2481:                                chars.setPos(backup);
2482:                                break; // literal '$'
2483:                            }
2484:                            if (anchor && op == 0) {
2485:                                if (lastItem == 1) {
2486:                                    add_unchecked(lastChar, lastChar);
2487:                                    _appendToPat(pat, lastChar, false);
2488:                                }
2489:                                add_unchecked(UnicodeMatcher.ETHER);
2490:                                usePat = true;
2491:                                pat.append(SymbolTable.SYMBOL_REF).append(']');
2492:                                mode = 2;
2493:                                continue;
2494:                            }
2495:                            syntaxError(chars, "Unquoted '$'");
2496:                        default:
2497:                            break;
2498:                        }
2499:                    }
2500:
2501:                    // -------- Parse literal characters.  This includes both
2502:                    // escaped chars ("\u4E01") and non-syntax characters
2503:                    // ("a").
2504:
2505:                    switch (lastItem) {
2506:                    case 0:
2507:                        lastItem = 1;
2508:                        lastChar = c;
2509:                        break;
2510:                    case 1:
2511:                        if (op == '-') {
2512:                            if (lastChar >= c) {
2513:                                // Don't allow redundant (a-a) or empty (b-a) ranges;
2514:                                // these are most likely typos.
2515:                                syntaxError(chars, "Invalid range");
2516:                            }
2517:                            add_unchecked(lastChar, c);
2518:                            _appendToPat(pat, lastChar, false);
2519:                            pat.append(op);
2520:                            _appendToPat(pat, c, false);
2521:                            lastItem = op = 0;
2522:                        } else {
2523:                            add_unchecked(lastChar, lastChar);
2524:                            _appendToPat(pat, lastChar, false);
2525:                            lastChar = c;
2526:                        }
2527:                        break;
2528:                    case 2:
2529:                        if (op != 0) {
2530:                            syntaxError(chars, "Set expected after operator");
2531:                        }
2532:                        lastChar = c;
2533:                        lastItem = 1;
2534:                        break;
2535:                    }
2536:                }
2537:
2538:                if (mode != 2) {
2539:                    syntaxError(chars, "Missing ']'");
2540:                }
2541:
2542:                chars.skipIgnored(opts);
2543:
2544:                /**
2545:                 * Handle global flags (invert, case insensitivity).  If this
2546:                 * pattern should be compiled case-insensitive, then we need
2547:                 * to close over case BEFORE COMPLEMENTING.  This makes
2548:                 * patterns like /[^abc]/i work.
2549:                 */
2550:                if ((options & CASE) != 0) {
2551:                    closeOver(CASE);
2552:                }
2553:                if (invert) {
2554:                    complement();
2555:                }
2556:
2557:                // Use the rebuilt pattern (pat) only if necessary.  Prefer the
2558:                // generated pattern.
2559:                if (usePat) {
2560:                    rebuiltPat.append(pat.toString());
2561:                } else {
2562:                    _generatePattern(rebuiltPat, false, true);
2563:                }
2564:            }
2565:
2566:            private static void syntaxError(RuleCharacterIterator chars,
2567:                    String msg) {
2568:                throw new IllegalArgumentException("Error: " + msg + " at \""
2569:                        + Utility.escape(chars.toString()) + '"');
2570:            }
2571:
2572:            /**
2573:             * Add the contents of the UnicodeSet (as strings) into a collection.
2574:             * @param target collection to add into
2575:             * @stable ICU 2.8
2576:             */
2577:            public void addAllTo(Collection target) {
2578:                UnicodeSetIterator it = new UnicodeSetIterator(this );
2579:                while (it.next()) {
2580:                    target.add(it.getString());
2581:                }
2582:            }
2583:
2584:            /**
2585:             * Add the contents of the collection (as strings) into this UnicodeSet.
2586:             * @param source the collection to add
2587:             * @stable ICU 2.8
2588:             */
2589:            public void addAll(Collection source) {
2590:                checkFrozen();
2591:                Iterator it = source.iterator();
2592:                while (it.hasNext()) {
2593:                    add(it.next().toString());
2594:                }
2595:            }
2596:
2597:            //----------------------------------------------------------------
2598:            // Implementation: Utility methods
2599:            //----------------------------------------------------------------
2600:
2601:            private void ensureCapacity(int newLen) {
2602:                if (newLen <= list.length)
2603:                    return;
2604:                int[] temp = new int[newLen + GROW_EXTRA];
2605:                System.arraycopy(list, 0, temp, 0, len);
2606:                list = temp;
2607:            }
2608:
2609:            private void ensureBufferCapacity(int newLen) {
2610:                if (buffer != null && newLen <= buffer.length)
2611:                    return;
2612:                buffer = new int[newLen + GROW_EXTRA];
2613:            }
2614:
2615:            /**
2616:             * Assumes start <= end.
2617:             */
2618:            private int[] range(int start, int end) {
2619:                if (rangeList == null) {
2620:                    rangeList = new int[] { start, end + 1, HIGH };
2621:                } else {
2622:                    rangeList[0] = start;
2623:                    rangeList[1] = end + 1;
2624:                }
2625:                return rangeList;
2626:            }
2627:
2628:            //----------------------------------------------------------------
2629:            // Implementation: Fundamental operations
2630:            //----------------------------------------------------------------
2631:
2632:            // polarity = 0, 3 is normal: x xor y
2633:            // polarity = 1, 2: x xor ~y == x === y
2634:
2635:            private UnicodeSet xor(int[] other, int otherLen, int polarity) {
2636:                ensureBufferCapacity(len + otherLen);
2637:                int i = 0, j = 0, k = 0;
2638:                int a = list[i++];
2639:                int b;
2640:                if (polarity == 1 || polarity == 2) {
2641:                    b = LOW;
2642:                    if (other[j] == LOW) { // skip base if already LOW
2643:                        ++j;
2644:                        b = other[j];
2645:                    }
2646:                } else {
2647:                    b = other[j++];
2648:                }
2649:                // simplest of all the routines
2650:                // sort the values, discarding identicals!
2651:                while (true) {
2652:                    if (a < b) {
2653:                        buffer[k++] = a;
2654:                        a = list[i++];
2655:                    } else if (b < a) {
2656:                        buffer[k++] = b;
2657:                        b = other[j++];
2658:                    } else if (a != HIGH) { // at this point, a == b
2659:                        // discard both values!
2660:                        a = list[i++];
2661:                        b = other[j++];
2662:                    } else { // DONE!
2663:                        buffer[k++] = HIGH;
2664:                        len = k;
2665:                        break;
2666:                    }
2667:                }
2668:                // swap list and buffer
2669:                int[] temp = list;
2670:                list = buffer;
2671:                buffer = temp;
2672:                pat = null;
2673:                return this ;
2674:            }
2675:
            // Unions the boundary list <other> into this set.  The merged list
            // is written to buffer[], terminated with HIGH, and then swapped
            // with list[]; len is updated, the cached pattern (pat) is cleared
            // and this set is returned.  otherLen is used only to size buffer[].
            // NOTE(review): the loop never bounds-checks i or j, so both list[]
            // and other[] are presumably sorted and terminated by HIGH -- confirm
            // against callers.
            //
            // polarity = 0 is normal: x union y
            // polarity = 2: x union ~y
            // polarity = 1: ~x union y
            // polarity = 3: ~x union ~y

            private UnicodeSet add(int[] other, int otherLen, int polarity) {
                ensureBufferCapacity(len + otherLen);
                // i, j: read positions in list[] and other[]; k: write position
                // in buffer[].  a and b hold the values most recently read.
                int i = 0, j = 0, k = 0;
                int a = list[i++];
                int b = other[j++];
                // change from xor is that we have to check overlapping pairs
                // polarity bit 1 means a is second, bit 2 means b is.
                // Each time a (or b) advances, its bit in polarity is toggled,
                // so polarity doubles as the state of the merge.
                main: while (true) {
                    switch (polarity) {
                    case 0: // both first; take lower if unequal
                        if (a < b) { // take a
                            // Back up over overlapping ranges in buffer[]
                            if (k > 0 && a <= buffer[k - 1]) {
                                // Pick latter end value in buffer[] vs. list[]
                                a = max(list[i], buffer[--k]);
                            } else {
                                // No overlap
                                buffer[k++] = a;
                                a = list[i];
                            }
                            i++; // Common if/else code factored out
                            polarity ^= 1;
                        } else if (b < a) { // take b
                            // Same back-up logic as above, applied to other[]
                            if (k > 0 && b <= buffer[k - 1]) {
                                b = max(other[j], buffer[--k]);
                            } else {
                                buffer[k++] = b;
                                b = other[j];
                            }
                            j++;
                            polarity ^= 2;
                        } else { // a == b, take a, drop b
                            if (a == HIGH)
                                break main;
                            // This is symmetrical; it doesn't matter if
                            // we backtrack with a or b. - liu
                            if (k > 0 && a <= buffer[k - 1]) {
                                a = max(list[i], buffer[--k]);
                            } else {
                                // No overlap
                                buffer[k++] = a;
                                a = list[i];
                            }
                            i++;
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    case 3: // both second; take higher if unequal, and drop other
                        if (b <= a) { // take a
                            if (a == HIGH)
                                break main;
                            buffer[k++] = a;
                        } else { // take b
                            if (b == HIGH)
                                break main;
                            buffer[k++] = b;
                        }
                        a = list[i++];
                        polarity ^= 1; // factored common code
                        b = other[j++];
                        polarity ^= 2;
                        break;
                    case 1: // a second, b first; if b < a, overlap
                        if (a < b) { // no overlap, take a
                            buffer[k++] = a;
                            a = list[i++];
                            polarity ^= 1;
                        } else if (b < a) { // OVERLAP, drop b
                            b = other[j++];
                            polarity ^= 2;
                        } else { // a == b, drop both!
                            if (a == HIGH)
                                break main;
                            a = list[i++];
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    case 2: // a first, b second; if a < b, overlap
                        if (b < a) { // no overlap, take b
                            buffer[k++] = b;
                            b = other[j++];
                            polarity ^= 2;
                        } else if (a < b) { // OVERLAP, drop a
                            a = list[i++];
                            polarity ^= 1;
                        } else { // a == b, drop both!
                            if (a == HIGH)
                                break main;
                            a = list[i++];
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    }
                }
                buffer[k++] = HIGH; // terminate
                len = k;
                // swap list and buffer
                int[] temp = list;
                list = buffer;
                buffer = temp;
                pat = null;
                return this ;
            }
2790:
            // Intersects this set with the boundary list <other>.  The result
            // is written to buffer[], terminated with HIGH, and then swapped
            // with list[]; len is updated, the cached pattern (pat) is cleared
            // and this set is returned.  otherLen is used only to size buffer[].
            // NOTE(review): the loop never bounds-checks i or j, so both list[]
            // and other[] are presumably sorted and terminated by HIGH -- confirm
            // against callers.
            //
            // polarity = 0 is normal: x intersect y
            // polarity = 2: x intersect ~y == set-minus
            // polarity = 1: ~x intersect y
            // polarity = 3: ~x intersect ~y

            private UnicodeSet retain(int[] other, int otherLen, int polarity) {
                ensureBufferCapacity(len + otherLen);
                // i, j: read positions in list[] and other[]; k: write position
                // in buffer[].  a and b hold the values most recently read.
                int i = 0, j = 0, k = 0;
                int a = list[i++];
                int b = other[j++];
                // change from xor is that we have to check overlapping pairs
                // polarity bit 1 means a is second, bit 2 means b is.
                // Each time a (or b) advances, its bit in polarity is toggled,
                // so polarity doubles as the state of the merge.
                main: while (true) {
                    switch (polarity) {
                    case 0: // both first; drop the smaller
                        if (a < b) { // drop a
                            a = list[i++];
                            polarity ^= 1;
                        } else if (b < a) { // drop b
                            b = other[j++];
                            polarity ^= 2;
                        } else { // a == b, take one, drop other
                            if (a == HIGH)
                                break main;
                            buffer[k++] = a;
                            a = list[i++];
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    case 3: // both second; take lower if unequal
                        if (a < b) { // take a
                            buffer[k++] = a;
                            a = list[i++];
                            polarity ^= 1;
                        } else if (b < a) { // take b
                            buffer[k++] = b;
                            b = other[j++];
                            polarity ^= 2;
                        } else { // a == b, take one, drop other
                            if (a == HIGH)
                                break main;
                            buffer[k++] = a;
                            a = list[i++];
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    case 1: // a second, b first;
                        if (a < b) { // NO OVERLAP, drop a
                            a = list[i++];
                            polarity ^= 1;
                        } else if (b < a) { // OVERLAP, take b
                            buffer[k++] = b;
                            b = other[j++];
                            polarity ^= 2;
                        } else { // a == b, drop both!
                            if (a == HIGH)
                                break main;
                            a = list[i++];
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    case 2: // a first, b second; if a < b, overlap
                        if (b < a) { // no overlap, drop b
                            b = other[j++];
                            polarity ^= 2;
                        } else if (a < b) { // OVERLAP, take a
                            buffer[k++] = a;
                            a = list[i++];
                            polarity ^= 1;
                        } else { // a == b, drop both!
                            if (a == HIGH)
                                break main;
                            a = list[i++];
                            polarity ^= 1;
                            b = other[j++];
                            polarity ^= 2;
                        }
                        break;
                    }
                }
                buffer[k++] = HIGH; // terminate
                len = k;
                // swap list and buffer
                int[] temp = list;
                list = buffer;
                buffer = temp;
                pat = null;
                return this ;
            }
2886:
2887:            private static final int max(int a, int b) {
2888:                return (a > b) ? a : b;
2889:            }
2890:
2891:            //----------------------------------------------------------------
2892:            // Generic filter-based scanning code
2893:            //----------------------------------------------------------------
2894:
            /**
             * Predicate over code points.  Implementations in this file decide
             * membership from a character property; applyFilter() evaluates the
             * predicate over code points to build a set.
             */
            private static interface Filter {
                /**
                 * @param codePoint the code point to test
                 * @return true if the code point is accepted by this filter
                 */
                boolean contains(int codePoint);
            }
2898:
2899:            private static class NumericValueFilter implements  Filter {
2900:                double value;
2901:
2902:                NumericValueFilter(double value) {
2903:                    this .value = value;
2904:                }
2905:
2906:                public boolean contains(int ch) {
2907:                    return UCharacter.getUnicodeNumericValue(ch) == value;
2908:                }
2909:            }
2910:
2911:            private static class GeneralCategoryMaskFilter implements  Filter {
2912:                int mask;
2913:
2914:                GeneralCategoryMaskFilter(int mask) {
2915:                    this .mask = mask;
2916:                }
2917:
2918:                public boolean contains(int ch) {
2919:                    return ((1 << UCharacter.getType(ch)) & mask) != 0;
2920:                }
2921:            }
2922:
2923:            private static class IntPropertyFilter implements  Filter {
2924:                int prop;
2925:                int value;
2926:
2927:                IntPropertyFilter(int prop, int value) {
2928:                    this .prop = prop;
2929:                    this .value = value;
2930:                }
2931:
2932:                public boolean contains(int ch) {
2933:                    return UCharacter.getIntPropertyValue(ch, prop) == value;
2934:                }
2935:            }
2936:
            // VersionInfo for unassigned characters: version 0.0.0.0.
            // VersionFilter.contains() compares a character's age against this
            // object by reference and rejects the character when they are the
            // same instance.
            static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0,
                    0, 0);
2940:
2941:            private static class VersionFilter implements  Filter {
2942:                VersionInfo version;
2943:
2944:                VersionFilter(VersionInfo version) {
2945:                    this .version = version;
2946:                }
2947:
2948:                public boolean contains(int ch) {
2949:                    VersionInfo v = UCharacter.getAge(ch);
2950:                    // Reference comparison ok; VersionInfo caches and reuses
2951:                    // unique objects.
2952:                    return v != NO_VERSION && v.compareTo(version) <= 0;
2953:                }
2954:            }
2955:
2956:            private static synchronized UnicodeSet getInclusions(int src) {
2957:                if (INCLUSIONS == null) {
2958:                    INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT];
2959:                }
2960:                if (INCLUSIONS[src] == null) {
2961:                    UnicodeSet incl = new UnicodeSet();
2962:                    switch (src) {
2963:                    case UCharacterProperty.SRC_CHAR:
2964:                        UCharacterProperty.getInstance()
2965:                                .addPropertyStarts(incl);
2966:                        break;
2967:                    case UCharacterProperty.SRC_PROPSVEC:
2968:                        UCharacterProperty.getInstance()
2969:                                .upropsvec_addPropertyStarts(incl);
2970:                        break;
2971:                    case UCharacterProperty.SRC_CHAR_AND_PROPSVEC:
2972:                        UCharacterProperty.getInstance()
2973:                                .addPropertyStarts(incl);
2974:                        UCharacterProperty.getInstance()
2975:                                .upropsvec_addPropertyStarts(incl);
2976:                        break;
2977:                    case UCharacterProperty.SRC_HST:
2978:                        UCharacterProperty.getInstance()
2979:                                .uhst_addPropertyStarts(incl);
2980:                        break;
2981:                    case UCharacterProperty.SRC_NORM:
2982:                        NormalizerImpl.addPropertyStarts(incl);
2983:                        break;
2984:                    case UCharacterProperty.SRC_CASE:
2985:                        try {
2986:                            UCaseProps.getSingleton().addPropertyStarts(incl);
2987:                        } catch (IOException e) {
2988:                            throw new MissingResourceException(e.getMessage(),
2989:                                    "", "");
2990:                        }
2991:                        break;
2992:                    case UCharacterProperty.SRC_BIDI:
2993:                        try {
2994:                            UBiDiProps.getSingleton().addPropertyStarts(incl);
2995:                        } catch (IOException e) {
2996:                            throw new MissingResourceException(e.getMessage(),
2997:                                    "", "");
2998:                        }
2999:                        break;
3000:                    default:
3001:                        throw new IllegalStateException(
3002:                                "UnicodeSet.getInclusions(unknown src " + src
3003:                                        + ")");
3004:                    }
3005:                    INCLUSIONS[src] = incl;
3006:                }
3007:                return INCLUSIONS[src];
3008:            }
3009:
3010:            /**
3011:             * Generic filter-based scanning code for UCD property UnicodeSets.
3012:             */
3013:            private UnicodeSet applyFilter(Filter filter, int src) {
3014:                // Walk through all Unicode characters, noting the start
3015:                // and end of each range for which filter.contain(c) is
3016:                // true.  Add each range to a set.
3017:                //
3018:                // To improve performance, use the INCLUSIONS set, which
3019:                // encodes information about character ranges that are known
3020:                // to have identical properties, such as the CJK Ideographs
3021:                // from U+4E00 to U+9FA5.  INCLUSIONS contains all characters
3022:                // except the first characters of such ranges.
3023:                //
3024:                // TODO Where possible, instead of scanning over code points,
3025:                // use internal property data to initialize UnicodeSets for
3026:                // those properties.  Scanning code points is slow.
3027:
3028:                clear();
3029:
3030:                int startHasProperty = -1;
3031:                UnicodeSet inclusions = getInclusions(src);
3032:                int limitRange = inclusions.getRangeCount();
3033:
3034:                for (int j = 0; j < limitRange; ++j) {
3035:                    // get current range
3036:                    int start = inclusions.getRangeStart(j);
3037:                    int end = inclusions.getRangeEnd(j);
3038:
3039:                    // for all the code points in the range, process
3040:                    for (int ch = start; ch <= end; ++ch) {
3041:                        // only add to the unicodeset on inflection points --
3042:                        // where the hasProperty value changes to false
3043:                        if (filter.contains(ch)) {
3044:                            if (startHasProperty < 0) {
3045:                                startHasProperty = ch;
3046:                            }
3047:                        } else if (startHasProperty >= 0) {
3048:                            add_unchecked(startHasProperty, ch - 1);
3049:                            startHasProperty = -1;
3050:                        }
3051:                    }
3052:                }
3053:                if (startHasProperty >= 0) {
3054:                    add_unchecked(startHasProperty, 0x10FFFF);
3055:                }
3056:
3057:                return this ;
3058:            }
3059:
3060:            /**
3061:             * Remove leading and trailing rule white space and compress
3062:             * internal rule white space to a single space character.
3063:             *
3064:             * @see UCharacterProperty#isRuleWhiteSpace
3065:             */
3066:            private static String mungeCharName(String source) {
3067:                StringBuffer buf = new StringBuffer();
3068:                for (int i = 0; i < source.length();) {
3069:                    int ch = UTF16.charAt(source, i);
3070:                    i += UTF16.getCharCount(ch);
3071:                    if (UCharacterProperty.isRuleWhiteSpace(ch)) {
3072:                        if (buf.length() == 0
3073:                                || buf.charAt(buf.length() - 1) == ' ') {
3074:                            continue;
3075:                        }
3076:                        ch = ' '; // convert to ' '
3077:                    }
3078:                    UTF16.append(buf, ch);
3079:                }
3080:                if (buf.length() != 0 && buf.charAt(buf.length() - 1) == ' ') {
3081:                    buf.setLength(buf.length() - 1);
3082:                }
3083:                return buf.toString();
3084:            }
3085:
3086:            //----------------------------------------------------------------
3087:            // Property set API
3088:            //----------------------------------------------------------------
3089:
3090:            /**
3091:             * Modifies this set to contain those code points which have the
3092:             * given value for the given binary or enumerated property, as
3093:             * returned by UCharacter.getIntPropertyValue.  Prior contents of
3094:             * this set are lost.
3095:             *
3096:             * @param prop a property in the range
3097:             * UProperty.BIN_START..UProperty.BIN_LIMIT-1 or
3098:             * UProperty.INT_START..UProperty.INT_LIMIT-1 or.
3099:             * UProperty.MASK_START..UProperty.MASK_LIMIT-1.
3100:             *
3101:             * @param value a value in the range
3102:             * UCharacter.getIntPropertyMinValue(prop)..
3103:             * UCharacter.getIntPropertyMaxValue(prop), with one exception.
3104:             * If prop is UProperty.GENERAL_CATEGORY_MASK, then value should not be
3105:             * a UCharacter.getType() result, but rather a mask value produced
3106:             * by logically ORing (1 << UCharacter.getType()) values together.
3107:             * This allows grouped categories such as [:L:] to be represented.
3108:             *
3109:             * @return a reference to this set
3110:             *
3111:             * @stable ICU 2.4
3112:             */
3113:            public UnicodeSet applyIntPropertyValue(int prop, int value) {
3114:                checkFrozen();
3115:                if (prop == UProperty.GENERAL_CATEGORY_MASK) {
3116:                    applyFilter(new GeneralCategoryMaskFilter(value),
3117:                            UCharacterProperty.SRC_CHAR);
3118:                } else {
3119:                    applyFilter(new IntPropertyFilter(prop, value),
3120:                            UCharacterProperty.getInstance().getSource(prop));
3121:                }
3122:                return this ;
3123:            }
3124:
3125:            /**
3126:             * Modifies this set to contain those code points which have the
3127:             * given value for the given property.  Prior contents of this
3128:             * set are lost.
3129:             *
3130:             * @param propertyAlias a property alias, either short or long.
3131:             * The name is matched loosely.  See PropertyAliases.txt for names
3132:             * and a description of loose matching.  If the value string is
3133:             * empty, then this string is interpreted as either a
3134:             * General_Category value alias, a Script value alias, a binary
3135:             * property alias, or a special ID.  Special IDs are matched
3136:             * loosely and correspond to the following sets:
3137:             *
3138:             * "ANY" = [\u0000-\U0010FFFF],
3139:             * "ASCII" = [\u0000-\u007F].
3140:             *
3141:             * @param valueAlias a value alias, either short or long.  The
3142:             * name is matched loosely.  See PropertyValueAliases.txt for
3143:             * names and a description of loose matching.  In addition to
3144:             * aliases listed, numeric values and canonical combining classes
3145:             * may be expressed numerically, e.g., ("nv", "0.5") or ("ccc",
3146:             * "220").  The value string may also be empty.
3147:             *
3148:             * @return a reference to this set
3149:             *
3150:             * @stable ICU 2.4
3151:             */
3152:            public UnicodeSet applyPropertyAlias(String propertyAlias,
3153:                    String valueAlias) {
3154:                return applyPropertyAlias(propertyAlias, valueAlias, null);
3155:            }
3156:
3157:            /**
3158:             * Modifies this set to contain those code points which have the
3159:             * given value for the given property.  Prior contents of this
3160:             * set are lost.
3161:             * @param propertyAlias
3162:             * @param valueAlias
3163:             * @param symbols if not null, then symbols are first called to see if a property
3164:             * is available. If true, then everything else is skipped.
3165:             * @return this set
3166:             * @draft ICU 3.2
3167:             * @provisional This API might change or be removed in a future release.
3168:             */
3169:            public UnicodeSet applyPropertyAlias(String propertyAlias,
3170:                    String valueAlias, SymbolTable symbols) {
3171:                checkFrozen();
3172:                int p;
3173:                int v;
3174:                boolean mustNotBeEmpty = false, invert = false;
3175:
3176:                if (symbols != null
3177:                        && (symbols instanceof  XSymbolTable)
3178:                        && ((XSymbolTable) symbols).applyPropertyAlias(
3179:                                propertyAlias, valueAlias, this )) {
3180:                    return this ;
3181:                }
3182:
3183:                if (valueAlias.length() > 0) {
3184:                    p = UCharacter.getPropertyEnum(propertyAlias);
3185:
3186:                    // Treat gc as gcm
3187:                    if (p == UProperty.GENERAL_CATEGORY) {
3188:                        p = UProperty.GENERAL_CATEGORY_MASK;
3189:                    }
3190:
3191:                    if ((p >= UProperty.BINARY_START && p < UProperty.BINARY_LIMIT)
3192:                            || (p >= UProperty.INT_START && p < UProperty.INT_LIMIT)
3193:                            || (p >= UProperty.MASK_START && p < UProperty.MASK_LIMIT)) {
3194:                        try {
3195:                            v = UCharacter.getPropertyValueEnum(p, valueAlias);
3196:                        } catch (IllegalArgumentException e) {
3197:                            // Handle numeric CCC
3198:                            if (p == UProperty.CANONICAL_COMBINING_CLASS
3199:                                    || p == UProperty.LEAD_CANONICAL_COMBINING_CLASS
3200:                                    || p == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) {
3201:                                v = Integer.parseInt(Utility
3202:                                        .deleteRuleWhiteSpace(valueAlias));
3203:                                // If the resultant set is empty then the numeric value
3204:                                // was invalid.
3205:                                mustNotBeEmpty = true;
3206:                            } else {
3207:                                throw e;
3208:                            }
3209:                        }
3210:                    }
3211:
3212:                    else {
3213:
3214:                        switch (p) {
3215:                        case UProperty.NUMERIC_VALUE: {
3216:                            double value = Double.parseDouble(Utility
3217:                                    .deleteRuleWhiteSpace(valueAlias));
3218:                            applyFilter(new NumericValueFilter(value),
3219:                                    UCharacterProperty.SRC_CHAR);
3220:                            return this ;
3221:                        }
3222:                        case UProperty.NAME:
3223:                        case UProperty.UNICODE_1_NAME: {
3224:                            // Must munge name, since
3225:                            // UCharacter.charFromName() does not do
3226:                            // 'loose' matching.
3227:                            String buf = mungeCharName(valueAlias);
3228:                            int ch = (p == UProperty.NAME) ? UCharacter
3229:                                    .getCharFromExtendedName(buf) : UCharacter
3230:                                    .getCharFromName1_0(buf);
3231:                            if (ch == -1) {
3232:                                throw new IllegalArgumentException(
3233:                                        "Invalid character name");
3234:                            }
3235:                            clear();
3236:                            add_unchecked(ch);
3237:                            return this ;
3238:                        }
3239:                        case UProperty.AGE: {
3240:                            // Must munge name, since
3241:                            // VersionInfo.getInstance() does not do
3242:                            // 'loose' matching.
3243:                            VersionInfo version = VersionInfo
3244:                                    .getInstance(mungeCharName(valueAlias));
3245:                            applyFilter(new VersionFilter(version),
3246:                                    UCharacterProperty.SRC_PROPSVEC);
3247:                            return this ;
3248:                        }
3249:                        }
3250:
3251:                        // p is a non-binary, non-enumerated property that we
3252:                        // don't support (yet).
3253:                        throw new IllegalArgumentException(
3254:                                "Unsupported property");
3255:                    }
3256:                }
3257:
3258:                else {
3259:                    // valueAlias is empty.  Interpret as General Category, Script,
3260:                    // Binary property, or ANY or ASCII.  Upon success, p and v will
3261:                    // be set.
3262:                    try {
3263:                        p = UProperty.GENERAL_CATEGORY_MASK;
3264:                        v = UCharacter.getPropertyValueEnum(p, propertyAlias);
3265:                    } catch (IllegalArgumentException e) {
3266:                        try {
3267:                            p = UProperty.SCRIPT;
3268:                            v = UCharacter.getPropertyValueEnum(p,
3269:                                    propertyAlias);
3270:                        } catch (IllegalArgumentException e2) {
3271:                            try {
3272:                                p = UCharacter.getPropertyEnum(propertyAlias);
3273:                            } catch (IllegalArgumentException e3) {
3274:                                p = -1;
3275:                            }
3276:                            if (p >= UProperty.BINARY_START
3277:                                    && p < UProperty.BINARY_LIMIT) {
3278:                                v = 1;
3279:                            } else if (p == -1) {
3280:                                if (0 == UPropertyAliases.compare(ANY_ID,
3281:                                        propertyAlias)) {
3282:                                    set(MIN_VALUE, MAX_VALUE);
3283:                                    return this ;
3284:                                } else if (0 == UPropertyAliases.compare(
3285:                                        ASCII_ID, propertyAlias)) {
3286:                                    set(0, 0x7F);
3287:                                    return this ;
3288:                                } else if (0 == UPropertyAliases.compare(
3289:                                        ASSIGNED, propertyAlias)) {
3290:                                    // [:Assigned:]=[:^Cn:]
3291:                                    p = UProperty.GENERAL_CATEGORY_MASK;
3292:                                    v = (1 << UCharacter.UNASSIGNED);
3293:                                    invert = true;
3294:                                } else {
3295:                                    // Property name was never matched.
3296:                                    throw new IllegalArgumentException(
3297:                                            "Invalid property alias: "
3298:                                                    + propertyAlias + "="
3299:                                                    + valueAlias);
3300:                                }
3301:                            } else {
3302:                                // Valid propery name, but it isn't binary, so the value
3303:                                // must be supplied.
3304:                                throw new IllegalArgumentException(
3305:                                        "Missing property value");
3306:                            }
3307:                        }
3308:                    }
3309:                }
3310:
3311:                applyIntPropertyValue(p, v);
3312:                if (invert) {
3313:                    complement();
3314:                }
3315:
3316:                if (mustNotBeEmpty && isEmpty()) {
3317:                    // mustNotBeEmpty is set to true if an empty set indicates
3318:                    // invalid input.
3319:                    throw new IllegalArgumentException("Invalid property value");
3320:                }
3321:
3322:                return this ;
3323:            }
3324:
3325:            //----------------------------------------------------------------
3326:            // Property set patterns
3327:            //----------------------------------------------------------------
3328:
3329:            /**
3330:             * Return true if the given position, in the given pattern, appears
3331:             * to be the start of a property set pattern.
3332:             */
3333:            private static boolean resemblesPropertyPattern(String pattern,
3334:                    int pos) {
3335:                // Patterns are at least 5 characters long
3336:                if ((pos + 5) > pattern.length()) {
3337:                    return false;
3338:                }
3339:
3340:                // Look for an opening [:, [:^, \p, or \P
3341:                return pattern.regionMatches(pos, "[:", 0, 2)
3342:                        || pattern.regionMatches(true, pos, "\\p", 0, 2)
3343:                        || pattern.regionMatches(pos, "\\N", 0, 2);
3344:            }
3345:
3346:            /**
3347:             * Return true if the given iterator appears to point at a
3348:             * property pattern.  Regardless of the result, return with the
3349:             * iterator unchanged.
3350:             * @param chars iterator over the pattern characters.  Upon return
3351:             * it will be unchanged.
3352:             * @param iterOpts RuleCharacterIterator options
3353:             */
3354:            private static boolean resemblesPropertyPattern(
3355:                    RuleCharacterIterator chars, int iterOpts) {
3356:                boolean result = false;
3357:                iterOpts &= ~RuleCharacterIterator.PARSE_ESCAPES;
3358:                Object pos = chars.getPos(null);
3359:                int c = chars.next(iterOpts);
3360:                if (c == '[' || c == '\\') {
3361:                    int d = chars.next(iterOpts
3362:                            & ~RuleCharacterIterator.SKIP_WHITESPACE);
3363:                    result = (c == '[') ? (d == ':')
3364:                            : (d == 'N' || d == 'p' || d == 'P');
3365:                }
3366:                chars.setPos(pos);
3367:                return result;
3368:            }
3369:
3370:            /**
3371:             * Parse the given property pattern at the given parse position.
3372:             * @param symbols TODO
3373:             */
3374:            private UnicodeSet applyPropertyPattern(String pattern,
3375:                    ParsePosition ppos, SymbolTable symbols) {
3376:                int pos = ppos.getIndex();
3377:
3378:                // On entry, ppos should point to one of the following locations:
3379:
3380:                // Minimum length is 5 characters, e.g. \p{L}
3381:                if ((pos + 5) > pattern.length()) {
3382:                    return null;
3383:                }
3384:
3385:                boolean posix = false; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
3386:                boolean isName = false; // true for \N{pat}, o/w false
3387:                boolean invert = false;
3388:
3389:                // Look for an opening [:, [:^, \p, or \P
3390:                if (pattern.regionMatches(pos, "[:", 0, 2)) {
3391:                    posix = true;
3392:                    pos = Utility.skipWhitespace(pattern, pos + 2);
3393:                    if (pos < pattern.length() && pattern.charAt(pos) == '^') {
3394:                        ++pos;
3395:                        invert = true;
3396:                    }
3397:                } else if (pattern.regionMatches(true, pos, "\\p", 0, 2)
3398:                        || pattern.regionMatches(pos, "\\N", 0, 2)) {
3399:                    char c = pattern.charAt(pos + 1);
3400:                    invert = (c == 'P');
3401:                    isName = (c == 'N');
3402:                    pos = Utility.skipWhitespace(pattern, pos + 2);
3403:                    if (pos == pattern.length() || pattern.charAt(pos++) != '{') {
3404:                        // Syntax error; "\p" or "\P" not followed by "{"
3405:                        return null;
3406:                    }
3407:                } else {
3408:                    // Open delimiter not seen
3409:                    return null;
3410:                }
3411:
3412:                // Look for the matching close delimiter, either :] or }
3413:                int close = pattern.indexOf(posix ? ":]" : "}", pos);
3414:                if (close < 0) {
3415:                    // Syntax error; close delimiter missing
3416:                    return null;
3417:                }
3418:
3419:                // Look for an '=' sign.  If this is present, we will parse a
3420:                // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
3421:                // pattern.
3422:                int equals = pattern.indexOf('=', pos);
3423:                String propName, valueName;
3424:                if (equals >= 0 && equals < close && !isName) {
3425:                    // Equals seen; parse medium/long pattern
3426:                    propName = pattern.substring(pos, equals);
3427:                    valueName = pattern.substring(equals + 1, close);
3428:                }
3429:
3430:                else {
3431:                    // Handle case where no '=' is seen, and \N{}
3432:                    propName = pattern.substring(pos, close);
3433:                    valueName = "";
3434:
3435:                    // Handle \N{name}
3436:                    if (isName) {
3437:                        // This is a little inefficient since it means we have to
3438:                        // parse "na" back to UProperty.NAME even though we already
3439:                        // know it's UProperty.NAME.  If we refactor the API to
3440:                        // support args of (int, String) then we can remove
3441:                        // "na" and make this a little more efficient.
3442:                        valueName = propName;
3443:                        propName = "na";
3444:                    }
3445:                }
3446:
3447:                applyPropertyAlias(propName, valueName, symbols);
3448:
3449:                if (invert) {
3450:                    complement();
3451:                }
3452:
3453:                // Move to the limit position after the close delimiter
3454:                ppos.setIndex(close + (posix ? 2 : 1));
3455:
3456:                return this ;
3457:            }
3458:
3459:            /**
3460:             * Parse a property pattern.
3461:             * @param chars iterator over the pattern characters.  Upon return
3462:             * it will be advanced to the first character after the parsed
3463:             * pattern, or the end of the iteration if all characters are
3464:             * parsed.
3465:             * @param rebuiltPat the pattern that was parsed, rebuilt or
3466:             * copied from the input pattern, as appropriate.
3467:             * @param symbols TODO
3468:             */
3469:            private void applyPropertyPattern(RuleCharacterIterator chars,
3470:                    StringBuffer rebuiltPat, SymbolTable symbols) {
3471:                String pat = chars.lookahead();
3472:                ParsePosition pos = new ParsePosition(0);
3473:                applyPropertyPattern(pat, pos, symbols);
3474:                if (pos.getIndex() == 0) {
3475:                    syntaxError(chars, "Invalid property pattern");
3476:                }
3477:                chars.jumpahead(pos.getIndex());
3478:                rebuiltPat.append(pat.substring(0, pos.getIndex()));
3479:            }
3480:
3481:            //----------------------------------------------------------------
3482:            // Case folding API
3483:            //----------------------------------------------------------------
3484:
3485:            /**
3486:             * Bitmask for constructor and applyPattern() indicating that
3487:             * white space should be ignored.  If set, ignore characters for
3488:             * which UCharacterProperty.isRuleWhiteSpace() returns true,
3489:             * unless they are quoted or escaped.  This may be ORed together
3490:             * with other selectors.
3491:             * @internal
3492:             * @deprecated This API is ICU internal only.
3493:             */
3494:            public static final int IGNORE_SPACE = 1;
3495:
3496:            /**
3497:             * Bitmask for constructor, applyPattern(), and closeOver()
3498:             * indicating letter case.  This may be ORed together with other
3499:             * selectors.
3500:             *
3501:             * Enable case insensitive matching.  E.g., "[ab]" with this flag
3502:             * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
3503:             * match all except 'a', 'A', 'b', and 'B'. This performs a full
3504:             * closure over case mappings, e.g. U+017F for s.
3505:             *
3506:             * The resulting set is a superset of the input for the code points but
3507:             * not for the strings.
3508:             * It performs a case mapping closure of the code points and adds
3509:             * full case folding strings for the code points, and reduces strings of
3510:             * the original set to their full case folding equivalents.
3511:             *
3512:             * This is designed for case-insensitive matches, for example
3513:             * in regular expressions. The full code point case closure allows checking of
3514:             * an input character directly against the closure set.
3515:             * Strings are matched by comparing the case-folded form from the closure
3516:             * set with an incremental case folding of the string in question.
3517:             *
3518:             * The closure set will also contain single code points if the original
3519:             * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
3520:             * This is not necessary (that is, redundant) for the above matching method
3521:             * but results in the same closure sets regardless of whether the original
3522:             * set contained the code point or a string.
3523:             *
3524:             * @internal
3525:             * @deprecated This API is ICU internal only.
3526:             */
3527:            public static final int CASE = 2;
3528:
3529:            /**
3530:             * Alias for UnicodeSet.CASE, for ease of porting from C++ where ICU4C
3531:             * also has both USET_CASE and USET_CASE_INSENSITIVE (see uset.h).
3532:             * @see #CASE
3533:             * @draft ICU 3.4
3534:             * @provisional This API might change or be removed in a future release.
3535:             */
3536:            public static final int CASE_INSENSITIVE = 2;
3537:
3538:            /**
3539:             * Bitmask for constructor, applyPattern(), and closeOver()
3540:             * indicating letter case.  This may be ORed together with other
3541:             * selectors.
3542:             *
3543:             * Enable case insensitive matching.  E.g., "[ab]" with this flag
3544:             * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
3545:             * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
3546:             * title-, and uppercase mappings as well as the case folding
3547:             * of each existing element in the set.
3548:             * @draft ICU 3.4
3549:             * @provisional This API might change or be removed in a future release.
3550:             */
3551:            public static final int ADD_CASE_MAPPINGS = 4;
3552:
3553:            //  add the result of a full case mapping to the set
3554:            //  use str as a temporary string to avoid constructing one
3555:            private static final void addCaseMapping(UnicodeSet set,
3556:                    int result, StringBuffer full) {
3557:                if (result >= 0) {
3558:                    if (result > UCaseProps.MAX_STRING_LENGTH) {
3559:                        // add a single-code point case mapping
3560:                        set.add(result);
3561:                    } else {
3562:                        // add a string case mapping from full with length result
3563:                        set.add(full.toString());
3564:                        full.setLength(0);
3565:                    }
3566:                }
3567:                // result < 0: the code point mapped to itself, no need to add it
3568:                // see UCaseProps
3569:            }
3570:
3571:            /**
3572:             * Close this set over the given attribute.  For the attribute
3573:             * CASE, the result is to modify this set so that:
3574:             *
3575:             * 1. For each character or string 'a' in this set, all strings
3576:             * 'b' such that foldCase(a) == foldCase(b) are added to this set.
3577:             * (For most 'a' that are single characters, 'b' will have
3578:             * b.length() == 1.)
3579:             *
3580:             * 2. For each string 'e' in the resulting set, if e !=
3581:             * foldCase(e), 'e' will be removed.
3582:             *
3583:             * Example: [aq\u00DF{Bc}{bC}{Fi}] => [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
3584:             *
3585:             * (Here foldCase(x) refers to the operation
3586:             * UCharacter.foldCase(x, true), and a == b actually denotes
3587:             * a.equals(b), not pointer comparison.)
3588:             *
3589:             * @param attribute bitmask for attributes to close over.
3590:             * Currently only the CASE bit is supported.  Any undefined bits
3591:             * are ignored.
3592:             * @return a reference to this set.
3593:             * @internal
3594:             * @deprecated This API is ICU internal only.
3595:             */
3596:            public UnicodeSet closeOver(int attribute) {
3597:                checkFrozen();
3598:                if ((attribute & (CASE | ADD_CASE_MAPPINGS)) != 0) {
3599:                    UCaseProps csp;
3600:                    try {
3601:                        csp = UCaseProps.getSingleton();
3602:                    } catch (IOException e) {
3603:                        return this ;
3604:                    }
3605:                    UnicodeSet foldSet = new UnicodeSet(this );
3606:                    ULocale root = ULocale.ROOT;
3607:
3608:                    // start with input set to guarantee inclusion
3609:                    // CASE: remove strings because the strings will actually be reduced (folded);
3610:                    //       therefore, start with no strings and add only those needed
3611:                    if ((attribute & CASE) != 0) {
3612:                        foldSet.strings.clear();
3613:                    }
3614:
3615:                    int n = getRangeCount();
3616:                    int result;
3617:                    StringBuffer full = new StringBuffer();
3618:                    int locCache[] = new int[1];
3619:
3620:                    for (int i = 0; i < n; ++i) {
3621:                        int start = getRangeStart(i);
3622:                        int end = getRangeEnd(i);
3623:
3624:                        if ((attribute & CASE) != 0) {
3625:                            // full case closure
3626:                            for (int cp = start; cp <= end; ++cp) {
3627:                                csp.addCaseClosure(cp, foldSet);
3628:                            }
3629:                        } else {
3630:                            // add case mappings
3631:                            // (does not add long s for regular s, or Kelvin for k, for example)
3632:                            for (int cp = start; cp <= end; ++cp) {
3633:                                result = csp.toFullLower(cp, null, full, root,
3634:                                        locCache);
3635:                                addCaseMapping(foldSet, result, full);
3636:
3637:                                result = csp.toFullTitle(cp, null, full, root,
3638:                                        locCache);
3639:                                addCaseMapping(foldSet, result, full);
3640:
3641:                                result = csp.toFullUpper(cp, null, full, root,
3642:                                        locCache);
3643:                                addCaseMapping(foldSet, result, full);
3644:
3645:                                result = csp.toFullFolding(cp, full, 0);
3646:                                addCaseMapping(foldSet, result, full);
3647:                            }
3648:                        }
3649:                    }
3650:                    if (!strings.isEmpty()) {
3651:                        String str;
3652:                        if ((attribute & CASE) != 0) {
3653:                            Iterator it = strings.iterator();
3654:                            while (it.hasNext()) {
3655:                                str = UCharacter
3656:                                        .foldCase((String) it.next(), 0);
3657:                                if (!csp.addStringCaseClosure(str, foldSet)) {
3658:                                    foldSet.add(str); // does not map to code points: add the folded string itself
3659:                                }
3660:                            }
3661:                        } else {
3662:                            BreakIterator bi = BreakIterator
3663:                                    .getWordInstance(root);
3664:                            Iterator it = strings.iterator();
3665:                            while (it.hasNext()) {
3666:                                str = (String) it.next();
3667:                                foldSet.add(UCharacter.toLowerCase(root, str));
3668:                                foldSet.add(UCharacter.toTitleCase(root, str,
3669:                                        bi));
3670:                                foldSet.add(UCharacter.toUpperCase(root, str));
3671:                                foldSet.add(UCharacter.foldCase(str, 0));
3672:                            }
3673:                        }
3674:                    }
3675:                    set(foldSet);
3676:                }
3677:                return this ;
3678:            }
3679:
3680:            /**
3681:             * Internal class for customizing UnicodeSet parsing of properties.
3682:             * TODO: extend to allow customizing of codepoint ranges
3683:             * @internal
3684:             * @deprecated This API is ICU internal only.
3685:             * @author medavis
3686:             */
3687:            abstract public static class XSymbolTable implements  SymbolTable {
3688:                /**
3689:                 * Default constructor
3690:                 * @internal
3691:                 * @deprecated This API is ICU internal only.
3692:                 */
3693:                public XSymbolTable() {
3694:                }
3695:
3696:                /**
3697:                 * @internal
3698:                 * @deprecated This API is ICU internal only.
3699:                 */
3700:                public UnicodeMatcher lookupMatcher(int i) {
3701:                    return null;
3702:                }
3703:
3704:                /**
3705:                 * @internal
3706:                 * @deprecated This API is ICU internal only.
3707:                 */
3708:                public boolean applyPropertyAlias(String propertyName,
3709:                        String propertyValue, UnicodeSet result) {
3710:                    return false;
3711:                }
3712:
3713:                /**
3714:                 * @internal
3715:                 * @deprecated This API is ICU internal only.
3716:                 */
3717:                public char[] lookup(String s) {
3718:                    return null;
3719:                }
3720:
3721:                /**
3722:                 * @internal
3723:                 * @deprecated This API is ICU internal only.
3724:                 */
3725:                public String parseReference(String text, ParsePosition pos,
3726:                        int limit) {
3727:                    return null;
3728:                }
3729:            }
3730:
3731:            private boolean frozen;
3732:
3733:            /**
3734:             * Is this frozen, according to the Freezable interface?
3735:             * @return value
3736:             * @internal
3737:             * @deprecated This API is ICU internal only.
3738:             */
3739:            public boolean isFrozen() {
3740:                return frozen;
3741:            }
3742:
3743:            /**
3744:             * Freeze this class, according to the Freezable interface.
3745:             * @return this
3746:             * @internal
3747:             * @deprecated This API is ICU internal only.
3748:             */
3749:            public Object freeze() {
3750:                frozen = true;
3751:                return this ;
3752:            }
3753:
3754:            /**
3755:             * Clone a thawed version of this class, according to the Freezable interface.
3756:             * @return this
3757:             * @internal
3758:             * @deprecated This API is ICU internal only.
3759:             */
3760:            public Object cloneAsThawed() {
3761:                UnicodeSet result = (UnicodeSet) clone();
3762:                result.frozen = false;
3763:                return result;
3764:            }
3765:
3766:            // internal function
3767:            private void checkFrozen() {
3768:                if (frozen) {
3769:                    throw new UnsupportedOperationException(
3770:                            "Attempt to modify frozen object");
3771:                }
3772:            }
3773:        }
3774:        //eof
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.