0001: /*
0002: *
0003: * @(#)CharSet.java 1.11 06/10/10
0004: *
0005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
0006: * Reserved. Use is subject to license terms.
0007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
0008: *
0009: * This program is free software; you can redistribute it and/or
0010: * modify it under the terms of the GNU General Public License version
0011: * 2 only, as published by the Free Software Foundation.
0012: *
0013: * This program is distributed in the hope that it will be useful, but
0014: * WITHOUT ANY WARRANTY; without even the implied warranty of
0015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0016: * General Public License version 2 for more details (a copy is
0017: * included at /legal/license.txt).
0018: *
0019: * You should have received a copy of the GNU General Public License
0020: * version 2 along with this work; if not, write to the Free Software
0021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
0022: * 02110-1301 USA
0023: *
0024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
0025: * Clara, CA 95054 or visit www.sun.com if you need additional
0026: * information or have any questions.
0027: */
0028:
0029: /*
0030: * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
0031: * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
0032: *
0033: * The original version of this source code and documentation
0034: * is copyrighted and owned by Taligent, Inc., a wholly-owned
0035: * subsidiary of IBM. These materials are provided under terms
0036: * of a License Agreement between Taligent and Sun. This technology
0037: * is protected by multiple US and International patents.
0038: *
0039: * This notice and attribution to Taligent may not be removed.
0040: * Taligent is a registered trademark of Taligent, Inc.
0041: */
0042: package java.text;
0043:
0044: import java.util.Hashtable;
0045:
0046: /**
0047: * An object representing a set of characters. (This is a "set" in the
0048: * mathematical sense: an unduplicated list of characters on which set
0049: * operations such as union and intersection can be performed.) The
0050: * set information is stored in compressed, optimized form: The object
0051: * contains a String with an even number of characters. Each pair of
0052: * characters represents a range of characters contained in the set
0053: * (a pair of the same character represents a single character). The
0054: * characters are sorted in increasing order.
0055: */
0056: class CharSet implements Cloneable {
0057: /**
0058: * The structure containing the set information. The characters
0059: * in this string are organized into pairs, each pair representing
0060: * a range of characters contained in the set
0061: */
0062: private String chars;
0063:
0064: //==========================================================================
0065: // parseString() and associated routines
0066: //==========================================================================
0067: /**
0068: * A cache which is used to speed up parseString() whenever it is
0069: * used to parse a description that has been parsed before
0070: */
0071: private static Hashtable expressionCache = null;
0072:
0073: /**
0074: * Builds a CharSet based on a textual description. For the syntax of
0075: * the description, see the documentation of RuleBasedBreakIterator.
0076: * @see java.text.RuleBasedBreakIterator
0077: */
0078: public static CharSet parseString(String s) {
0079: CharSet result = null;
0080:
0081: // if "s" is in the expression cache, pull the result out
0082: // of the expresison cache
0083: if (expressionCache != null) {
0084: result = (CharSet) expressionCache.get(s);
0085: }
0086:
0087: // otherwise, use doParseString() to actually parse the string,
0088: // and then add a corresponding entry to the expression cache
0089: if (result == null) {
0090: result = doParseString(s);
0091: if (expressionCache == null) {
0092: expressionCache = new Hashtable();
0093: }
0094: expressionCache.put(s, result);
0095: }
0096: result = (CharSet) (result.clone());
0097: return result;
0098: }
0099:
0100: /**
0101: * This function is used by parseString() to actually parse the string
0102: */
0103: private static CharSet doParseString(String s) {
0104: CharSet result = new CharSet();
0105: int p = 0;
0106:
0107: boolean haveDash = false;
0108: boolean haveTilde = false;
0109: boolean wIsReal = false;
0110: char w = '\u0000';
0111:
0112: // for each character in the description...
0113: while (p < s.length()) {
0114: char c = s.charAt(p);
0115:
0116: // if it's an opening bracket...
0117: if (c == '[') {
0118: // flush the single-character cache
0119: if (wIsReal) {
0120: result.internalUnion(new CharSet(w));
0121: }
0122:
0123: // locate the matching closing bracket
0124: int bracketLevel = 1;
0125: int q = p + 1;
0126: while (bracketLevel != 0) {
0127: // if no matching bracket by end of string then...
0128: if (q >= s.length()) {
0129: throw new IllegalArgumentException(
0130: "Parse error at position " + p + " in "
0131: + s);
0132: }
0133: switch (s.charAt(q)) {
0134: case '\\': // need to step over next character
0135: ++q;
0136: break;
0137: case '[':
0138: ++bracketLevel;
0139: break;
0140: case ']':
0141: --bracketLevel;
0142: break;
0143: }
0144: ++q;
0145: }
0146: --q;
0147:
0148: // call parseString() recursively to parse the text inside
0149: // the brackets, then either add or subtract the result from
0150: // our running result depending on whether or not the []
0151: // expresison was preceded by a ^
0152: if (!haveTilde) {
0153: result.internalUnion(CharSet.parseString(s
0154: .substring(p + 1, q)));
0155: } else {
0156: result.internalDifference(CharSet.parseString(s
0157: .substring(p + 1, q)));
0158: }
0159: haveTilde = false;
0160: haveDash = false;
0161: wIsReal = false;
0162: p = q + 1;
0163: }
0164:
0165: // if the character is a colon...
0166: else if (c == ':') {
0167: // flush the single-character cache
0168: if (wIsReal) {
0169: result.internalUnion(new CharSet(w));
0170: }
0171:
0172: // locate the matching colon (and throw an error if there
0173: // isn't one)
0174: int q = s.indexOf(':', p + 1);
0175: if (q == -1) {
0176: throw new IllegalArgumentException(
0177: "Parse error at position " + p + " in " + s);
0178: }
0179:
0180: // use charSetForCategory() to parse the text in the colons,
0181: // and either add or substract the result from our running
0182: // result depending on whether the :: expression was
0183: // preceded by a ^
0184: if (!haveTilde) {
0185: result.internalUnion(charSetForCategory(s
0186: .substring(p + 1, q)));
0187: } else {
0188: result.internalDifference(charSetForCategory(s
0189: .substring(p + 1, q)));
0190: }
0191:
0192: // reset everything and advance to the next character
0193: haveTilde = false;
0194: haveDash = false;
0195: wIsReal = false;
0196: p = q + 1;
0197: }
0198:
0199: // if the character is a dash, set an appropriate flag
0200: else if (c == '-') {
0201: if (wIsReal) {
0202: haveDash = true;
0203: }
0204: ++p;
0205: }
0206:
0207: // if the character is a caret, flush the single-character
0208: // cache and set an appropriate flag. If the set is empty
0209: // (i.e., if the expression begins with ^), invert the set
0210: // (i.e., set it to include everything). The idea here is
0211: // that a set that includes nothing but ^ expressions
0212: // means "everything but these things".
0213: else if (c == '^') {
0214: if (wIsReal) {
0215: result.internalUnion(new CharSet(w));
0216: wIsReal = false;
0217: }
0218: haveTilde = true;
0219: ++p;
0220: if (result.empty()) {
0221: result.internalComplement();
0222: }
0223: }
0224:
0225: // throw an exception on an illegal character
0226: else if (c >= ' ' && c < '\u007f' && !Character.isLetter(c)
0227: && !Character.isDigit(c) && c != '\\') {
0228: throw new IllegalArgumentException(
0229: "Parse error at position " + p + " in " + s);
0230: }
0231:
0232: // otherwise, we end up here...
0233: else {
0234: // on a backslash, advance to the next character
0235: if (c == '\\') {
0236: ++p;
0237: }
0238:
0239: // if the preceding character was a dash, this character
0240: // defines the end of a range. Add or subtract that range
0241: // from the running result depending on whether or not it
0242: // was preceded by a ^
0243: if (haveDash) {
0244: if (s.charAt(p) < w) {
0245: throw new IllegalArgumentException(
0246: "U+"
0247: + Integer.toHexString(s
0248: .charAt(p))
0249: + " is less than U+"
0250: + Integer.toHexString(w)
0251: + ". Dash expressions "
0252: + "can't have their endpoints in reverse order.");
0253: }
0254:
0255: if (!haveTilde) {
0256: result.internalUnion(new CharSet(w, s
0257: .charAt(p++)));
0258: } else {
0259: result.internalDifference(new CharSet(w, s
0260: .charAt(p++)));
0261: }
0262: haveDash = false;
0263: haveTilde = false;
0264: wIsReal = false;
0265: }
0266:
0267: // if the preceding character was a caret, remove this character
0268: // from the running result
0269: else if (haveTilde) {
0270: result
0271: .internalDifference(new CharSet(s
0272: .charAt(p++)));
0273: haveTilde = false;
0274: wIsReal = false;
0275: }
0276:
0277: // otherwise, flush the single-character cache and then
0278: // put this character into the cache
0279: else if (wIsReal) {
0280: result.internalUnion(new CharSet(w));
0281: w = s.charAt(p++);
0282: wIsReal = true;
0283: } else {
0284: w = s.charAt(p++);
0285: wIsReal = true;
0286: }
0287: }
0288: }
0289:
0290: // finally, flush the single-character cache one last time
0291: if (wIsReal) {
0292: result.internalUnion(new CharSet(w));
0293: }
0294:
0295: return result;
0296: }
0297:
0298: /**
0299: * Creates a CharSet containing all the characters in a particular
0300: * Unicode category. The text is either a two-character code from
0301: * the Unicode database or a single character that begins one or more
0302: * two-character codes.
0303: */
0304: private static CharSet charSetForCategory(String category) {
0305: // throw an exception if we have anything other than one or two
0306: // characters inside the colons
0307: if (category.length() == 0 || category.length() >= 3) {
0308: throw new IllegalArgumentException(
0309: "Invalid character category: " + category);
0310: }
0311:
0312: // if we have two characters, search the category map for that code
0313: // and either construct and return a CharSet from the data in the
0314: // category map or throw an exception
0315: if (category.length() == 2) {
0316: for (int i = 0; i < categoryMap.length; i++) {
0317: if (categoryMap[i][0].equals(category)) {
0318: return new CharSet(categoryMap[i][1]);
0319: }
0320: }
0321: throw new IllegalArgumentException(
0322: "Invalid character category: " + category);
0323: }
0324:
0325: // if we have one character, search the category map for codes beginning
0326: // with that letter, and union together all of the matching sets that
0327: // we find (or throw an exception if there are no matches)
0328: else if (category.length() == 1) {
0329: CharSet result = new CharSet();
0330: for (int i = 0; i < categoryMap.length; i++) {
0331: if (categoryMap[i][0].startsWith(category)) {
0332: result = result
0333: .union(new CharSet(categoryMap[i][1]));
0334: }
0335: }
0336: if (result.empty()) {
0337: throw new IllegalArgumentException(
0338: "Invalid character category: " + category);
0339: } else {
0340: return result;
0341: }
0342: }
0343: return new CharSet(); // should never get here, but to make the compiler happy...
0344: }
0345:
0346: /**
0347: * Returns a copy of CharSet's expression cache and sets CharSet's
0348: * expression cache to empty.
0349: */
0350: public static Hashtable releaseExpressionCache() {
0351: Hashtable result = expressionCache;
0352: expressionCache = null;
0353: return result;
0354: }
0355:
0356: //==========================================================================
0357: // CharSet manipulation
0358: //==========================================================================
/**
 * Creates a CharSet that contains no characters.
 */
public CharSet() {
    // an empty range string means an empty set
    this("");
}
0365:
/**
 * Creates a CharSet that contains exactly one character.
 * @param c The only member of the new CharSet
 */
public CharSet(char c) {
    // a single character is stored as a range that starts and ends on it
    chars = new String(new char[] { c, c });
}
0376:
0377: /**
0378: * Creates a CharSet containing a range of characters.
0379: * @param lo The lowest-numbered character to include in the range
0380: * @param hi The highest-numbered character to include in the range
0381: */
0382: public CharSet(char lo, char hi) {
0383: StringBuffer temp = new StringBuffer();
0384: if (lo <= hi) {
0385: temp.append(lo);
0386: temp.append(hi);
0387: } else {
0388: temp.append(hi);
0389: temp.append(lo);
0390: }
0391: chars = temp.toString();
0392: }
0393:
/**
 * Creates a CharSet directly from a string in the internal range-pair
 * format. The string is stored as given, with no validation, so a
 * malformed string leaves the new CharSet in an undefined state.
 * @param chars The range pairs to adopt as this CharSet's storage
 */
private CharSet(String chars) {
    this.chars = chars;
}
0402:
0403: /**
0404: * Returns a CharSet representing the union of two CharSets.
0405: */
0406: public CharSet union(CharSet that) {
0407: return new CharSet(doUnion(that.chars).toString());
0408: }
0409:
0410: /**
0411: * Adds the characters in "that" to this CharSet
0412: */
0413: private void internalUnion(CharSet that) {
0414: chars = doUnion(that.chars).toString();
0415: }
0416:
0417: /**
0418: * The actual implementation of the union functions
0419: */
0420: private StringBuffer doUnion(String c2) {
0421: StringBuffer result = new StringBuffer();
0422:
0423: int i = 0;
0424: int j = 0;
0425:
0426: // consider all the characters in both strings
0427: while (i < chars.length() && j < c2.length()) {
0428: char ub;
0429:
0430: // the first character in the result is the lower of the
0431: // starting characters of the two strings, and "ub" gets
0432: // set to the upper bound of that range
0433: if (chars.charAt(i) < c2.charAt(j)) {
0434: result.append(chars.charAt(i));
0435: ub = chars.charAt(++i);
0436: } else {
0437: result.append(c2.charAt(j));
0438: ub = c2.charAt(++j);
0439: }
0440:
0441: // for as long as one of our two pointers is pointing to a range's
0442: // end point, or i is pointing to a character that is less than
0443: // "ub" plus one (the "plus one" stitches touching ranges together)...
0444: while (i % 2 == 1
0445: || j % 2 == 1
0446: || (i < chars.length() && chars.charAt(i) <= ub + 1)) {
0447:
0448: // advance i to the first character that is greater than
0449: // "ub" plus one
0450: while (i < chars.length() && chars.charAt(i) <= ub + 1) {
0451: ++i;
0452: }
0453:
0454: // if i points to the endpoint of a range, update "ub"
0455: // to that character, or if i points to the start of
0456: // a range and the endpoint of the preceding range is
0457: // greater than "ub", update "up" to _that_ character
0458: if (i % 2 == 1) {
0459: ub = chars.charAt(i);
0460: } else if (i > 0 && chars.charAt(i - 1) > ub) {
0461: ub = chars.charAt(i - 1);
0462: }
0463:
0464: // now advance j to the first character that is greater
0465: // that "ub" plus one
0466: while (j < c2.length() && c2.charAt(j) <= ub + 1) {
0467: ++j;
0468: }
0469:
0470: // if j points to the endpoint of a range, update "ub"
0471: // to that character, or if j points to the start of
0472: // a range and the endpoint of the preceding range is
0473: // greater than "ub", update "up" to _that_ character
0474: if (j % 2 == 1) {
0475: ub = c2.charAt(j);
0476: } else if (j > 0 && c2.charAt(j - 1) > ub) {
0477: ub = c2.charAt(j - 1);
0478: }
0479: }
0480: // when we finally fall out of this loop, we will have stitched
0481: // together a series of ranges that overlap or touch, i and j
0482: // will both point to starting points of ranges, and "ub" will
0483: // be the endpoint of the range we're working on. Write "ub"
0484: // to the result
0485: result.append(ub);
0486:
0487: // loop back around to create the next range in the result
0488: }
0489:
0490: // we fall out to here when we've exhausted all the characters in
0491: // one of the operands. We can append all of the remaining characters
0492: // in the other operand without doing any extra work.
0493: if (i < chars.length()) {
0494: result.append(chars.substring(i));
0495: }
0496: if (j < c2.length()) {
0497: result.append(c2.substring(j));
0498: }
0499:
0500: return result;
0501: }
0502:
0503: /**
0504: * Returns the intersection of two CharSets.
0505: */
0506: public CharSet intersection(CharSet that) {
0507: return new CharSet(doIntersection(that.chars).toString());
0508: }
0509:
0510: /**
0511: * Removes from this CharSet any characters that aren't also in "that"
0512: */
0513: private void internalIntersection(CharSet that) {
0514: chars = doIntersection(that.chars).toString();
0515: }
0516:
0517: /**
0518: * The internal implementation of the two intersection functions
0519: */
0520: private StringBuffer doIntersection(String c2) {
0521: StringBuffer result = new StringBuffer();
0522:
0523: int i = 0;
0524: int j = 0;
0525: int oldI;
0526: int oldJ;
0527:
0528: // iterate until we've exhausted one of the operands
0529: while (i < chars.length() && j < c2.length()) {
0530:
0531: // advance j until it points to a character that is larger than
0532: // the one i points to. If this is the beginning of a one-
0533: // character range, advance j to point to the end
0534: if (i < chars.length() && i % 2 == 0) {
0535: while (j < c2.length()
0536: && c2.charAt(j) < chars.charAt(i)) {
0537: ++j;
0538: }
0539: if (j < c2.length() && j % 2 == 0
0540: && c2.charAt(j) == chars.charAt(i)) {
0541: ++j;
0542: }
0543: }
0544:
0545: // if j points to the endpoint of a range, save the current
0546: // value of i, then advance i until it reaches a character
0547: // which is larger than the character pointed at
0548: // by j. All of the characters we've advanced over (except
0549: // the one currently pointed to by i) are added to the result
0550: oldI = i;
0551: while (j % 2 == 1 && i < chars.length()
0552: && chars.charAt(i) <= c2.charAt(j)) {
0553: ++i;
0554: }
0555: result.append(chars.substring(oldI, i));
0556:
0557: // if i points to the endpoint of a range, save the current
0558: // value of j, then advance j until it reaches a character
0559: // which is larger than the character pointed at
0560: // by i. All of the characters we've advanced over (except
0561: // the one currently pointed to by i) are added to the result
0562: oldJ = j;
0563: while (i % 2 == 1 && j < c2.length()
0564: && c2.charAt(j) <= chars.charAt(i)) {
0565: ++j;
0566: }
0567: result.append(c2.substring(oldJ, j));
0568:
0569: // advance i until it points to a character larger than j
0570: // If it points at the beginning of a one-character range,
0571: // advance it to the end of that range
0572: if (j < c2.length() && j % 2 == 0) {
0573: while (i < chars.length()
0574: && chars.charAt(i) < c2.charAt(j)) {
0575: ++i;
0576: }
0577: if (i < chars.length() && i % 2 == 0
0578: && c2.charAt(j) == chars.charAt(i)) {
0579: ++i;
0580: }
0581: }
0582: }
0583:
0584: return result;
0585: }
0586:
0587: /**
0588: * Returns a CharSet containing all the characters in "this" that
0589: * aren't also in "that"
0590: */
0591: public CharSet difference(CharSet that) {
0592: return new CharSet(doIntersection(
0593: that.doComplement().toString()).toString());
0594: }
0595:
0596: /**
0597: * Removes from "this" all the characters that are also in "that"
0598: */
0599: private void internalDifference(CharSet that) {
0600: chars = doIntersection(that.doComplement().toString())
0601: .toString();
0602: }
0603:
0604: /**
0605: * Returns a CharSet containing all the characters which are not
0606: * in "this"
0607: */
0608: public CharSet complement() {
0609: return new CharSet(doComplement().toString());
0610: }
0611:
0612: /**
0613: * Complements "this". All the characters it contains are removed,
0614: * and all the characters it doesn't contain are added.
0615: */
0616: private void internalComplement() {
0617: chars = doComplement().toString();
0618: }
0619:
0620: /**
0621: * The internal implementation function for the complement routines
0622: */
0623: private StringBuffer doComplement() {
0624: // the complement of an empty CharSet is one containing everything
0625: if (empty()) {
0626: return new StringBuffer("\u0000\uffff");
0627: }
0628:
0629: StringBuffer result = new StringBuffer();
0630:
0631: int i = 0;
0632:
0633: // the result begins with \u0000 unless the original CharSet does
0634: if (chars.charAt(0) != '\u0000') {
0635: result.append('\u0000');
0636: }
0637:
0638: // walk through the characters in this CharSet. Append a pair of
0639: // characters the first of which is one less than the first
0640: // character we see and the second of which is one plus the second
0641: // character we see (don't write the first character if it's \u0000,
0642: // and don't write the second character if it's \uffff.
0643: while (i < chars.length()) {
0644: if (chars.charAt(i) != '\u0000') {
0645: result.append((char) (chars.charAt(i) - 1));
0646: }
0647: if (chars.charAt(i + 1) != '\uffff') {
0648: result.append((char) (chars.charAt(i + 1) + 1));
0649: }
0650: i += 2;
0651: }
0652:
0653: // add \uffff to the end of the result, unless it was in
0654: // the original set
0655: if (chars.charAt(chars.length() - 1) != '\uffff') {
0656: result.append('\uffff');
0657: }
0658:
0659: return result;
0660: }
0661:
0662: /**
0663: * Returns true if this CharSet contains the specified character
0664: * @param c The character we're testing for set membership
0665: */
0666: public boolean contains(char c) {
0667: // search for the first range endpoint that is greater than or
0668: // equal to c
0669: int i = 1;
0670: while (i < chars.length() && chars.charAt(i) < c) {
0671: i += 2;
0672: }
0673:
0674: // if we've walked off the end, we don't contain c
0675: if (i == chars.length()) {
0676: return false;
0677: }
0678:
0679: // otherwise, we contain c if the beginning of the range is less
0680: // than or equal to c
0681: return chars.charAt(i - 1) <= c;
0682: }
0683:
0684: /**
0685: * Returns true if "that" is another instance of CharSet containing
0686: * the exact same characters as this one
0687: */
0688: public boolean equals(Object that) {
0689: return (that instanceof CharSet)
0690: && chars.equals(((CharSet) that).chars);
0691: }
0692:
0693: /**
0694: * Creates a new CharSet that is equal to this one
0695: */
0696: public Object clone() {
0697: return new CharSet(chars);
0698: }
0699:
0700: /**
0701: * Returns true if this CharSet contains no characters
0702: */
0703: public boolean empty() {
0704: return chars.length() == 0;
0705: }
0706:
0707: /**
0708: * Returns a textual representation of this CharSet. If the result
0709: * of calling this function is passed to CharSet.parseString(), it
0710: * will produce another CharSet that is equal to this one.
0711: */
0712: public String toString() {
0713: StringBuffer result = new StringBuffer();
0714:
0715: // the result begins with an opening bracket
0716: result.append('[');
0717:
0718: // iterate through the ranges in the CharSet
0719: for (int i = 0; i < chars.length(); i += 2) {
0720: // for a range with the same beginning and ending point,
0721: // output that character
0722: if (chars.charAt(i) == chars.charAt(i + 1)) {
0723: result.append(chars.charAt(i));
0724: }
0725:
0726: // otherwise, output the start and end points of the range
0727: // separated by a dash
0728: else {
0729: result.append(chars.charAt(i) + "-"
0730: + chars.charAt(i + 1));
0731: }
0732: }
0733:
0734: // the result ends with a closing bracket
0735: result.append(']');
0736: return result.toString();
0737: }
0738:
0739: /**
0740: * Returns a String representing the contents of this CharSet
0741: * in the same form in which they're stored internally: as pairs
0742: * of characters representing the start and end points of ranges
0743: */
0744: public String getRanges() {
0745: return chars;
0746: }
0747:
0748: /**
0749: * Returns an Enumeration that will return the ranges of characters
0750: * contained in this CharSet one at a time
0751: */
0752: public Enumeration getChars() {
0753: return new Enumeration(this );
0754: }
0755:
0756: //==========================================================================
0757: // CharSet.Enumeration
0758: //==========================================================================
0759:
0760: /**
0761: * An Enumeration that can be used to extract the character ranges
0762: * from a CharSet one at a time
0763: */
0764: public class Enumeration implements java.util.Enumeration {
0765: /**
0766: * Initializes a CharSet.Enumeration
0767: */
0768: Enumeration(CharSet cs) {
0769: this .chars = cs.chars;
0770: p = 0;
0771: }
0772:
0773: /**
0774: * Returns true if the enumeration hasn't yet returned
0775: * all the ranges in the CharSet
0776: */
0777: public boolean hasMoreElements() {
0778: return p < chars.length();
0779: }
0780:
0781: /**
0782: * Returns the next range in the CarSet
0783: */
0784: public Object nextElement() {
0785: char[] result = new char[2];
0786: result[0] = chars.charAt(p);
0787: result[1] = chars.charAt(p + 1);
0788: p += 2;
0789:
0790: return result;
0791: }
0792:
0793: int p;
0794: String chars;
0795: }
0796:
0797: //==========================================================================
0798: // tables for charSetForCategory()
0799: //==========================================================================
0800:
0801: /**
0802: * Table used with charSetForCategory(). This is an array of pairs
0803: * of Strings. The first column of Strings is Unicode character category
0804: * codes as defined in the Unicode database. The second column is the
0805: * internal storage for a CharSet containing the characters in that
0806: * category.
0807: */
0808: private static final String[][] categoryMap = {
0809: {
0810: "Ll",
0811: "az\u00AA\u00AA\u00B5\u00B5\u00BA\u00BA\u00DF\u00F6\u00F8"
0812: + "\u00FF\u0101\u0101\u0103\u0103\u0105\u0105\u0107\u0107\u0109"
0813: + "\u0109\u010B\u010B\u010D\u010D\u010F\u010F\u0111\u0111\u0113"
0814: + "\u0113\u0115\u0115\u0117\u0117\u0119\u0119\u011B\u011B\u011D"
0815: + "\u011D\u011F\u011F\u0121\u0121\u0123\u0123\u0125\u0125\u0127"
0816: + "\u0127\u0129\u0129\u012B\u012B\u012D\u012D\u012F\u012F\u0131"
0817: + "\u0131\u0133\u0133\u0135\u0135\u0137\u0138\u013A\u013A\u013C"
0818: + "\u013C\u013E\u013E\u0140\u0140\u0142\u0142\u0144\u0144\u0146"
0819: + "\u0146\u0148\u0149\u014B\u014B\u014D\u014D\u014F\u014F\u0151"
0820: + "\u0151\u0153\u0153\u0155\u0155\u0157\u0157\u0159\u0159\u015B"
0821: + "\u015B\u015D\u015D\u015F\u015F\u0161\u0161\u0163\u0163\u0165"
0822: + "\u0165\u0167\u0167\u0169\u0169\u016B\u016B\u016D\u016D\u016F"
0823: + "\u016F\u0171\u0171\u0173\u0173\u0175\u0175\u0177\u0177\u017A"
0824: + "\u017A\u017C\u017C\u017E\u0180\u0183\u0183\u0185\u0185\u0188"
0825: + "\u0188\u018C\u018D\u0192\u0192\u0195\u0195\u0199\u019B\u019E"
0826: + "\u019E\u01A1\u01A1\u01A3\u01A3\u01A5\u01A5\u01A8\u01A8\u01AA"
0827: + "\u01AB\u01AD\u01AD\u01B0\u01B0\u01B4\u01B4\u01B6\u01B6\u01B9"
0828: + "\u01BA\u01BD\u01BF\u01C6\u01C6\u01C9\u01C9\u01CC\u01CC\u01CE"
0829: + "\u01CE\u01D0\u01D0\u01D2\u01D2\u01D4\u01D4\u01D6\u01D6\u01D8"
0830: + "\u01D8\u01DA\u01DA\u01DC\u01DD\u01DF\u01DF\u01E1\u01E1\u01E3"
0831: + "\u01E3\u01E5\u01E5\u01E7\u01E7\u01E9\u01E9\u01EB\u01EB\u01ED"
0832: + "\u01ED\u01EF\u01F0\u01F3\u01F3\u01F5\u01F5\u01F9\u01F9\u01FB"
0833: + "\u01FB\u01FD\u01FD\u01FF\u01FF\u0201\u0201\u0203\u0203\u0205"
0834: + "\u0205\u0207\u0207\u0209\u0209\u020B\u020B\u020D\u020D\u020F"
0835: + "\u020F\u0211\u0211\u0213\u0213\u0215\u0215\u0217\u0217\u0219"
0836: + "\u0219\u021B\u021B\u021D\u021D\u021F\u021F\u0223\u0223\u0225"
0837: + "\u0225\u0227\u0227\u0229\u0229\u022B\u022B\u022D\u022D\u022F"
0838: + "\u022F\u0231\u0231\u0233\u0233\u0250\u02AD\u0390\u0390\u03AC"
0839: + "\u03CE\u03D0\u03D1\u03D5\u03D7\u03DB\u03DB\u03DD\u03DD\u03DF"
0840: + "\u03DF\u03E1\u03E1\u03E3\u03E3\u03E5\u03E5\u03E7\u03E7\u03E9"
0841: + "\u03E9\u03EB\u03EB\u03ED\u03ED\u03EF\u03F3\u0430\u045F\u0461"
0842: + "\u0461\u0463\u0463\u0465\u0465\u0467\u0467\u0469\u0469\u046B"
0843: + "\u046B\u046D\u046D\u046F\u046F\u0471\u0471\u0473\u0473\u0475"
0844: + "\u0475\u0477\u0477\u0479\u0479\u047B\u047B\u047D\u047D\u047F"
0845: + "\u047F\u0481\u0481\u048D\u048D\u048F\u048F\u0491\u0491\u0493"
0846: + "\u0493\u0495\u0495\u0497\u0497\u0499\u0499\u049B\u049B\u049D"
0847: + "\u049D\u049F\u049F\u04A1\u04A1\u04A3\u04A3\u04A5\u04A5\u04A7"
0848: + "\u04A7\u04A9\u04A9\u04AB\u04AB\u04AD\u04AD\u04AF\u04AF\u04B1"
0849: + "\u04B1\u04B3\u04B3\u04B5\u04B5\u04B7\u04B7\u04B9\u04B9\u04BB"
0850: + "\u04BB\u04BD\u04BD\u04BF\u04BF\u04C2\u04C2\u04C4\u04C4\u04C8"
0851: + "\u04C8\u04CC\u04CC\u04D1\u04D1\u04D3\u04D3\u04D5\u04D5\u04D7"
0852: + "\u04D7\u04D9\u04D9\u04DB\u04DB\u04DD\u04DD\u04DF\u04DF\u04E1"
0853: + "\u04E1\u04E3\u04E3\u04E5\u04E5\u04E7\u04E7\u04E9\u04E9\u04EB"
0854: + "\u04EB\u04ED\u04ED\u04EF\u04EF\u04F1\u04F1\u04F3\u04F3\u04F5"
0855: + "\u04F5\u04F9\u04F9\u0561\u0587\u1E01\u1E01\u1E03\u1E03\u1E05"
0856: + "\u1E05\u1E07\u1E07\u1E09\u1E09\u1E0B\u1E0B\u1E0D\u1E0D\u1E0F"
0857: + "\u1E0F\u1E11\u1E11\u1E13\u1E13\u1E15\u1E15\u1E17\u1E17\u1E19"
0858: + "\u1E19\u1E1B\u1E1B\u1E1D\u1E1D\u1E1F\u1E1F\u1E21\u1E21\u1E23"
0859: + "\u1E23\u1E25\u1E25\u1E27\u1E27\u1E29\u1E29\u1E2B\u1E2B\u1E2D"
0860: + "\u1E2D\u1E2F\u1E2F\u1E31\u1E31\u1E33\u1E33\u1E35\u1E35\u1E37"
0861: + "\u1E37\u1E39\u1E39\u1E3B\u1E3B\u1E3D\u1E3D\u1E3F\u1E3F\u1E41"
0862: + "\u1E41\u1E43\u1E43\u1E45\u1E45\u1E47\u1E47\u1E49\u1E49\u1E4B"
0863: + "\u1E4B\u1E4D\u1E4D\u1E4F\u1E4F\u1E51\u1E51\u1E53\u1E53\u1E55"
0864: + "\u1E55\u1E57\u1E57\u1E59\u1E59\u1E5B\u1E5B\u1E5D\u1E5D\u1E5F"
0865: + "\u1E5F\u1E61\u1E61\u1E63\u1E63\u1E65\u1E65\u1E67\u1E67\u1E69"
0866: + "\u1E69\u1E6B\u1E6B\u1E6D\u1E6D\u1E6F\u1E6F\u1E71\u1E71\u1E73"
0867: + "\u1E73\u1E75\u1E75\u1E77\u1E77\u1E79\u1E79\u1E7B\u1E7B\u1E7D"
0868: + "\u1E7D\u1E7F\u1E7F\u1E81\u1E81\u1E83\u1E83\u1E85\u1E85\u1E87"
0869: + "\u1E87\u1E89\u1E89\u1E8B\u1E8B\u1E8D\u1E8D\u1E8F\u1E8F\u1E91"
0870: + "\u1E91\u1E93\u1E93\u1E95\u1E9B\u1EA1\u1EA1\u1EA3\u1EA3\u1EA5"
0871: + "\u1EA5\u1EA7\u1EA7\u1EA9\u1EA9\u1EAB\u1EAB\u1EAD\u1EAD\u1EAF"
0872: + "\u1EAF\u1EB1\u1EB1\u1EB3\u1EB3\u1EB5\u1EB5\u1EB7\u1EB7\u1EB9"
0873: + "\u1EB9\u1EBB\u1EBB\u1EBD\u1EBD\u1EBF\u1EBF\u1EC1\u1EC1\u1EC3"
0874: + "\u1EC3\u1EC5\u1EC5\u1EC7\u1EC7\u1EC9\u1EC9\u1ECB\u1ECB\u1ECD"
0875: + "\u1ECD\u1ECF\u1ECF\u1ED1\u1ED1\u1ED3\u1ED3\u1ED5\u1ED5\u1ED7"
0876: + "\u1ED7\u1ED9\u1ED9\u1EDB\u1EDB\u1EDD\u1EDD\u1EDF\u1EDF\u1EE1"
0877: + "\u1EE1\u1EE3\u1EE3\u1EE5\u1EE5\u1EE7\u1EE7\u1EE9\u1EE9\u1EEB"
0878: + "\u1EEB\u1EED\u1EED\u1EEF\u1EEF\u1EF1\u1EF1\u1EF3\u1EF3\u1EF5"
0879: + "\u1EF5\u1EF7\u1EF7\u1EF9\u1EF9\u1F00\u1F07\u1F10\u1F15\u1F20"
0880: + "\u1F27\u1F30\u1F37\u1F40\u1F45\u1F50\u1F57\u1F60\u1F67\u1F70"
0881: + "\u1F7D\u1F80\u1F87\u1F90\u1F97\u1FA0\u1FA7\u1FB0\u1FB4\u1FB6"
0882: + "\u1FB7\u1FBE\u1FBE\u1FC2\u1FC4\u1FC6\u1FC7\u1FD0\u1FD3\u1FD6"
0883: + "\u1FD7\u1FE0\u1FE7\u1FF2\u1FF4\u1FF6\u1FF7\u207F\u207F\u210A"
0884: + "\u210A\u210E\u210F\u2113\u2113\u212F\u212F\u2134\u2134\u2139"
0885: + "\u2139\uFB00\uFB06\uFB13\uFB17\uFF41\uFF5A" },
0886: {
0887: "Lu",
0888: "AZ\u00C0\u00D6\u00D8\u00DE\u0100\u0100\u0102\u0102\u0104"
0889: + "\u0104\u0106\u0106\u0108\u0108\u010A\u010A\u010C\u010C\u010E"
0890: + "\u010E\u0110\u0110\u0112\u0112\u0114\u0114\u0116\u0116\u0118"
0891: + "\u0118\u011A\u011A\u011C\u011C\u011E\u011E\u0120\u0120\u0122"
0892: + "\u0122\u0124\u0124\u0126\u0126\u0128\u0128\u012A\u012A\u012C"
0893: + "\u012C\u012E\u012E\u0130\u0130\u0132\u0132\u0134\u0134\u0136"
0894: + "\u0136\u0139\u0139\u013B\u013B\u013D\u013D\u013F\u013F\u0141"
0895: + "\u0141\u0143\u0143\u0145\u0145\u0147\u0147\u014A\u014A\u014C"
0896: + "\u014C\u014E\u014E\u0150\u0150\u0152\u0152\u0154\u0154\u0156"
0897: + "\u0156\u0158\u0158\u015A\u015A\u015C\u015C\u015E\u015E\u0160"
0898: + "\u0160\u0162\u0162\u0164\u0164\u0166\u0166\u0168\u0168\u016A"
0899: + "\u016A\u016C\u016C\u016E\u016E\u0170\u0170\u0172\u0172\u0174"
0900: + "\u0174\u0176\u0176\u0178\u0179\u017B\u017B\u017D\u017D\u0181"
0901: + "\u0182\u0184\u0184\u0186\u0187\u0189\u018B\u018E\u0191\u0193"
0902: + "\u0194\u0196\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A2\u01A4"
0903: + "\u01A4\u01A6\u01A7\u01A9\u01A9\u01AC\u01AC\u01AE\u01AF\u01B1"
0904: + "\u01B3\u01B5\u01B5\u01B7\u01B8\u01BC\u01BC\u01C4\u01C4\u01C7"
0905: + "\u01C7\u01CA\u01CA\u01CD\u01CD\u01CF\u01CF\u01D1\u01D1\u01D3"
0906: + "\u01D3\u01D5\u01D5\u01D7\u01D7\u01D9\u01D9\u01DB\u01DB\u01DE"
0907: + "\u01DE\u01E0\u01E0\u01E2\u01E2\u01E4\u01E4\u01E6\u01E6\u01E8"
0908: + "\u01E8\u01EA\u01EA\u01EC\u01EC\u01EE\u01EE\u01F1\u01F1\u01F4"
0909: + "\u01F4\u01F6\u01F8\u01FA\u01FA\u01FC\u01FC\u01FE\u01FE\u0200"
0910: + "\u0200\u0202\u0202\u0204\u0204\u0206\u0206\u0208\u0208\u020A"
0911: + "\u020A\u020C\u020C\u020E\u020E\u0210\u0210\u0212\u0212\u0214"
0912: + "\u0214\u0216\u0216\u0218\u0218\u021A\u021A\u021C\u021C\u021E"
0913: + "\u021E\u0222\u0222\u0224\u0224\u0226\u0226\u0228\u0228\u022A"
0914: + "\u022A\u022C\u022C\u022E\u022E\u0230\u0230\u0232\u0232\u0386"
0915: + "\u0386\u0388\u038A\u038C\u038C\u038E\u038F\u0391\u03A1\u03A3"
0916: + "\u03AB\u03D2\u03D4\u03DA\u03DA\u03DC\u03DC\u03DE\u03DE\u03E0"
0917: + "\u03E0\u03E2\u03E2\u03E4\u03E4\u03E6\u03E6\u03E8\u03E8\u03EA"
0918: + "\u03EA\u03EC\u03EC\u03EE\u03EE\u0400\u042F\u0460\u0460\u0462"
0919: + "\u0462\u0464\u0464\u0466\u0466\u0468\u0468\u046A\u046A\u046C"
0920: + "\u046C\u046E\u046E\u0470\u0470\u0472\u0472\u0474\u0474\u0476"
0921: + "\u0476\u0478\u0478\u047A\u047A\u047C\u047C\u047E\u047E\u0480"
0922: + "\u0480\u048C\u048C\u048E\u048E\u0490\u0490\u0492\u0492\u0494"
0923: + "\u0494\u0496\u0496\u0498\u0498\u049A\u049A\u049C\u049C\u049E"
0924: + "\u049E\u04A0\u04A0\u04A2\u04A2\u04A4\u04A4\u04A6\u04A6\u04A8"
0925: + "\u04A8\u04AA\u04AA\u04AC\u04AC\u04AE\u04AE\u04B0\u04B0\u04B2"
0926: + "\u04B2\u04B4\u04B4\u04B6\u04B6\u04B8\u04B8\u04BA\u04BA\u04BC"
0927: + "\u04BC\u04BE\u04BE\u04C0\u04C1\u04C3\u04C3\u04C7\u04C7\u04CB"
0928: + "\u04CB\u04D0\u04D0\u04D2\u04D2\u04D4\u04D4\u04D6\u04D6\u04D8"
0929: + "\u04D8\u04DA\u04DA\u04DC\u04DC\u04DE\u04DE\u04E0\u04E0\u04E2"
0930: + "\u04E2\u04E4\u04E4\u04E6\u04E6\u04E8\u04E8\u04EA\u04EA\u04EC"
0931: + "\u04EC\u04EE\u04EE\u04F0\u04F0\u04F2\u04F2\u04F4\u04F4\u04F8"
0932: + "\u04F8\u0531\u0556\u10A0\u10C5\u1E00\u1E00\u1E02\u1E02\u1E04"
0933: + "\u1E04\u1E06\u1E06\u1E08\u1E08\u1E0A\u1E0A\u1E0C\u1E0C\u1E0E"
0934: + "\u1E0E\u1E10\u1E10\u1E12\u1E12\u1E14\u1E14\u1E16\u1E16\u1E18"
0935: + "\u1E18\u1E1A\u1E1A\u1E1C\u1E1C\u1E1E\u1E1E\u1E20\u1E20\u1E22"
0936: + "\u1E22\u1E24\u1E24\u1E26\u1E26\u1E28\u1E28\u1E2A\u1E2A\u1E2C"
0937: + "\u1E2C\u1E2E\u1E2E\u1E30\u1E30\u1E32\u1E32\u1E34\u1E34\u1E36"
0938: + "\u1E36\u1E38\u1E38\u1E3A\u1E3A\u1E3C\u1E3C\u1E3E\u1E3E\u1E40"
0939: + "\u1E40\u1E42\u1E42\u1E44\u1E44\u1E46\u1E46\u1E48\u1E48\u1E4A"
0940: + "\u1E4A\u1E4C\u1E4C\u1E4E\u1E4E\u1E50\u1E50\u1E52\u1E52\u1E54"
0941: + "\u1E54\u1E56\u1E56\u1E58\u1E58\u1E5A\u1E5A\u1E5C\u1E5C\u1E5E"
0942: + "\u1E5E\u1E60\u1E60\u1E62\u1E62\u1E64\u1E64\u1E66\u1E66\u1E68"
0943: + "\u1E68\u1E6A\u1E6A\u1E6C\u1E6C\u1E6E\u1E6E\u1E70\u1E70\u1E72"
0944: + "\u1E72\u1E74\u1E74\u1E76\u1E76\u1E78\u1E78\u1E7A\u1E7A\u1E7C"
0945: + "\u1E7C\u1E7E\u1E7E\u1E80\u1E80\u1E82\u1E82\u1E84\u1E84\u1E86"
0946: + "\u1E86\u1E88\u1E88\u1E8A\u1E8A\u1E8C\u1E8C\u1E8E\u1E8E\u1E90"
0947: + "\u1E90\u1E92\u1E92\u1E94\u1E94\u1EA0\u1EA0\u1EA2\u1EA2\u1EA4"
0948: + "\u1EA4\u1EA6\u1EA6\u1EA8\u1EA8\u1EAA\u1EAA\u1EAC\u1EAC\u1EAE"
0949: + "\u1EAE\u1EB0\u1EB0\u1EB2\u1EB2\u1EB4\u1EB4\u1EB6\u1EB6\u1EB8"
0950: + "\u1EB8\u1EBA\u1EBA\u1EBC\u1EBC\u1EBE\u1EBE\u1EC0\u1EC0\u1EC2"
0951: + "\u1EC2\u1EC4\u1EC4\u1EC6\u1EC6\u1EC8\u1EC8\u1ECA\u1ECA\u1ECC"
0952: + "\u1ECC\u1ECE\u1ECE\u1ED0\u1ED0\u1ED2\u1ED2\u1ED4\u1ED4\u1ED6"
0953: + "\u1ED6\u1ED8\u1ED8\u1EDA\u1EDA\u1EDC\u1EDC\u1EDE\u1EDE\u1EE0"
0954: + "\u1EE0\u1EE2\u1EE2\u1EE4\u1EE4\u1EE6\u1EE6\u1EE8\u1EE8\u1EEA"
0955: + "\u1EEA\u1EEC\u1EEC\u1EEE\u1EEE\u1EF0\u1EF0\u1EF2\u1EF2\u1EF4"
0956: + "\u1EF4\u1EF6\u1EF6\u1EF8\u1EF8\u1F08\u1F0F\u1F18\u1F1D\u1F28"
0957: + "\u1F2F\u1F38\u1F3F\u1F48\u1F4D\u1F59\u1F59\u1F5B\u1F5B\u1F5D"
0958: + "\u1F5D\u1F5F\u1F5F\u1F68\u1F6F\u1FB8\u1FBB\u1FC8\u1FCB\u1FD8"
0959: + "\u1FDB\u1FE8\u1FEC\u1FF8\u1FFB\u2102\u2102\u2107\u2107\u210B"
0960: + "\u210D\u2110\u2112\u2115\u2115\u2119\u211D\u2124\u2124\u2126"
0961: + "\u2126\u2128\u2128\u212A\u212D\u2130\u2131\u2133\u2133\uFF21"
0962: + "\uFF3A" },
0963: {
0964: "Lt",
0965: "\u01C5\u01C5\u01C8\u01C8\u01CB\u01CB\u01F2\u01F2\u1F88"
0966: + "\u1F8F\u1F98\u1F9F\u1FA8\u1FAF\u1FBC\u1FBC\u1FCC\u1FCC\u1FFC"
0967: + "\u1FFC" },
0968: {
0969: "Lo",
0970: "\u01BB\u01BB\u01C0\u01C3\u05D0\u05EA\u05F0\u05F2\u0621"
0971: + "\u063A\u0641\u064A\u0671\u06D3\u06D5\u06D5\u06FA\u06FC\u0710"
0972: + "\u0710\u0712\u072C\u0780\u07A5\u0905\u0939\u093D\u093D\u0950"
0973: + "\u0950\u0958\u0961\u0985\u098C\u098F\u0990\u0993\u09A8\u09AA"
0974: + "\u09B0\u09B2\u09B2\u09B6\u09B9\u09DC\u09DD\u09DF\u09E1\u09F0"
0975: + "\u09F1\u0A05\u0A0A\u0A0F\u0A10\u0A13\u0A28\u0A2A\u0A30\u0A32"
0976: + "\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59\u0A5C\u0A5E\u0A5E\u0A72"
0977: + "\u0A74\u0A85\u0A8B\u0A8D\u0A8D\u0A8F\u0A91\u0A93\u0AA8\u0AAA"
0978: + "\u0AB0\u0AB2\u0AB3\u0AB5\u0AB9\u0ABD\u0ABD\u0AD0\u0AD0\u0AE0"
0979: + "\u0AE0\u0B05\u0B0C\u0B0F\u0B10\u0B13\u0B28\u0B2A\u0B30\u0B32"
0980: + "\u0B33\u0B36\u0B39\u0B3D\u0B3D\u0B5C\u0B5D\u0B5F\u0B61\u0B85"
0981: + "\u0B8A\u0B8E\u0B90\u0B92\u0B95\u0B99\u0B9A\u0B9C\u0B9C\u0B9E"
0982: + "\u0B9F\u0BA3\u0BA4\u0BA8\u0BAA\u0BAE\u0BB5\u0BB7\u0BB9\u0C05"
0983: + "\u0C0C\u0C0E\u0C10\u0C12\u0C28\u0C2A\u0C33\u0C35\u0C39\u0C60"
0984: + "\u0C61\u0C85\u0C8C\u0C8E\u0C90\u0C92\u0CA8\u0CAA\u0CB3\u0CB5"
0985: + "\u0CB9\u0CDE\u0CDE\u0CE0\u0CE1\u0D05\u0D0C\u0D0E\u0D10\u0D12"
0986: + "\u0D28\u0D2A\u0D39\u0D60\u0D61\u0D85\u0D96\u0D9A\u0DB1\u0DB3"
0987: + "\u0DBB\u0DBD\u0DBD\u0DC0\u0DC6\u0E01\u0E30\u0E32\u0E33\u0E40"
0988: + "\u0E45\u0E81\u0E82\u0E84\u0E84\u0E87\u0E88\u0E8A\u0E8A\u0E8D"
0989: + "\u0E8D\u0E94\u0E97\u0E99\u0E9F\u0EA1\u0EA3\u0EA5\u0EA5\u0EA7"
0990: + "\u0EA7\u0EAA\u0EAB\u0EAD\u0EB0\u0EB2\u0EB3\u0EBD\u0EBD\u0EC0"
0991: + "\u0EC4\u0EDC\u0EDD\u0F00\u0F00\u0F40\u0F47\u0F49\u0F6A\u0F88"
0992: + "\u0F8B\u1000\u1021\u1023\u1027\u1029\u102A\u1050\u1055\u10D0"
0993: + "\u10F6\u1100\u1159\u115F\u11A2\u11A8\u11F9\u1200\u1206\u1208"
0994: + "\u1246\u1248\u1248\u124A\u124D\u1250\u1256\u1258\u1258\u125A"
0995: + "\u125D\u1260\u1286\u1288\u1288\u128A\u128D\u1290\u12AE\u12B0"
0996: + "\u12B0\u12B2\u12B5\u12B8\u12BE\u12C0\u12C0\u12C2\u12C5\u12C8"
0997: + "\u12CE\u12D0\u12D6\u12D8\u12EE\u12F0\u130E\u1310\u1310\u1312"
0998: + "\u1315\u1318\u131E\u1320\u1346\u1348\u135A\u13A0\u13F4\u1401"
0999: + "\u166C\u166F\u1676\u1681\u169A\u16A0\u16EA\u1780\u17B3\u1820"
1000: + "\u1842\u1844\u1877\u1880\u18A8\u2135\u2138\u3006\u3006\u3041"
1001: + "\u3094\u30A1\u30FA\u3105\u312C\u3131\u318E\u31A0\u31B7\u3400"
1002: + "\u4DB5\u4E00\u9FA5\uA000\uA48C\uAC00\uD7A3\uF900\uFA2D\uFB1D"
1003: + "\uFB1D\uFB1F\uFB28\uFB2A\uFB36\uFB38\uFB3C\uFB3E\uFB3E\uFB40"
1004: + "\uFB41\uFB43\uFB44\uFB46\uFBB1\uFBD3\uFD3D\uFD50\uFD8F\uFD92"
1005: + "\uFDC7\uFDF0\uFDFB\uFE70\uFE72\uFE74\uFE74\uFE76\uFEFC\uFF66"
1006: + "\uFF6F\uFF71\uFF9D\uFFA0\uFFBE\uFFC2\uFFC7\uFFCA\uFFCF\uFFD2"
1007: + "\uFFD7\uFFDA\uFFDC" },
1008: {
1009: "Lm",
1010: "\u02B0\u02B8\u02BB\u02C1\u02D0\u02D1\u02E0\u02E4\u02EE"
1011: + "\u02EE\u037A\u037A\u0559\u0559\u0640\u0640\u06E5\u06E6\u0E46"
1012: + "\u0E46\u0EC6\u0EC6\u1843\u1843\u3005\u3005\u3031\u3035\u309D"
1013: + "\u309E\u30FC\u30FE\uFF70\uFF70\uFF9E\uFF9F" },
1014: {
1015: "Nd",
1016: "09\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66"
1017: + "\u0A6F\u0AE6\u0AEF\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6"
1018: + "\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9\u0F20\u0F29\u1040"
1019: + "\u1049\u1369\u1371\u17E0\u17E9\u1810\u1819\uFF10\uFF19" },
1020: { "Nl", "\u2160\u2183\u3007\u3007\u3021\u3029\u3038\u303A" },
1021: {
1022: "No",
1023: "\u00B2\u00B3\u00B9\u00B9\u00BC\u00BE\u09F4\u09F9\u0BF0"
1024: + "\u0BF2\u0F2A\u0F33\u1372\u137C\u16EE\u16F0\u2070\u2070\u2074"
1025: + "\u2079\u2080\u2089\u2153\u215F\u2460\u249B\u24EA\u24EA\u2776"
1026: + "\u2793\u3192\u3195\u3220\u3229\u3280\u3289" },
1027: {
1028: "Ps",
1029: "(([[{{\u0F3A\u0F3A\u0F3C\u0F3C\u169B\u169B\u201A\u201A"
1030: + "\u201E\u201E\u2045\u2045\u207D\u207D\u208D\u208D\u2329\u2329"
1031: + "\u3008\u3008\u300A\u300A\u300C\u300C\u300E\u300E\u3010\u3010"
1032: + "\u3014\u3014\u3016\u3016\u3018\u3018\u301A\u301A\u301D\u301D"
1033: + "\uFD3E\uFD3E\uFE35\uFE35\uFE37\uFE37\uFE39\uFE39\uFE3B\uFE3B"
1034: + "\uFE3D\uFE3D\uFE3F\uFE3F\uFE41\uFE41\uFE43\uFE43\uFE59\uFE59"
1035: + "\uFE5B\uFE5B\uFE5D\uFE5D\uFF08\uFF08\uFF3B\uFF3B\uFF5B\uFF5B"
1036: + "\uFF62\uFF62" },
1037: {
1038: "Pe",
1039: "))]]}}\u0F3B\u0F3B\u0F3D\u0F3D\u169C\u169C\u2046\u2046"
1040: + "\u207E\u207E\u208E\u208E\u232A\u232A\u3009\u3009\u300B\u300B"
1041: + "\u300D\u300D\u300F\u300F\u3011\u3011\u3015\u3015\u3017\u3017"
1042: + "\u3019\u3019\u301B\u301B\u301E\u301F\uFD3F\uFD3F\uFE36\uFE36"
1043: + "\uFE38\uFE38\uFE3A\uFE3A\uFE3C\uFE3C\uFE3E\uFE3E\uFE40\uFE40"
1044: + "\uFE42\uFE42\uFE44\uFE44\uFE5A\uFE5A\uFE5C\uFE5C\uFE5E\uFE5E"
1045: + "\uFF09\uFF09\uFF3D\uFF3D\uFF5D\uFF5D\uFF63\uFF63" },
1046: {
1047: "Pi",
1048: "\u00AB\u00AB\u2018\u2018\u201B\u201C\u201F\u201F\u2039"
1049: + "\u2039" },
1050: { "Pf", "\u00BB\u00BB\u2019\u2019\u201D\u201D\u203A\u203A" },
1051: {
1052: "Pd",
1053: "--\u00AD\u00AD\u058A\u058A\u1806\u1806\u2010\u2015\u301C"
1054: + "\u301C\u3030\u3030\uFE31\uFE32\uFE58\uFE58\uFE63\uFE63\uFF0D"
1055: + "\uFF0D" },
1056: {
1057: "Pc",
1058: "__\u203F\u2040\u30FB\u30FB\uFE33\uFE34\uFE4D\uFE4F\uFF3F"
1059: + "\uFF3F\uFF65\uFF65" },
1060: {
1061: "Po",
1062: "!#%'**,,./:;?@\\\\\u00A1\u00A1\u00B7\u00B7\u00BF\u00BF\u037E"
1063: + "\u037E\u0387\u0387\u055A\u055F\u0589\u0589\u05BE\u05BE\u05C0"
1064: + "\u05C0\u05C3\u05C3\u05F3\u05F4\u060C\u060C\u061B\u061B\u061F"
1065: + "\u061F\u066A\u066D\u06D4\u06D4\u0700\u070D\u0964\u0965\u0970"
1066: + "\u0970\u0DF4\u0DF4\u0E4F\u0E4F\u0E5A\u0E5B\u0F04\u0F12\u0F85"
1067: + "\u0F85\u104A\u104F\u10FB\u10FB\u1361\u1368\u166D\u166E\u16EB"
1068: + "\u16ED\u17D4\u17DA\u17DC\u17DC\u1800\u1805\u1807\u180A\u2016"
1069: + "\u2017\u2020\u2027\u2030\u2038\u203B\u203E\u2041\u2043\u2048"
1070: + "\u204D\u3001\u3003\uFE30\uFE30\uFE49\uFE4C\uFE50\uFE52\uFE54"
1071: + "\uFE57\uFE5F\uFE61\uFE68\uFE68\uFE6A\uFE6B\uFF01\uFF03\uFF05"
1072: + "\uFF07\uFF0A\uFF0A\uFF0C\uFF0C\uFF0E\uFF0F\uFF1A\uFF1B\uFF1F"
1073: + "\uFF20\uFF3C\uFF3C\uFF61\uFF61\uFF64\uFF64" },
1074: {
1075: "Sc",
1076: "$$\u00A2\u00A5\u09F2\u09F3\u0E3F\u0E3F\u17DB\u17DB\u20A0"
1077: + "\u20AF\uFE69\uFE69\uFF04\uFF04\uFFE0\uFFE1\uFFE5\uFFE6" },
1078: {
1079: "Sm",
1080: "++<>||~~\u00AC\u00AC\u00B1\u00B1\u00D7\u00D7\u00F7\u00F7"
1081: + "\u2044\u2044\u207A\u207C\u208A\u208C\u2190\u2194\u219A\u219B"
1082: + "\u21A0\u21A0\u21A3\u21A3\u21A6\u21A6\u21AE\u21AE\u21CE\u21CF"
1083: + "\u21D2\u21D2\u21D4\u21D4\u2200\u22F1\u2308\u230B\u2320\u2321"
1084: + "\u25B7\u25B7\u25C1\u25C1\u266F\u266F\uFB29\uFB29\uFE62\uFE62"
1085: + "\uFE64\uFE66\uFF0B\uFF0B\uFF1C\uFF1E\uFF5C\uFF5C\uFF5E\uFF5E"
1086: + "\uFFE2\uFFE2\uFFE9\uFFEC" },
1087: {
1088: "So",
1089: "\u00A6\u00A7\u00A9\u00A9\u00AE\u00AE\u00B0\u00B0\u00B6"
1090: + "\u00B6\u0482\u0482\u06E9\u06E9\u06FD\u06FE\u09FA\u09FA\u0B70"
1091: + "\u0B70\u0F01\u0F03\u0F13\u0F17\u0F1A\u0F1F\u0F34\u0F34\u0F36"
1092: + "\u0F36\u0F38\u0F38\u0FBE\u0FC5\u0FC7\u0FCC\u0FCF\u0FCF\u2100"
1093: + "\u2101\u2103\u2106\u2108\u2109\u2114\u2114\u2116\u2118\u211E"
1094: + "\u2123\u2125\u2125\u2127\u2127\u2129\u2129\u212E\u212E\u2132"
1095: + "\u2132\u213A\u213A\u2195\u2199\u219C\u219F\u21A1\u21A2\u21A4"
1096: + "\u21A5\u21A7\u21AD\u21AF\u21CD\u21D0\u21D1\u21D3\u21D3\u21D5"
1097: + "\u21F3\u2300\u2307\u230C\u231F\u2322\u2328\u232B\u237B\u237D"
1098: + "\u239A\u2400\u2426\u2440\u244A\u249C\u24E9\u2500\u2595\u25A0"
1099: + "\u25B6\u25B8\u25C0\u25C2\u25F7\u2600\u2613\u2619\u266E\u2670"
1100: + "\u2671\u2701\u2704\u2706\u2709\u270C\u2727\u2729\u274B\u274D"
1101: + "\u274D\u274F\u2752\u2756\u2756\u2758\u275E\u2761\u2767\u2794"
1102: + "\u2794\u2798\u27AF\u27B1\u27BE\u2800\u28FF\u2E80\u2E99\u2E9B"
1103: + "\u2EF3\u2F00\u2FD5\u2FF0\u2FFB\u3004\u3004\u3012\u3013\u3020"
1104: + "\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196\u319F\u3200"
1105: + "\u321C\u322A\u3243\u3260\u327B\u327F\u327F\u328A\u32B0\u32C0"
1106: + "\u32CB\u32D0\u32FE\u3300\u3376\u337B\u33DD\u33E0\u33FE\uA490"
1107: + "\uA4A1\uA4A4\uA4B3\uA4B5\uA4C0\uA4C2\uA4C4\uA4C6\uA4C6\uFFE4"
1108: + "\uFFE4\uFFE8\uFFE8\uFFED\uFFEE\uFFFC\uFFFD" },
1109: {
1110: "Mn",
1111: "\u0300\u034E\u0360\u0362\u0483\u0486\u0591\u05A1\u05A3"
1112: + "\u05B9\u05BB\u05BD\u05BF\u05BF\u05C1\u05C2\u05C4\u05C4\u064B"
1113: + "\u0655\u0670\u0670\u06D6\u06DC\u06DF\u06E4\u06E7\u06E8\u06EA"
1114: + "\u06ED\u0711\u0711\u0730\u074A\u07A6\u07B0\u0901\u0902\u093C"
1115: + "\u093C\u0941\u0948\u094D\u094D\u0951\u0954\u0962\u0963\u0981"
1116: + "\u0981\u09BC\u09BC\u09C1\u09C4\u09CD\u09CD\u09E2\u09E3\u0A02"
1117: + "\u0A02\u0A3C\u0A3C\u0A41\u0A42\u0A47\u0A48\u0A4B\u0A4D\u0A70"
1118: + "\u0A71\u0A81\u0A82\u0ABC\u0ABC\u0AC1\u0AC5\u0AC7\u0AC8\u0ACD"
1119: + "\u0ACD\u0B01\u0B01\u0B3C\u0B3C\u0B3F\u0B3F\u0B41\u0B43\u0B4D"
1120: + "\u0B4D\u0B56\u0B56\u0B82\u0B82\u0BC0\u0BC0\u0BCD\u0BCD\u0C3E"
1121: + "\u0C40\u0C46\u0C48\u0C4A\u0C4D\u0C55\u0C56\u0CBF\u0CBF\u0CC6"
1122: + "\u0CC6\u0CCC\u0CCD\u0D41\u0D43\u0D4D\u0D4D\u0DCA\u0DCA\u0DD2"
1123: + "\u0DD4\u0DD6\u0DD6\u0E31\u0E31\u0E34\u0E3A\u0E47\u0E4E\u0EB1"
1124: + "\u0EB1\u0EB4\u0EB9\u0EBB\u0EBC\u0EC8\u0ECD\u0F18\u0F19\u0F35"
1125: + "\u0F35\u0F37\u0F37\u0F39\u0F39\u0F71\u0F7E\u0F80\u0F84\u0F86"
1126: + "\u0F87\u0F90\u0F97\u0F99\u0FBC\u0FC6\u0FC6\u102D\u1030\u1032"
1127: + "\u1032\u1036\u1037\u1039\u1039\u1058\u1059\u17B7\u17BD\u17C6"
1128: + "\u17C6\u17C9\u17D3\u18A9\u18A9\u20D0\u20DC\u20E1\u20E1\u302A"
1129: + "\u302F\u3099\u309A\uFB1E\uFB1E\uFE20\uFE23" },
1130: {
1131: "Mc",
1132: "\u0903\u0903\u093E\u0940\u0949\u094C\u0982\u0983\u09BE"
1133: + "\u09C0\u09C7\u09C8\u09CB\u09CC\u09D7\u09D7\u0A3E\u0A40\u0A83"
1134: + "\u0A83\u0ABE\u0AC0\u0AC9\u0AC9\u0ACB\u0ACC\u0B02\u0B03\u0B3E"
1135: + "\u0B3E\u0B40\u0B40\u0B47\u0B48\u0B4B\u0B4C\u0B57\u0B57\u0B83"
1136: + "\u0B83\u0BBE\u0BBF\u0BC1\u0BC2\u0BC6\u0BC8\u0BCA\u0BCC\u0BD7"
1137: + "\u0BD7\u0C01\u0C03\u0C41\u0C44\u0C82\u0C83\u0CBE\u0CBE\u0CC0"
1138: + "\u0CC4\u0CC7\u0CC8\u0CCA\u0CCB\u0CD5\u0CD6\u0D02\u0D03\u0D3E"
1139: + "\u0D40\u0D46\u0D48\u0D4A\u0D4C\u0D57\u0D57\u0D82\u0D83\u0DCF"
1140: + "\u0DD1\u0DD8\u0DDF\u0DF2\u0DF3\u0F3E\u0F3F\u0F7F\u0F7F\u102C"
1141: + "\u102C\u1031\u1031\u1038\u1038\u1056\u1057\u17B4\u17B6\u17BE"
1142: + "\u17C5\u17C7\u17C8" },
1143: { "Me", "\u0488\u0489\u06DD\u06DE\u20DD\u20E0\u20E2\u20E3" },
1144: { "Zl", "\u2028\u2028" },
1145: { "Zp", "\u2029\u2029" },
1146: {
1147: "Zs",
1148: "\u0020\u0020\u00A0\u00A0\u1680\u1680\u2000\u200B\u202F"
1149: + "\u202F\u3000\u3000" },
1150: { "Cc", "\u0000\u001F\u007F\u009F" },
1151: {
1152: "Cf",
1153: "\u070F\u070F\u180B\u180E\u200C\u200F\u202A\u202E\u206A"
1154: + "\u206F\uFEFF\uFEFF\uFFF9\uFFFB" }, };
1155: }
|