001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.utils.regex;
059:
060: import java.util.Hashtable;
061: import java.util.Locale;
062:
063: /**
 * A regular expression parser for XML Schema.
065: *
066: * @author TAMURA Kent <kent@trl.ibm.co.jp>
067: */
068: class ParserForXMLSchema extends RegexParser {
069:
/**
 * Creates a parser for XML Schema regular expressions.
 * This constructor does not set a locale; only the superclass constructor runs.
 */
public ParserForXMLSchema() {
    super();
}
073:
/**
 * Creates a parser for XML Schema regular expressions.
 *
 * @param locale accepted for signature compatibility; this constructor
 *               does not apply it
 */
public ParserForXMLSchema(Locale locale) {
    super();
}
077:
078: Token processCaret() throws ParseException {
079: this .next();
080: return Token.createChar('^');
081: }
082:
083: Token processDollar() throws ParseException {
084: this .next();
085: return Token.createChar('$');
086: }
087:
088: Token processLookahead() throws ParseException {
089: throw ex("parser.process.1", this .offset);
090: }
091:
092: Token processNegativelookahead() throws ParseException {
093: throw ex("parser.process.1", this .offset);
094: }
095:
096: Token processLookbehind() throws ParseException {
097: throw ex("parser.process.1", this .offset);
098: }
099:
100: Token processNegativelookbehind() throws ParseException {
101: throw ex("parser.process.1", this .offset);
102: }
103:
104: Token processBacksolidus_A() throws ParseException {
105: throw ex("parser.process.1", this .offset);
106: }
107:
108: Token processBacksolidus_Z() throws ParseException {
109: throw ex("parser.process.1", this .offset);
110: }
111:
112: Token processBacksolidus_z() throws ParseException {
113: throw ex("parser.process.1", this .offset);
114: }
115:
116: Token processBacksolidus_b() throws ParseException {
117: throw ex("parser.process.1", this .offset);
118: }
119:
120: Token processBacksolidus_B() throws ParseException {
121: throw ex("parser.process.1", this .offset);
122: }
123:
124: Token processBacksolidus_lt() throws ParseException {
125: throw ex("parser.process.1", this .offset);
126: }
127:
128: Token processBacksolidus_gt() throws ParseException {
129: throw ex("parser.process.1", this .offset);
130: }
131:
132: Token processStar(Token tok) throws ParseException {
133: this .next();
134: return Token.createClosure(tok);
135: }
136:
137: Token processPlus(Token tok) throws ParseException {
138: // X+ -> XX*
139: this .next();
140: return Token.createConcat(tok, Token.createClosure(tok));
141: }
142:
143: Token processQuestion(Token tok) throws ParseException {
144: // X? -> X|
145: this .next();
146: Token par = Token.createUnion();
147: par.addChild(tok);
148: par.addChild(Token.createEmpty());
149: return par;
150: }
151:
152: boolean checkQuestion(int off) {
153: return false;
154: }
155:
156: Token processParen() throws ParseException {
157: this .next();
158: Token tok = Token.createParen(this .parseRegex(), 0);
159: if (this .read() != super .T_RPAREN)
160: throw ex("parser.factor.1", this .offset - 1);
161: this .next(); // Skips ')'
162: return tok;
163: }
164:
165: Token processParen2() throws ParseException {
166: throw ex("parser.process.1", this .offset);
167: }
168:
169: Token processCondition() throws ParseException {
170: throw ex("parser.process.1", this .offset);
171: }
172:
173: Token processModifiers() throws ParseException {
174: throw ex("parser.process.1", this .offset);
175: }
176:
177: Token processIndependent() throws ParseException {
178: throw ex("parser.process.1", this .offset);
179: }
180:
181: Token processBacksolidus_c() throws ParseException {
182: this .next();
183: return this .getTokenForShorthand('c');
184: }
185:
186: Token processBacksolidus_C() throws ParseException {
187: this .next();
188: return this .getTokenForShorthand('C');
189: }
190:
191: Token processBacksolidus_i() throws ParseException {
192: this .next();
193: return this .getTokenForShorthand('i');
194: }
195:
196: Token processBacksolidus_I() throws ParseException {
197: this .next();
198: return this .getTokenForShorthand('I');
199: }
200:
201: Token processBacksolidus_g() throws ParseException {
202: throw this .ex("parser.process.1", this .offset - 2);
203: }
204:
205: Token processBacksolidus_X() throws ParseException {
206: throw ex("parser.process.1", this .offset - 2);
207: }
208:
209: Token processBackreference() throws ParseException {
210: throw ex("parser.process.1", this .offset - 4);
211: }
212:
213: int processCIinCharacterClass(RangeToken tok, int c) {
214: tok.mergeRanges(this .getTokenForShorthand(c));
215: return -1;
216: }
217:
218: /**
219: * Parses a character-class-expression, not a character-class-escape.
220: *
221: * c-c-expression ::= '[' c-group ']'
222: * c-group ::= positive-c-group | negative-c-group | c-c-subtraction
223: * positive-c-group ::= (c-range | c-c-escape)+
224: * negative-c-group ::= '^' positive-c-group
225: * c-c-subtraction ::= (positive-c-group | negative-c-group) subtraction
226: * subtraction ::= '-' c-c-expression
227: * c-range ::= single-range | from-to-range
228: * single-range ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
229: * cc-normal-c ::= <any character except [, ], \>
230: * from-to-range ::= cc-normal-c '-' cc-normal-c
231: *
232: * @param useNrage Ignored.
233: * @return This returns no NrageToken.
234: */
235: protected RangeToken parseCharacterClass(boolean useNrange)
236: throws ParseException {
237: this .setContext(S_INBRACKETS);
238: this .next(); // '['
239: boolean nrange = false;
240: RangeToken base = null;
241: RangeToken tok;
242: if (this .read() == T_CHAR && this .chardata == '^') {
243: nrange = true;
244: this .next(); // '^'
245: base = Token.createRange();
246: base.addRange(0, Token.UTF16_MAX);
247: tok = Token.createRange();
248: } else {
249: tok = Token.createRange();
250: }
251: int type;
252: boolean firstloop = true;
253: while ((type = this .read()) != T_EOF) { // Don't use 'cotinue' for this loop.
254: // single-range | from-to-range | subtraction
255: if (type == T_CHAR && this .chardata == ']' && !firstloop) {
256: if (nrange) {
257: base.subtractRanges(tok);
258: tok = base;
259: }
260: break;
261: }
262: int c = this .chardata;
263: boolean end = false;
264: if (type == T_BACKSOLIDUS) {
265: switch (c) {
266: case 'd':
267: case 'D':
268: case 'w':
269: case 'W':
270: case 's':
271: case 'S':
272: tok.mergeRanges(this .getTokenForShorthand(c));
273: end = true;
274: break;
275:
276: case 'i':
277: case 'I':
278: case 'c':
279: case 'C':
280: c = this .processCIinCharacterClass(tok, c);
281: if (c < 0)
282: end = true;
283: break;
284:
285: case 'p':
286: case 'P':
287: int pstart = this .offset;
288: RangeToken tok2 = this .processBacksolidus_pP(c);
289: if (tok2 == null)
290: throw this .ex("parser.atom.5", pstart);
291: tok.mergeRanges(tok2);
292: end = true;
293: break;
294:
295: default:
296: c = this .decodeEscaped();
297: } // \ + c
298: } // backsolidus
299: else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
300: // Subraction
301: if (nrange) {
302: base.subtractRanges(tok);
303: tok = base;
304: }
305: RangeToken range2 = this .parseCharacterClass(false);
306: tok.subtractRanges(range2);
307: if (this .read() != T_CHAR || this .chardata != ']')
308: throw this .ex("parser.cc.5", this .offset);
309: break; // Exit this loop
310: }
311:
312: this .next();
313: if (!end) { // if not shorthands...
314: if (type == T_CHAR) {
315: if (c == '[')
316: throw this .ex("parser.cc.6", this .offset - 2);
317: if (c == ']')
318: throw this .ex("parser.cc.7", this .offset - 2);
319: }
320: if (this .read() != T_CHAR || this .chardata != '-') { // Here is no '-'.
321: tok.addRange(c, c);
322: } else { // Found '-'
323: // Is this '-' is a from-to token??
324: this .next(); // Skips '-'
325: if ((type = this .read()) == T_EOF)
326: throw this .ex("parser.cc.2", this .offset);
327: // c '-' ']' -> '-' is a single-range.
328: if (type == T_CHAR && this .chardata == ']') {
329: tok.addRange(c, c);
330: tok.addRange('-', '-');
331: }
332: // c '-' '-[' -> '-' is a single-range.
333: else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
334: tok.addRange(c, c);
335: tok.addRange('-', '-');
336: } else {
337: int rangeend = this .chardata;
338: if (type == T_CHAR) {
339: if (rangeend == '[')
340: throw this .ex("parser.cc.6",
341: this .offset - 1);
342: if (rangeend == ']')
343: throw this .ex("parser.cc.7",
344: this .offset - 1);
345: if (rangeend == '-')
346: throw new RuntimeException(
347: "Invalid character '-' in the middle of positive character range");
348: }
349: if (type == T_BACKSOLIDUS)
350: rangeend = this .decodeEscaped();
351: this .next();
352: if (c > rangeend) {
353: throw new RuntimeException(
354: "The range end code point '"
355: + (char) rangeend
356: + "' is less than the start code point '"
357: + (char) c + "'");
358: }
359: tok.addRange(c, rangeend);
360: }
361: }
362: }
363: firstloop = false;
364: }
365: if (this .read() == T_EOF)
366: throw this .ex("parser.cc.2", this .offset);
367: tok.sortRanges();
368: tok.compactRanges();
369: //tok.dumpRanges();
370: this .setContext(S_NORMAL);
371: this .next(); // Skips ']'
372:
373: return tok;
374: }
375:
376: protected RangeToken parseSetOperations() throws ParseException {
377: throw this .ex("parser.process.1", this .offset);
378: }
379:
380: Token getTokenForShorthand(int ch) {
381: switch (ch) {
382: case 'd':
383: return ParserForXMLSchema.getRange("xml:isDigit", true);
384: case 'D':
385: return ParserForXMLSchema.getRange("xml:isDigit", false);
386: case 'w':
387: return ParserForXMLSchema.getRange("xml:isWord", true);
388: case 'W':
389: return ParserForXMLSchema.getRange("xml:isWord", false);
390: case 's':
391: return ParserForXMLSchema.getRange("xml:isSpace", true);
392: case 'S':
393: return ParserForXMLSchema.getRange("xml:isSpace", false);
394: case 'c':
395: return ParserForXMLSchema.getRange("xml:isNameChar", true);
396: case 'C':
397: return ParserForXMLSchema.getRange("xml:isNameChar", false);
398: case 'i':
399: return ParserForXMLSchema.getRange("xml:isInitialNameChar",
400: true);
401: case 'I':
402: return ParserForXMLSchema.getRange("xml:isInitialNameChar",
403: false);
404: default:
405: throw new RuntimeException(
406: "Internal Error: shorthands: \\u"
407: + Integer.toString(ch, 16));
408: }
409: }
410:
411: int decodeEscaped() throws ParseException {
412: if (this .read() != T_BACKSOLIDUS)
413: throw ex("parser.next.1", this .offset - 1);
414: int c = this .chardata;
415: switch (c) {
416: case 'n':
417: c = '\n';
418: break; // LINE FEED U+000A
419: case 'r':
420: c = '\r';
421: break; // CRRIAGE RETURN U+000D
422: case 't':
423: c = '\t';
424: break; // HORIZONTAL TABULATION U+0009
425:
426: // XML Schema REC: Single Character Escape
427: case '\\':
428: case '|':
429: case '.':
430: case '^':
431: case '-':
432: case '?':
433: case '*':
434: case '+':
435: case '{':
436: case '}':
437: case '(':
438: case ')':
439: case '[':
440: case ']':
441: break;
442: default:
443: throw new RuntimeException(
444: "Regular expression: unrecognized character '\\"
445: + (char) c + "' in charRange");
446: }
447: return c;
448: }
449:
450: static protected Hashtable ranges = null;
451: static protected Hashtable ranges2 = null;
452:
453: static synchronized protected RangeToken getRange(String name,
454: boolean positive) {
455: if (ranges == null) {
456: ranges = new Hashtable();
457: ranges2 = new Hashtable();
458:
459: Token tok = Token.createRange();
460: setupRange(tok, SPACES);
461: ranges.put("xml:isSpace", tok);
462: ranges2.put("xml:isSpace", Token.complementRanges(tok));
463:
464: tok = Token.createRange();
465: setupRange(tok, DIGITS);
466: ranges.put("xml:isDigit", tok);
467: ranges2.put("xml:isDigit", Token.complementRanges(tok));
468:
469: tok = Token.createRange();
470: setupRange(tok, DIGITS);
471: ranges.put("xml:isDigit", tok);
472: ranges2.put("xml:isDigit", Token.complementRanges(tok));
473:
474: tok = Token.createRange();
475: setupRange(tok, LETTERS);
476: tok.mergeRanges((Token) ranges.get("xml:isDigit"));
477: ranges.put("xml:isWord", tok);
478: ranges2.put("xml:isWord", Token.complementRanges(tok));
479:
480: tok = Token.createRange();
481: setupRange(tok, NAMECHARS);
482: ranges.put("xml:isNameChar", tok);
483: ranges2.put("xml:isNameChar", Token.complementRanges(tok));
484:
485: tok = Token.createRange();
486: setupRange(tok, LETTERS);
487: tok.addRange('_', '_');
488: tok.addRange(':', ':');
489: ranges.put("xml:isInitialNameChar", tok);
490: ranges2.put("xml:isInitialNameChar", Token
491: .complementRanges(tok));
492: }
493: RangeToken tok = positive ? (RangeToken) ranges.get(name)
494: : (RangeToken) ranges2.get(name);
495: return tok;
496: }
497:
498: static void setupRange(Token range, String src) {
499: int len = src.length();
500: for (int i = 0; i < len; i += 2)
501: range.addRange(src.charAt(i), src.charAt(i + 1));
502: }
503:
504: private static final String SPACES = "\t\n\r\r ";
/**
 * Ranges for the "xml:isNameChar" set, stored as consecutive (low, high)
 * character pairs. getRange passes this string to setupRange, which reads
 * it two characters at a time, so the total length must stay even.
 */
private static final String NAMECHARS = "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
        + "\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
        + "\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
        + "\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
        + "\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
        + "\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
        + "\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
        + "\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
        + "\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
        + "\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
        + "\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
        + "\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
        + "\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
        + "\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
        + "\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
        + "\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
        + "\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
        + "\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
        + "\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
        + "\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
        + "\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
        + "\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
        + "\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
        + "\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
        + "\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
        + "\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
        + "\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
        + "\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
        + "\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
        + "\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
        + "\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
        + "\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
        + "\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
        + "\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
        + "\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
        + "\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
        + "\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
        + "\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
        + "\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
        + "\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
        + "\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
        + "";
/**
 * Letter ranges, stored as consecutive (low, high) character pairs.
 * getRange passes this string to setupRange when building both
 * "xml:isWord" (together with the digit set) and "xml:isInitialNameChar"
 * (together with '_' and ':'). setupRange reads two characters at a time,
 * so the total length must stay even.
 */
private static final String LETTERS = "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
        + "\u014a\u017e\u0180\u01c3\u01cd\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
        + "\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
        + "\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
        + "\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
        + "\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
        + "\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
        + "\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
        + "\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
        + "\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
        + "\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
        + "\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
        + "\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
        + "\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
        + "\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
        + "\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
        + "\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
        + "\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
        + "\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
        + "\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
        + "\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
        + "\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
        + "\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
        + "\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
        + "\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
        + "\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
        + "\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
        + "\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
        + "\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
        + "\uac00\ud7a3";
/**
 * Digit ranges, stored as consecutive (low, high) character pairs.
 * getRange passes this string to setupRange when building "xml:isDigit",
 * and that set is also merged into "xml:isWord". setupRange reads two
 * characters at a time, so the total length must stay even.
 */
private static final String DIGITS = "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
        + "\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
        + "\u0F20\u0F29";
580: }
|