001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: /**
019: * @author Nikolay A. Kuznetsov
020: * @version $Revision: 1.7.2.2 $
021: */package java.util.regex;
022:
023: import java.util.BitSet;
024: import java.util.ListResourceBundle;
025:
/**
 * This class represents character classes, i.e. sets of characters that are
 * either predefined or user defined.
 *
 * Note that this class represents a token, not a node, so it is constructed
 * by the lexer.
 *
 * @author Nikolay A. Kuznetsov
 * @version $Revision: 1.7.2.2 $
 */
036: abstract class AbstractCharClass extends SpecialToken {
037: protected boolean alt;
038:
039: protected boolean altSurrogates;
040:
041: //Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1
042: static int SURROGATE_CARDINALITY = 2048;
043:
044: BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY);
045:
046: AbstractCharClass charClassWithoutSurrogates = null;
047:
048: AbstractCharClass charClassWithSurrogates = null;
049:
050: static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses();
051:
052: /*
053: * Indicates if this class may contain supplementary Unicode codepoints.
054: * If this flag is specified it doesn't mean that this class contains
055: * supplementary characters but may contain.
056: */
057: protected boolean mayContainSupplCodepoints = false;
058:
059: /**
060: * Returns true if this char class contains character specified;
061: *
062: * @param ch
063: * character to check;
064: */
065: abstract public boolean contains(int ch);
066:
067: /**
068: * Returns BitSet representing this character class or <code>null</code>
069: * if this character class does not have character representation;
070: *
071: * @return bitset
072: */
073: protected BitSet getBits() {
074: return null;
075: }
076:
077: protected BitSet getLowHighSurrogates() {
078: return lowHighSurrogates;
079: }
080:
081: public boolean hasLowHighSurrogates() {
082: return altSurrogates ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY
083: : lowHighSurrogates.nextSetBit(0) < SURROGATE_CARDINALITY;
084: }
085:
086: public boolean mayContainSupplCodepoints() {
087: return mayContainSupplCodepoints;
088: }
089:
090: public int getType() {
091: return SpecialToken.TOK_CHARCLASS;
092: }
093:
094: public AbstractCharClass getInstance() {
095: return this ;
096: }
097:
098: public AbstractCharClass getSurrogates() {
099:
100: if (charClassWithSurrogates == null) {
101: final BitSet lHS = getLowHighSurrogates();
102:
103: charClassWithSurrogates = new AbstractCharClass() {
104: public boolean contains(int ch) {
105: int index = ch - Character.MIN_SURROGATE;
106:
107: return ((index >= 0) && (index < AbstractCharClass.SURROGATE_CARDINALITY)) ? this .altSurrogates
108: ^ lHS.get(index)
109: : false;
110: }
111: };
112: charClassWithSurrogates.setNegative(this .altSurrogates);
113: }
114:
115: return charClassWithSurrogates;
116: }
117:
118: public AbstractCharClass getWithoutSurrogates() {
119: if (charClassWithoutSurrogates == null) {
120: final BitSet lHS = getLowHighSurrogates();
121: final AbstractCharClass this Class = this ;
122:
123: charClassWithoutSurrogates = new AbstractCharClass() {
124: public boolean contains(int ch) {
125: int index = ch - Character.MIN_SURROGATE;
126:
127: boolean containslHS = ((index >= 0) && (index < AbstractCharClass.SURROGATE_CARDINALITY)) ? this .altSurrogates
128: ^ lHS.get(index)
129: : false;
130:
131: return this Class.contains(ch) && !containslHS;
132: }
133: };
134: charClassWithoutSurrogates.setNegative(isNegative());
135: charClassWithoutSurrogates.mayContainSupplCodepoints = mayContainSupplCodepoints;
136: }
137:
138: return charClassWithoutSurrogates;
139: }
140:
141: public boolean hasUCI() {
142: return false;
143: }
144:
145: /**
146: * Sets this CharClass to negative form, i.e. if they will add some
147: * characters and after that set this class to negative it will accept all
148: * the characters except previously set ones.
149: *
150: * Although this method will not alternate all the already set characters,
151: * just overall meaning of the class.
152: *
153: * @see #contains(int)
154: * @see #intersect(CharClass)
155: * @see #union(CharClass)
156: */
157: public AbstractCharClass setNegative(boolean value) {
158: if (alt ^ value) {
159: alt = !alt;
160: altSurrogates = !altSurrogates;
161: }
162: if (!mayContainSupplCodepoints) {
163: mayContainSupplCodepoints = true;
164: }
165: return this ;
166: }
167:
168: public boolean isNegative() {
169: return alt;
170: }
171:
172: // -----------------------------------------------------------------
173: // Static methods and predefined classes
174: // -----------------------------------------------------------------
175:
176: public static boolean intersects(int ch1, int ch2) {
177: return ch1 == ch2;
178: }
179:
180: public static boolean intersects(AbstractCharClass cc, int ch) {
181: return cc.contains(ch);
182: }
183:
184: public static boolean intersects(AbstractCharClass cc1,
185: AbstractCharClass cc2) {
186: if (cc1.getBits() == null || cc2.getBits() == null)
187: return true;
188: return cc1.getBits().intersects(cc2.getBits());
189: }
190:
191: public static AbstractCharClass getPredefinedClass(String name,
192: boolean negative) {
193: return ((LazyCharClass) charClasses.getObject(name))
194: .getValue(negative);
195: }
196:
197: abstract static class LazyCharClass {
198: AbstractCharClass posValue = null;
199:
200: AbstractCharClass negValue = null;
201:
202: public AbstractCharClass getValue(boolean negative) {
203: if (!negative && posValue == null) {
204: posValue = computeValue();
205: } else if (negative && negValue == null) {
206: negValue = computeValue().setNegative(true);
207: }
208: if (!negative)
209: return posValue;
210: return negValue;
211: }
212:
213: protected abstract AbstractCharClass computeValue();
214: }
215:
216: static class LazyDigit extends LazyCharClass {
217: protected AbstractCharClass computeValue() {
218: return new CharClass().add('0', '9');
219: }
220: }
221:
222: static class LazyNonDigit extends LazyDigit {
223: protected AbstractCharClass computeValue() {
224: AbstractCharClass chCl = super .computeValue().setNegative(
225: true);
226:
227: chCl.mayContainSupplCodepoints = true;
228: return chCl;
229: }
230: }
231:
232: static class LazySpace extends LazyCharClass {
233: protected AbstractCharClass computeValue() {
234: /* 9-13 - \t\n\x0B\f\r; 32 - ' ' */
235: return new CharClass().add(9, 13).add(32);
236: }
237: }
238:
239: static class LazyNonSpace extends LazySpace {
240: protected AbstractCharClass computeValue() {
241: AbstractCharClass chCl = super .computeValue().setNegative(
242: true);
243:
244: chCl.mayContainSupplCodepoints = true;
245: return chCl;
246: }
247: }
248:
249: static class LazyWord extends LazyCharClass {
250: protected AbstractCharClass computeValue() {
251: return new CharClass().add('a', 'z').add('A', 'Z').add('0',
252: '9').add('_');
253: }
254: }
255:
256: static class LazyNonWord extends LazyWord {
257: protected AbstractCharClass computeValue() {
258: AbstractCharClass chCl = super .computeValue().setNegative(
259: true);
260:
261: chCl.mayContainSupplCodepoints = true;
262: return chCl;
263: }
264: }
265:
266: static class LazyLower extends LazyCharClass {
267: protected AbstractCharClass computeValue() {
268: return new CharClass().add('a', 'z');
269: }
270: }
271:
272: static class LazyUpper extends LazyCharClass {
273: protected AbstractCharClass computeValue() {
274: return new CharClass().add('A', 'Z');
275: }
276: }
277:
278: static class LazyASCII extends LazyCharClass {
279: protected AbstractCharClass computeValue() {
280: return new CharClass().add(0x00, 0x7F);
281: }
282: }
283:
284: static class LazyAlpha extends LazyCharClass {
285: protected AbstractCharClass computeValue() {
286: return new CharClass().add('a', 'z').add('A', 'Z');
287: }
288: }
289:
290: static class LazyAlnum extends LazyAlpha {
291: protected AbstractCharClass computeValue() {
292: return ((CharClass) super .computeValue()).add('0', '9');
293: }
294: }
295:
296: static class LazyPunct extends LazyCharClass {
297: protected AbstractCharClass computeValue() {
298: /* Punctuation !"#$%&'()*+,-./:;<=>?@ [\]^_` {|}~ */
299: return new CharClass().add(0x21, 0x40).add(0x5B, 0x60).add(
300: 0x7B, 0x7E);
301: }
302: }
303:
304: static class LazyGraph extends LazyAlnum {
305: protected AbstractCharClass computeValue() {
306: /* plus punctuation */
307: return ((CharClass) super .computeValue()).add(0x21, 0x40)
308: .add(0x5B, 0x60).add(0x7B, 0x7E);
309: }
310: }
311:
312: static class LazyPrint extends LazyGraph {
313: protected AbstractCharClass computeValue() {
314: return ((CharClass) super .computeValue()).add(0x20);
315: }
316: }
317:
318: static class LazyBlank extends LazyCharClass {
319: protected AbstractCharClass computeValue() {
320: return new CharClass().add(' ').add('\t');
321: }
322: }
323:
324: static class LazyCntrl extends LazyCharClass {
325: protected AbstractCharClass computeValue() {
326: return new CharClass().add(0x00, 0x1F).add(0x7F);
327: }
328: }
329:
330: static class LazyXDigit extends LazyCharClass {
331: protected AbstractCharClass computeValue() {
332: return new CharClass().add('0', '9').add('a', 'f').add('A',
333: 'F');
334: }
335: }
336:
337: static class LazyRange extends LazyCharClass {
338: int start, end;
339:
340: public LazyRange(int start, int end) {
341: this .start = start;
342: this .end = end;
343: }
344:
345: public AbstractCharClass computeValue() {
346: AbstractCharClass chCl = new CharClass().add(start, end);
347: return chCl;
348: }
349: }
350:
351: static class LazySpecialsBlock extends LazyCharClass {
352: public AbstractCharClass computeValue() {
353: return new CharClass().add(0xFEFF, 0xFEFF).add(0xFFF0,
354: 0xFFFD);
355: }
356: }
357:
358: static class LazyCategoryScope extends LazyCharClass {
359: int category;
360:
361: boolean mayContainSupplCodepoints;
362:
363: boolean containsAllSurrogates;
364:
365: public LazyCategoryScope(int cat,
366: boolean mayContainSupplCodepoints) {
367: this .mayContainSupplCodepoints = mayContainSupplCodepoints;
368: this .category = cat;
369: }
370:
371: public LazyCategoryScope(int cat,
372: boolean mayContainSupplCodepoints,
373: boolean containsAllSurrogates) {
374: this .containsAllSurrogates = containsAllSurrogates;
375: this .mayContainSupplCodepoints = mayContainSupplCodepoints;
376: this .category = cat;
377: }
378:
379: protected AbstractCharClass computeValue() {
380: AbstractCharClass chCl = new UnicodeCategoryScope(category);
381: if (containsAllSurrogates) {
382: chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
383: }
384:
385: chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;
386: return chCl;
387: }
388: }
389:
390: static class LazyCategory extends LazyCharClass {
391: int category;
392:
393: boolean mayContainSupplCodepoints;
394:
395: boolean containsAllSurrogates;
396:
397: public LazyCategory(int cat, boolean mayContainSupplCodepoints) {
398: this .mayContainSupplCodepoints = mayContainSupplCodepoints;
399: this .category = cat;
400: }
401:
402: public LazyCategory(int cat, boolean mayContainSupplCodepoints,
403: boolean containsAllSurrogates) {
404: this .containsAllSurrogates = containsAllSurrogates;
405: this .mayContainSupplCodepoints = mayContainSupplCodepoints;
406: this .category = cat;
407: }
408:
409: protected AbstractCharClass computeValue() {
410: AbstractCharClass chCl = new UnicodeCategory(category);
411: if (containsAllSurrogates) {
412: chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
413: }
414: chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;
415: ;
416: return chCl;
417: }
418: }
419:
420: static class LazyJavaLowerCase extends LazyCharClass {
421: protected AbstractCharClass computeValue() {
422: AbstractCharClass chCl = new AbstractCharClass() {
423: public boolean contains(int ch) {
424: return Character.isLowerCase(ch);
425: }
426: };
427:
428: chCl.mayContainSupplCodepoints = true;
429: return chCl;
430: }
431: }
432:
433: static class LazyJavaUpperCase extends LazyCharClass {
434: protected AbstractCharClass computeValue() {
435: AbstractCharClass chCl = new AbstractCharClass() {
436: public boolean contains(int ch) {
437: return Character.isUpperCase(ch);
438: }
439: };
440:
441: chCl.mayContainSupplCodepoints = true;
442: return chCl;
443: }
444: }
445:
446: static class LazyJavaWhitespace extends LazyCharClass {
447: protected AbstractCharClass computeValue() {
448: return new AbstractCharClass() {
449: public boolean contains(int ch) {
450: return Character.isWhitespace(ch);
451: }
452: };
453: }
454: }
455:
456: static class LazyJavaMirrored extends LazyCharClass {
457: protected AbstractCharClass computeValue() {
458: return new AbstractCharClass() {
459: public boolean contains(int ch) {
460: return Character.isMirrored(ch);
461: }
462: };
463: }
464: }
465:
466: static class LazyJavaDefined extends LazyCharClass {
467: protected AbstractCharClass computeValue() {
468: AbstractCharClass chCl = new AbstractCharClass() {
469: public boolean contains(int ch) {
470: return Character.isDefined(ch);
471: }
472: };
473: chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
474:
475: chCl.mayContainSupplCodepoints = true;
476: return chCl;
477: }
478: }
479:
480: static class LazyJavaDigit extends LazyCharClass {
481: protected AbstractCharClass computeValue() {
482: AbstractCharClass chCl = new AbstractCharClass() {
483: public boolean contains(int ch) {
484: return Character.isDigit(ch);
485: }
486: };
487:
488: chCl.mayContainSupplCodepoints = true;
489: return chCl;
490: }
491: }
492:
493: static class LazyJavaIdentifierIgnorable extends LazyCharClass {
494: protected AbstractCharClass computeValue() {
495: AbstractCharClass chCl = new AbstractCharClass() {
496: public boolean contains(int ch) {
497: return Character.isIdentifierIgnorable(ch);
498: }
499: };
500:
501: chCl.mayContainSupplCodepoints = true;
502: return chCl;
503: }
504: }
505:
506: static class LazyJavaISOControl extends LazyCharClass {
507: protected AbstractCharClass computeValue() {
508: return new AbstractCharClass() {
509: public boolean contains(int ch) {
510: return Character.isISOControl(ch);
511: }
512: };
513: }
514: }
515:
516: static class LazyJavaJavaIdentifierPart extends LazyCharClass {
517: protected AbstractCharClass computeValue() {
518: AbstractCharClass chCl = new AbstractCharClass() {
519: public boolean contains(int ch) {
520: return Character.isJavaIdentifierPart(ch);
521: }
522: };
523:
524: chCl.mayContainSupplCodepoints = true;
525: return chCl;
526: }
527: }
528:
529: static class LazyJavaJavaIdentifierStart extends LazyCharClass {
530: protected AbstractCharClass computeValue() {
531: AbstractCharClass chCl = new AbstractCharClass() {
532: public boolean contains(int ch) {
533: return Character.isJavaIdentifierStart(ch);
534: }
535: };
536:
537: chCl.mayContainSupplCodepoints = true;
538: return chCl;
539: }
540: }
541:
542: static class LazyJavaLetter extends LazyCharClass {
543: protected AbstractCharClass computeValue() {
544: AbstractCharClass chCl = new AbstractCharClass() {
545: public boolean contains(int ch) {
546: return Character.isLetter(ch);
547: }
548: };
549:
550: chCl.mayContainSupplCodepoints = true;
551: return chCl;
552: }
553: }
554:
555: static class LazyJavaLetterOrDigit extends LazyCharClass {
556: protected AbstractCharClass computeValue() {
557: AbstractCharClass chCl = new AbstractCharClass() {
558: public boolean contains(int ch) {
559: return Character.isLetterOrDigit(ch);
560: }
561: };
562:
563: chCl.mayContainSupplCodepoints = true;
564: return chCl;
565: }
566: }
567:
568: static class LazyJavaSpaceChar extends LazyCharClass {
569: protected AbstractCharClass computeValue() {
570: return new AbstractCharClass() {
571: public boolean contains(int ch) {
572: return Character.isSpaceChar(ch);
573: }
574: };
575: }
576: }
577:
578: static class LazyJavaTitleCase extends LazyCharClass {
579: protected AbstractCharClass computeValue() {
580: return new AbstractCharClass() {
581: public boolean contains(int ch) {
582: return Character.isTitleCase(ch);
583: }
584: };
585: }
586: }
587:
588: static class LazyJavaUnicodeIdentifierPart extends LazyCharClass {
589: protected AbstractCharClass computeValue() {
590: AbstractCharClass chCl = new AbstractCharClass() {
591: public boolean contains(int ch) {
592: return Character.isUnicodeIdentifierPart(ch);
593: }
594: };
595:
596: chCl.mayContainSupplCodepoints = true;
597: return chCl;
598: }
599: }
600:
601: static class LazyJavaUnicodeIdentifierStart extends LazyCharClass {
602: protected AbstractCharClass computeValue() {
603: AbstractCharClass chCl = new AbstractCharClass() {
604: public boolean contains(int ch) {
605: return Character.isUnicodeIdentifierStart(ch);
606: }
607: };
608:
609: chCl.mayContainSupplCodepoints = true;
610: return chCl;
611: }
612: }
613:
614: /**
615: * character classes generated from
616: * http://www.unicode.org/reports/tr18/
617: * http://www.unicode.org/Public/4.1.0/ucd/Blocks.txt
618: */
619: static final class PredefinedCharacterClasses extends
620: ListResourceBundle {
621: static LazyCharClass space = new LazySpace();
622:
623: static LazyCharClass digit = new LazyDigit();
624:
625: static final Object[][] contents = {
626: { "Lower", new LazyLower() }, //$NON-NLS-1$
627: { "Upper", new LazyUpper() }, //$NON-NLS-1$
628: { "ASCII", new LazyASCII() }, //$NON-NLS-1$
629: { "Alpha", new LazyAlpha() }, //$NON-NLS-1$
630: { "Digit", digit }, //$NON-NLS-1$
631: { "Alnum", new LazyAlnum() }, //$NON-NLS-1$
632: { "Punct", new LazyPunct() }, //$NON-NLS-1$
633: { "Graph", new LazyGraph() }, //$NON-NLS-1$
634: { "Print", new LazyPrint() }, //$NON-NLS-1$
635: { "Blank", new LazyBlank() }, //$NON-NLS-1$
636: { "Cntrl", new LazyCntrl() }, //$NON-NLS-1$
637: { "XDigit", new LazyXDigit() }, //$NON-NLS-1$
638: { "javaLowerCase", new LazyJavaLowerCase() }, //$NON-NLS-1$
639: { "javaUpperCase", new LazyJavaUpperCase() }, //$NON-NLS-1$
640: { "javaWhitespace", new LazyJavaWhitespace() }, //$NON-NLS-1$
641: { "javaMirrored", new LazyJavaMirrored() }, //$NON-NLS-1$
642: { "javaDefined", new LazyJavaDefined() }, //$NON-NLS-1$
643: { "javaDigit", new LazyJavaDigit() }, //$NON-NLS-1$
644: {
645: "javaIdentifierIgnorable", new LazyJavaIdentifierIgnorable() }, //$NON-NLS-1$
646: { "javaISOControl", new LazyJavaISOControl() }, //$NON-NLS-1$
647: {
648: "javaJavaIdentifierPart", new LazyJavaJavaIdentifierPart() }, //$NON-NLS-1$
649: {
650: "javaJavaIdentifierStart", new LazyJavaJavaIdentifierStart() }, //$NON-NLS-1$
651: { "javaLetter", new LazyJavaLetter() }, //$NON-NLS-1$
652: { "javaLetterOrDigit", new LazyJavaLetterOrDigit() }, //$NON-NLS-1$
653: { "javaSpaceChar", new LazyJavaSpaceChar() }, //$NON-NLS-1$
654: { "javaTitleCase", new LazyJavaTitleCase() }, //$NON-NLS-1$
655: {
656: "javaUnicodeIdentifierPart", new LazyJavaUnicodeIdentifierPart() }, //$NON-NLS-1$
657: {
658: "javaUnicodeIdentifierStart", new LazyJavaUnicodeIdentifierStart() }, //$NON-NLS-1$
659: { "Space", space }, //$NON-NLS-1$
660: { "w", new LazyWord() }, //$NON-NLS-1$
661: { "W", new LazyNonWord() }, //$NON-NLS-1$
662: { "s", space }, //$NON-NLS-1$
663: { "S", new LazyNonSpace() }, //$NON-NLS-1$
664: { "d", digit }, //$NON-NLS-1$
665: { "D", new LazyNonDigit() }, //$NON-NLS-1$
666: { "BasicLatin", new LazyRange(0x0000, 0x007F) }, //$NON-NLS-1$
667: { "Latin-1Supplement", new LazyRange(0x0080, 0x00FF) }, //$NON-NLS-1$
668: { "LatinExtended-A", new LazyRange(0x0100, 0x017F) }, //$NON-NLS-1$
669: { "LatinExtended-B", new LazyRange(0x0180, 0x024F) }, //$NON-NLS-1$
670: { "IPAExtensions", new LazyRange(0x0250, 0x02AF) }, //$NON-NLS-1$
671: {
672: "SpacingModifierLetters", new LazyRange(0x02B0, 0x02FF) }, //$NON-NLS-1$
673: {
674: "CombiningDiacriticalMarks", new LazyRange(0x0300, 0x036F) }, //$NON-NLS-1$
675: { "Greek", new LazyRange(0x0370, 0x03FF) }, //$NON-NLS-1$
676: { "Cyrillic", new LazyRange(0x0400, 0x04FF) }, //$NON-NLS-1$
677: { "CyrillicSupplement", new LazyRange(0x0500, 0x052F) }, //$NON-NLS-1$
678: { "Armenian", new LazyRange(0x0530, 0x058F) }, //$NON-NLS-1$
679: { "Hebrew", new LazyRange(0x0590, 0x05FF) }, //$NON-NLS-1$
680: { "Arabic", new LazyRange(0x0600, 0x06FF) }, //$NON-NLS-1$
681: { "Syriac", new LazyRange(0x0700, 0x074F) }, //$NON-NLS-1$
682: { "ArabicSupplement", new LazyRange(0x0750, 0x077F) }, //$NON-NLS-1$
683: { "Thaana", new LazyRange(0x0780, 0x07BF) }, //$NON-NLS-1$
684: { "Devanagari", new LazyRange(0x0900, 0x097F) }, //$NON-NLS-1$
685: { "Bengali", new LazyRange(0x0980, 0x09FF) }, //$NON-NLS-1$
686: { "Gurmukhi", new LazyRange(0x0A00, 0x0A7F) }, //$NON-NLS-1$
687: { "Gujarati", new LazyRange(0x0A80, 0x0AFF) }, //$NON-NLS-1$
688: { "Oriya", new LazyRange(0x0B00, 0x0B7F) }, //$NON-NLS-1$
689: { "Tamil", new LazyRange(0x0B80, 0x0BFF) }, //$NON-NLS-1$
690: { "Telugu", new LazyRange(0x0C00, 0x0C7F) }, //$NON-NLS-1$
691: { "Kannada", new LazyRange(0x0C80, 0x0CFF) }, //$NON-NLS-1$
692: { "Malayalam", new LazyRange(0x0D00, 0x0D7F) }, //$NON-NLS-1$
693: { "Sinhala", new LazyRange(0x0D80, 0x0DFF) }, //$NON-NLS-1$
694: { "Thai", new LazyRange(0x0E00, 0x0E7F) }, //$NON-NLS-1$
695: { "Lao", new LazyRange(0x0E80, 0x0EFF) }, //$NON-NLS-1$
696: { "Tibetan", new LazyRange(0x0F00, 0x0FFF) }, //$NON-NLS-1$
697: { "Myanmar", new LazyRange(0x1000, 0x109F) }, //$NON-NLS-1$
698: { "Georgian", new LazyRange(0x10A0, 0x10FF) }, //$NON-NLS-1$
699: { "HangulJamo", new LazyRange(0x1100, 0x11FF) }, //$NON-NLS-1$
700: { "Ethiopic", new LazyRange(0x1200, 0x137F) }, //$NON-NLS-1$
701: { "EthiopicSupplement", new LazyRange(0x1380, 0x139F) }, //$NON-NLS-1$
702: { "Cherokee", new LazyRange(0x13A0, 0x13FF) }, //$NON-NLS-1$
703: { "UnifiedCanadianAboriginalSyllabics", //$NON-NLS-1$
704: new LazyRange(0x1400, 0x167F) },
705: { "Ogham", new LazyRange(0x1680, 0x169F) }, //$NON-NLS-1$
706: { "Runic", new LazyRange(0x16A0, 0x16FF) }, //$NON-NLS-1$
707: { "Tagalog", new LazyRange(0x1700, 0x171F) }, //$NON-NLS-1$
708: { "Hanunoo", new LazyRange(0x1720, 0x173F) }, //$NON-NLS-1$
709: { "Buhid", new LazyRange(0x1740, 0x175F) }, //$NON-NLS-1$
710: { "Tagbanwa", new LazyRange(0x1760, 0x177F) }, //$NON-NLS-1$
711: { "Khmer", new LazyRange(0x1780, 0x17FF) }, //$NON-NLS-1$
712: { "Mongolian", new LazyRange(0x1800, 0x18AF) }, //$NON-NLS-1$
713: { "Limbu", new LazyRange(0x1900, 0x194F) }, //$NON-NLS-1$
714: { "TaiLe", new LazyRange(0x1950, 0x197F) }, //$NON-NLS-1$
715: { "NewTaiLue", new LazyRange(0x1980, 0x19DF) }, //$NON-NLS-1$
716: { "KhmerSymbols", new LazyRange(0x19E0, 0x19FF) }, //$NON-NLS-1$
717: { "Buginese", new LazyRange(0x1A00, 0x1A1F) }, //$NON-NLS-1$
718: { "PhoneticExtensions", new LazyRange(0x1D00, 0x1D7F) }, //$NON-NLS-1$
719: {
720: "PhoneticExtensionsSupplement", new LazyRange(0x1D80, 0x1DBF) }, //$NON-NLS-1$
721: { "CombiningDiacriticalMarksSupplement", //$NON-NLS-1$
722: new LazyRange(0x1DC0, 0x1DFF) },
723: {
724: "LatinExtendedAdditional", new LazyRange(0x1E00, 0x1EFF) }, //$NON-NLS-1$
725: { "GreekExtended", new LazyRange(0x1F00, 0x1FFF) }, //$NON-NLS-1$
726: { "GeneralPunctuation", new LazyRange(0x2000, 0x206F) }, //$NON-NLS-1$
727: {
728: "SuperscriptsandSubscripts", new LazyRange(0x2070, 0x209F) }, //$NON-NLS-1$
729: { "CurrencySymbols", new LazyRange(0x20A0, 0x20CF) }, //$NON-NLS-1$
730: {
731: "CombiningMarksforSymbols", new LazyRange(0x20D0, 0x20FF) }, //$NON-NLS-1$
732: { "LetterlikeSymbols", new LazyRange(0x2100, 0x214F) }, //$NON-NLS-1$
733: { "NumberForms", new LazyRange(0x2150, 0x218F) }, //$NON-NLS-1$
734: { "Arrows", new LazyRange(0x2190, 0x21FF) }, //$NON-NLS-1$
735: {
736: "MathematicalOperators", new LazyRange(0x2200, 0x22FF) }, //$NON-NLS-1$
737: {
738: "MiscellaneousTechnical", new LazyRange(0x2300, 0x23FF) }, //$NON-NLS-1$
739: { "ControlPictures", new LazyRange(0x2400, 0x243F) }, //$NON-NLS-1$
740: {
741: "OpticalCharacterRecognition", new LazyRange(0x2440, 0x245F) }, //$NON-NLS-1$
742: {
743: "EnclosedAlphanumerics", new LazyRange(0x2460, 0x24FF) }, //$NON-NLS-1$
744: { "BoxDrawing", new LazyRange(0x2500, 0x257F) }, //$NON-NLS-1$
745: { "BlockElements", new LazyRange(0x2580, 0x259F) }, //$NON-NLS-1$
746: { "GeometricShapes", new LazyRange(0x25A0, 0x25FF) }, //$NON-NLS-1$
747: { "MiscellaneousSymbols", new LazyRange(0x2600, 0x26FF) }, //$NON-NLS-1$
748: { "Dingbats", new LazyRange(0x2700, 0x27BF) }, //$NON-NLS-1$
749: { "MiscellaneousMathematicalSymbols-A", //$NON-NLS-1$
750: new LazyRange(0x27C0, 0x27EF) },
751: { "SupplementalArrows-A", new LazyRange(0x27F0, 0x27FF) }, //$NON-NLS-1$
752: { "BraillePatterns", new LazyRange(0x2800, 0x28FF) }, //$NON-NLS-1$
753: { "SupplementalArrows-B", new LazyRange(0x2900, 0x297F) }, //$NON-NLS-1$
754: { "MiscellaneousMathematicalSymbols-B", //$NON-NLS-1$
755: new LazyRange(0x2980, 0x29FF) },
756: { "SupplementalMathematicalOperators", //$NON-NLS-1$
757: new LazyRange(0x2A00, 0x2AFF) },
758: { "MiscellaneousSymbolsandArrows", //$NON-NLS-1$
759: new LazyRange(0x2B00, 0x2BFF) },
760: { "Glagolitic", new LazyRange(0x2C00, 0x2C5F) }, //$NON-NLS-1$
761: { "Coptic", new LazyRange(0x2C80, 0x2CFF) }, //$NON-NLS-1$
762: { "GeorgianSupplement", new LazyRange(0x2D00, 0x2D2F) }, //$NON-NLS-1$
763: { "Tifinagh", new LazyRange(0x2D30, 0x2D7F) }, //$NON-NLS-1$
764: { "EthiopicExtended", new LazyRange(0x2D80, 0x2DDF) }, //$NON-NLS-1$
765: {
766: "SupplementalPunctuation", new LazyRange(0x2E00, 0x2E7F) }, //$NON-NLS-1$
767: {
768: "CJKRadicalsSupplement", new LazyRange(0x2E80, 0x2EFF) }, //$NON-NLS-1$
769: { "KangxiRadicals", new LazyRange(0x2F00, 0x2FDF) }, //$NON-NLS-1$
770: { "IdeographicDescriptionCharacters", //$NON-NLS-1$
771: new LazyRange(0x2FF0, 0x2FFF) },
772: {
773: "CJKSymbolsandPunctuation", new LazyRange(0x3000, 0x303F) }, //$NON-NLS-1$
774: { "Hiragana", new LazyRange(0x3040, 0x309F) }, //$NON-NLS-1$
775: { "Katakana", new LazyRange(0x30A0, 0x30FF) }, //$NON-NLS-1$
776: { "Bopomofo", new LazyRange(0x3100, 0x312F) }, //$NON-NLS-1$
777: {
778: "HangulCompatibilityJamo", new LazyRange(0x3130, 0x318F) }, //$NON-NLS-1$
779: { "Kanbun", new LazyRange(0x3190, 0x319F) }, //$NON-NLS-1$
780: { "BopomofoExtended", new LazyRange(0x31A0, 0x31BF) }, //$NON-NLS-1$
781: { "CJKStrokes", new LazyRange(0x31C0, 0x31EF) }, //$NON-NLS-1$
782: {
783: "KatakanaPhoneticExtensions", new LazyRange(0x31F0, 0x31FF) }, //$NON-NLS-1$
784: {
785: "EnclosedCJKLettersandMonths", new LazyRange(0x3200, 0x32FF) }, //$NON-NLS-1$
786: { "CJKCompatibility", new LazyRange(0x3300, 0x33FF) }, //$NON-NLS-1$
787: { "CJKUnifiedIdeographsExtensionA", //$NON-NLS-1$
788: new LazyRange(0x3400, 0x4DB5) },
789: {
790: "YijingHexagramSymbols", new LazyRange(0x4DC0, 0x4DFF) }, //$NON-NLS-1$
791: { "CJKUnifiedIdeographs", new LazyRange(0x4E00, 0x9FFF) }, //$NON-NLS-1$
792: { "YiSyllables", new LazyRange(0xA000, 0xA48F) }, //$NON-NLS-1$
793: { "YiRadicals", new LazyRange(0xA490, 0xA4CF) }, //$NON-NLS-1$
794: { "ModifierToneLetters", new LazyRange(0xA700, 0xA71F) }, //$NON-NLS-1$
795: { "SylotiNagri", new LazyRange(0xA800, 0xA82F) }, //$NON-NLS-1$
796: { "HangulSyllables", new LazyRange(0xAC00, 0xD7A3) }, //$NON-NLS-1$
797: { "HighSurrogates", new LazyRange(0xD800, 0xDB7F) }, //$NON-NLS-1$
798: {
799: "HighPrivateUseSurrogates", new LazyRange(0xDB80, 0xDBFF) }, //$NON-NLS-1$
800: { "LowSurrogates", new LazyRange(0xDC00, 0xDFFF) }, //$NON-NLS-1$
801: { "PrivateUseArea", new LazyRange(0xE000, 0xF8FF) }, //$NON-NLS-1$
802: {
803: "CJKCompatibilityIdeographs", new LazyRange(0xF900, 0xFAFF) }, //$NON-NLS-1$
804: {
805: "AlphabeticPresentationForms", new LazyRange(0xFB00, 0xFB4F) }, //$NON-NLS-1$
806: {
807: "ArabicPresentationForms-A", new LazyRange(0xFB50, 0xFDFF) }, //$NON-NLS-1$
808: { "VariationSelectors", new LazyRange(0xFE00, 0xFE0F) }, //$NON-NLS-1$
809: { "VerticalForms", new LazyRange(0xFE10, 0xFE1F) }, //$NON-NLS-1$
810: { "CombiningHalfMarks", new LazyRange(0xFE20, 0xFE2F) }, //$NON-NLS-1$
811: {
812: "CJKCompatibilityForms", new LazyRange(0xFE30, 0xFE4F) }, //$NON-NLS-1$
813: { "SmallFormVariants", new LazyRange(0xFE50, 0xFE6F) }, //$NON-NLS-1$
814: {
815: "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$
816: {
817: "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$
818: { "all", new LazyRange(0x00, 0x10FFFF) }, //$NON-NLS-1$
819: { "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$
820: { "Cn", new LazyCategory(Character.UNASSIGNED, true) },
821: { "IsL", new LazyCategoryScope(0x3E, true) },
822: {
823: "Lu",
824: new LazyCategory(Character.UPPERCASE_LETTER,
825: true) },
826: {
827: "Ll",
828: new LazyCategory(Character.LOWERCASE_LETTER,
829: true) },
830: {
831: "Lt",
832: new LazyCategory(Character.TITLECASE_LETTER,
833: false) },
834: {
835: "Lm",
836: new LazyCategory(Character.MODIFIER_LETTER,
837: false) },
838: { "Lo", new LazyCategory(Character.OTHER_LETTER, true) },
839: { "IsM", new LazyCategoryScope(0x1C0, true) },
840: {
841: "Mn",
842: new LazyCategory(Character.NON_SPACING_MARK,
843: true) },
844: {
845: "Me",
846: new LazyCategory(Character.ENCLOSING_MARK,
847: false) },
848: {
849: "Mc",
850: new LazyCategory(
851: Character.COMBINING_SPACING_MARK, true) },
852: { "N", new LazyCategoryScope(0xE00, true) },
853: {
854: "Nd",
855: new LazyCategory(
856: Character.DECIMAL_DIGIT_NUMBER, true) },
857: { "Nl", new LazyCategory(Character.LETTER_NUMBER, true) },
858: { "No", new LazyCategory(Character.OTHER_NUMBER, true) },
859: { "IsZ", new LazyCategoryScope(0x7000, false) },
860: {
861: "Zs",
862: new LazyCategory(Character.SPACE_SEPARATOR,
863: false) },
864: {
865: "Zl",
866: new LazyCategory(Character.LINE_SEPARATOR,
867: false) },
868: {
869: "Zp",
870: new LazyCategory(Character.PARAGRAPH_SEPARATOR,
871: false) },
872: { "IsC", new LazyCategoryScope(0xF0000, true, true) },
873: { "Cc", new LazyCategory(Character.CONTROL, false) },
874: { "Cf", new LazyCategory(Character.FORMAT, true) },
875: { "Co", new LazyCategory(Character.PRIVATE_USE, true) },
876: {
877: "Cs",
878: new LazyCategory(Character.SURROGATE, false,
879: true) },
880: {
881: "IsP",
882: new LazyCategoryScope(
883: (1 << Character.DASH_PUNCTUATION)
884: | (1 << Character.START_PUNCTUATION)
885: | (1 << Character.END_PUNCTUATION)
886: | (1 << Character.CONNECTOR_PUNCTUATION)
887: | (1 << Character.OTHER_PUNCTUATION)
888: | (1 << Character.INITIAL_QUOTE_PUNCTUATION)
889: | (1 << Character.FINAL_QUOTE_PUNCTUATION),
890: true) },
891: {
892: "Pd",
893: new LazyCategory(Character.DASH_PUNCTUATION,
894: false) },
895: {
896: "Ps",
897: new LazyCategory(Character.START_PUNCTUATION,
898: false) },
899: {
900: "Pe",
901: new LazyCategory(Character.END_PUNCTUATION,
902: false) },
903: {
904: "Pc",
905: new LazyCategory(
906: Character.CONNECTOR_PUNCTUATION, false) },
907: {
908: "Po",
909: new LazyCategory(Character.OTHER_PUNCTUATION,
910: true) },
911: { "IsS", new LazyCategoryScope(0x7E000000, true) },
912: { "Sm", new LazyCategory(Character.MATH_SYMBOL, true) },
913: {
914: "Sc",
915: new LazyCategory(Character.CURRENCY_SYMBOL,
916: false) },
917: {
918: "Sk",
919: new LazyCategory(Character.MODIFIER_SYMBOL,
920: false) },
921: { "So", new LazyCategory(Character.OTHER_SYMBOL, true) },
922: {
923: "Pi",
924: new LazyCategory(
925: Character.INITIAL_QUOTE_PUNCTUATION,
926: false) },
927: {
928: "Pf",
929: new LazyCategory(
930: Character.FINAL_QUOTE_PUNCTUATION,
931: false) } };
932:
933: public Object[][] getContents() {
934: return contents;
935: }
936: }
937: }
|