0001: /* Licensed to the Apache Software Foundation (ASF) under one or more
0002: * contributor license agreements. See the NOTICE file distributed with
0003: * this work for additional information regarding copyright ownership.
0004: * The ASF licenses this file to You under the Apache License, Version 2.0
0005: * (the "License"); you may not use this file except in compliance with
0006: * the License. You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016:
0017: package org.apache.harmony.tests.java.util.regex;
0018:
0019: import java.util.regex.Matcher;
0020: import java.util.regex.Pattern;
0021: import java.util.regex.PatternSyntaxException;
0022:
0023: import junit.framework.TestCase;
0024:
0025: /**
0026: * Tests simple Pattern compilation and Matcher methods
0027: */
0028: @SuppressWarnings("nls")
0029: public class Pattern2Test extends TestCase {
0030: public void testSimpleMatch() throws PatternSyntaxException {
0031: Pattern p = Pattern.compile("foo.*");
0032:
0033: Matcher m1 = p.matcher("foo123");
0034: assertTrue(m1.matches());
0035: assertTrue(m1.find(0));
0036: assertTrue(m1.lookingAt());
0037:
0038: Matcher m2 = p.matcher("fox");
0039: assertFalse(m2.matches());
0040: assertFalse(m2.find(0));
0041: assertFalse(m2.lookingAt());
0042:
0043: assertTrue(Pattern.matches("foo.*", "foo123"));
0044: assertFalse(Pattern.matches("foo.*", "fox"));
0045:
0046: assertFalse(Pattern.matches("bar", "foobar"));
0047:
0048: assertTrue(Pattern.matches("", ""));
0049: }
0050:
0051: public void testCursors() {
0052: Pattern p;
0053: Matcher m;
0054:
0055: try {
0056: p = Pattern.compile("foo");
0057:
0058: m = p.matcher("foobar");
0059: assertTrue(m.find());
0060: assertEquals(0, m.start());
0061: assertEquals(3, m.end());
0062: assertFalse(m.find());
0063:
0064: // Note: also testing reset here
0065: m.reset();
0066: assertTrue(m.find());
0067: assertEquals(0, m.start());
0068: assertEquals(3, m.end());
0069: assertFalse(m.find());
0070:
0071: m.reset("barfoobar");
0072: assertTrue(m.find());
0073: assertEquals(3, m.start());
0074: assertEquals(6, m.end());
0075: assertFalse(m.find());
0076:
0077: m.reset("barfoo");
0078: assertTrue(m.find());
0079: assertEquals(3, m.start());
0080: assertEquals(6, m.end());
0081: assertFalse(m.find());
0082:
0083: m.reset("foobarfoobarfoo");
0084: assertTrue(m.find());
0085: assertEquals(0, m.start());
0086: assertEquals(3, m.end());
0087: assertTrue(m.find());
0088: assertEquals(6, m.start());
0089: assertEquals(9, m.end());
0090: assertTrue(m.find());
0091: assertEquals(12, m.start());
0092: assertEquals(15, m.end());
0093: assertFalse(m.find());
0094: assertTrue(m.find(0));
0095: assertEquals(0, m.start());
0096: assertEquals(3, m.end());
0097: assertTrue(m.find(4));
0098: assertEquals(6, m.start());
0099: assertEquals(9, m.end());
0100: } catch (PatternSyntaxException e) {
0101: System.out.println(e.getMessage());
0102: fail();
0103: }
0104: }
0105:
0106: public void testGroups() throws PatternSyntaxException {
0107: Pattern p;
0108: Matcher m;
0109:
0110: p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)");
0111:
0112: m = p.matcher("p1#q3p2q42p5p71p63#q888");
0113: assertTrue(m.find());
0114: assertEquals(0, m.start());
0115: assertEquals(5, m.end());
0116: assertEquals(2, m.groupCount());
0117: assertEquals(0, m.start(0));
0118: assertEquals(5, m.end(0));
0119: assertEquals(0, m.start(1));
0120: assertEquals(2, m.end(1));
0121: assertEquals(3, m.start(2));
0122: assertEquals(5, m.end(2));
0123: assertEquals("p1#q3", m.group());
0124: assertEquals("p1#q3", m.group(0));
0125: assertEquals("p1", m.group(1));
0126: assertEquals("q3", m.group(2));
0127:
0128: assertTrue(m.find());
0129: assertEquals(5, m.start());
0130: assertEquals(10, m.end());
0131: assertEquals(2, m.groupCount());
0132: assertEquals(10, m.end(0));
0133: assertEquals(5, m.start(1));
0134: assertEquals(7, m.end(1));
0135: assertEquals(7, m.start(2));
0136: assertEquals(10, m.end(2));
0137: assertEquals("p2q42", m.group());
0138: assertEquals("p2q42", m.group(0));
0139: assertEquals("p2", m.group(1));
0140: assertEquals("q42", m.group(2));
0141:
0142: assertTrue(m.find());
0143: assertEquals(15, m.start());
0144: assertEquals(23, m.end());
0145: assertEquals(2, m.groupCount());
0146: assertEquals(15, m.start(0));
0147: assertEquals(23, m.end(0));
0148: assertEquals(15, m.start(1));
0149: assertEquals(18, m.end(1));
0150: assertEquals(19, m.start(2));
0151: assertEquals(23, m.end(2));
0152: assertEquals("p63#q888", m.group());
0153: assertEquals("p63#q888", m.group(0));
0154: assertEquals("p63", m.group(1));
0155: assertEquals("q888", m.group(2));
0156: assertFalse(m.find());
0157: }
0158:
0159: public void testReplace() throws PatternSyntaxException {
0160: Pattern p;
0161: Matcher m;
0162:
0163: // Note: examples from book,
0164: // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171
0165: p = Pattern.compile("a*b");
0166:
0167: m = p.matcher("aabfooaabfooabfoob");
0168: assertTrue(m.replaceAll("-").equals("-foo-foo-foo-"));
0169: assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob"));
0170:
0171: /*
0172: * p = Pattern.compile ("\\p{Blank}");
0173: *
0174: * m = p.matcher ("fee fie foe fum"); assertTrue
0175: * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue
0176: * (m.replaceAll("-").equals ("fee-fie-foe-fum"));
0177: */
0178:
0179: p = Pattern.compile("([bB])yte");
0180:
0181: m = p.matcher("Byte for byte");
0182: assertTrue(m.replaceFirst("$1ite").equals("Bite for byte"));
0183: assertTrue(m.replaceAll("$1ite").equals("Bite for bite"));
0184:
0185: p = Pattern.compile("\\d\\d\\d\\d([- ])");
0186:
0187: m = p.matcher("card #1234-5678-1234");
0188: assertTrue(m.replaceFirst("xxxx$1").equals(
0189: "card #xxxx-5678-1234"));
0190: assertTrue(m.replaceAll("xxxx$1")
0191: .equals("card #xxxx-xxxx-1234"));
0192:
0193: p = Pattern.compile("(up|left)( *)(right|down)");
0194:
0195: m = p.matcher("left right, up down");
0196: assertTrue(m.replaceFirst("$3$2$1").equals(
0197: "right left, up down"));
0198: assertTrue(m.replaceAll("$3$2$1").equals("right left, down up"));
0199:
0200: p = Pattern.compile("([CcPp][hl]e[ea]se)");
0201:
0202: m = p.matcher("I want cheese. Please.");
0203: assertTrue(m.replaceFirst("<b> $1 </b>").equals(
0204: "I want <b> cheese </b>. Please."));
0205: assertTrue(m.replaceAll("<b> $1 </b>").equals(
0206: "I want <b> cheese </b>. <b> Please </b>."));
0207: }
0208:
0209: public void testEscapes() throws PatternSyntaxException {
0210: Pattern p;
0211: Matcher m;
0212:
0213: // Test \\ sequence
0214: p = Pattern.compile("([a-z]+)\\\\([a-z]+);");
0215: m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;");
0216: assertTrue(m.find());
0217: assertEquals("fred", m.group(1));
0218: assertEquals("ginger", m.group(2));
0219: assertTrue(m.find());
0220: assertEquals("abbott", m.group(1));
0221: assertEquals("costello", m.group(2));
0222: assertTrue(m.find());
0223: assertEquals("jekell", m.group(1));
0224: assertEquals("hyde", m.group(2));
0225: assertFalse(m.find());
0226:
0227: // Test \n, \t, \r, \f, \e, \a sequences
0228: p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)");
0229: m = p
0230: .matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh");
0231: assertTrue(m.find());
0232: assertEquals("aa", m.group(1));
0233: assertEquals("bb", m.group(2));
0234: assertTrue(m.find());
0235: assertEquals("cc", m.group(1));
0236: assertEquals("dd", m.group(2));
0237: assertTrue(m.find());
0238: assertEquals("ee", m.group(1));
0239: assertEquals("ff", m.group(2));
0240: assertTrue(m.find());
0241: assertEquals("gg", m.group(1));
0242: assertEquals("hh", m.group(2));
0243: assertFalse(m.find());
0244:
0245: // Test \\u and \\x sequences
0246:p = Pattern.compile("([0-9]+)[\\u0020:\\x21];");
0247: m = p.matcher("11:;22 ;33-;44!;");
0248: assertTrue(m.find());
0249: assertEquals("11", m.group(1));
0250: assertTrue(m.find());
0251: assertEquals("22", m.group(1));
0252: assertTrue(m.find());
0253: assertEquals("44", m.group(1));
0254: assertFalse(m.find());
0255:
0256: // Test invalid unicode sequences
0257: try {
0258: p = Pattern.compile("\\u");
0259: fail("PatternSyntaxException expected");
0260: } catch (PatternSyntaxException e) {
0261: }
0262:
0263: try {
0264: p = Pattern.compile("\\u;");
0265: fail("PatternSyntaxException expected");
0266: } catch (PatternSyntaxException e) {
0267: }
0268:
0269: try {
0270: p = Pattern.compile("\\u002");
0271: fail("PatternSyntaxException expected");
0272: } catch (PatternSyntaxException e) {
0273: }
0274:
0275: try {
0276: p = Pattern.compile("\\u002;");
0277: fail("PatternSyntaxException expected");
0278: } catch (PatternSyntaxException e) {
0279: }
0280:
0281: // Test invalid hex sequences
0282: try {
0283: p = Pattern.compile("\\x");
0284: fail("PatternSyntaxException expected");
0285: } catch (PatternSyntaxException e) {
0286: }
0287:
0288: try {
0289: p = Pattern.compile("\\x;");
0290: fail("PatternSyntaxException expected");
0291: } catch (PatternSyntaxException e) {
0292: }
0293:
0294: try {
0295: p = Pattern.compile("\\xa");
0296: fail("PatternSyntaxException expected");
0297: } catch (PatternSyntaxException e) {
0298: }
0299:
0300: try {
0301: p = Pattern.compile("\\xa;");
0302: fail("PatternSyntaxException expected");
0303: } catch (PatternSyntaxException e) {
0304: }
0305:
0306: // Test \0 (octal) sequences (1, 2 and 3 digit)
0307: p = Pattern.compile("([0-9]+)[\\07\\040\\0160];");
0308: m = p.matcher("11\u0007;22:;33 ;44p;");
0309: assertTrue(m.find());
0310: assertEquals("11", m.group(1));
0311: assertTrue(m.find());
0312: assertEquals("33", m.group(1));
0313: assertTrue(m.find());
0314: assertEquals("44", m.group(1));
0315: assertFalse(m.find());
0316:
0317: // Test invalid octal sequences
0318: try {
0319: p = Pattern.compile("\\08");
0320: fail("PatternSyntaxException expected");
0321: } catch (PatternSyntaxException e) {
0322: }
0323:
0324: // originally contributed test did not check the result
0325: // TODO: check what RI does here
0326: // try {
0327: // p = Pattern.compile("\\0477");
0328: // fail("PatternSyntaxException expected");
0329: // } catch (PatternSyntaxException e) {
0330: // }
0331:
0332: try {
0333: p = Pattern.compile("\\0");
0334: fail("PatternSyntaxException expected");
0335: } catch (PatternSyntaxException e) {
0336: }
0337:
0338: try {
0339: p = Pattern.compile("\\0;");
0340: fail("PatternSyntaxException expected");
0341: } catch (PatternSyntaxException e) {
0342: }
0343:
0344: // Test \c (control character) sequence
0345: p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];");
0346: m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;");
0347: assertTrue(m.find());
0348: assertEquals("11", m.group(1));
0349: assertTrue(m.find());
0350: assertEquals("33", m.group(1));
0351: assertTrue(m.find());
0352: assertEquals("55", m.group(1));
0353: assertTrue(m.find());
0354: assertEquals("66", m.group(1));
0355: assertFalse(m.find());
0356:
0357: // More thorough control escape test
0358: // Ensure that each escape matches exactly the corresponding
0359: // character
0360: // code and no others (well, from 0-255 at least)
0361: int i, j;
0362: for (i = 0; i < 26; i++) {
0363: p = Pattern.compile("\\c"
0364: + Character.toString((char) ('A' + i)));
0365: int match_char = -1;
0366: for (j = 0; j < 255; j++) {
0367: m = p.matcher(Character.toString((char) j));
0368: if (m.matches()) {
0369: assertEquals(-1, match_char);
0370: match_char = j;
0371: }
0372: }
0373: assertTrue(match_char == i + 1);
0374: }
0375:
0376: // Test invalid control escapes
0377: try {
0378: p = Pattern.compile("\\c");
0379: fail("PatternSyntaxException expected");
0380: } catch (PatternSyntaxException e) {
0381: }
0382:
0383: // originally contributed test did not check the result
0384: // TODO: check what RI does here
0385: // try {
0386: // p = Pattern.compile("\\c;");
0387: // fail("PatternSyntaxException expected");
0388: // } catch (PatternSyntaxException e) {
0389: // }
0390: //
0391: // try {
0392: // p = Pattern.compile("\\ca;");
0393: // fail("PatternSyntaxException expected");
0394: // } catch (PatternSyntaxException e) {
0395: // }
0396: //
0397: // try {
0398: // p = Pattern.compile("\\c4;");
0399: // fail("PatternSyntaxException expected");
0400: // } catch (PatternSyntaxException e) {
0401: // }
0402: }
0403:
0404: public void testCharacterClasses() throws PatternSyntaxException {
0405: Pattern p;
0406: Matcher m;
0407:
0408: // Test one character range
0409: p = Pattern.compile("[p].*[l]");
0410: m = p.matcher("paul");
0411: assertTrue(m.matches());
0412: m = p.matcher("pool");
0413: assertTrue(m.matches());
0414: m = p.matcher("pong");
0415: assertFalse(m.matches());
0416: m = p.matcher("pl");
0417: assertTrue(m.matches());
0418:
0419: // Test two character range
0420: p = Pattern.compile("[pm].*[lp]");
0421: m = p.matcher("prop");
0422: assertTrue(m.matches());
0423: m = p.matcher("mall");
0424: assertTrue(m.matches());
0425: m = p.matcher("pong");
0426: assertFalse(m.matches());
0427: m = p.matcher("pill");
0428: assertTrue(m.matches());
0429:
0430: // Test range including [ and ]
0431: p = Pattern.compile("[<\\[].*[\\]>]");
0432: m = p.matcher("<foo>");
0433: assertTrue(m.matches());
0434: m = p.matcher("[bar]");
0435: assertTrue(m.matches());
0436: m = p.matcher("{foobar]");
0437: assertFalse(m.matches());
0438: m = p.matcher("<pill]");
0439: assertTrue(m.matches());
0440:
0441: // Test range using ^
0442: p = Pattern.compile("[^bc][a-z]+[tr]");
0443: m = p.matcher("pat");
0444: assertTrue(m.matches());
0445: m = p.matcher("liar");
0446: assertTrue(m.matches());
0447: m = p.matcher("car");
0448: assertFalse(m.matches());
0449: m = p.matcher("gnat");
0450: assertTrue(m.matches());
0451:
0452: // Test character range using -
0453: p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]");
0454: m = p.matcher("d__F-8");
0455: assertTrue(m.matches());
0456: m = p.matcher("c_a-q");
0457: assertTrue(m.matches());
0458: m = p.matcher("a__R-a");
0459: assertFalse(m.matches());
0460: m = p.matcher("r_____d-----5");
0461: assertTrue(m.matches());
0462:
0463: // Test range using unicode characters and unicode and hex escapes
0464: p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]");
0465: m = p.matcher("\u2000_q-\u0007");
0466: assertTrue(m.matches());
0467: m = p.matcher("\u1234_z-\u0001");
0468: assertTrue(m.matches());
0469: m = p.matcher("r_p-q");
0470: assertFalse(m.matches());
0471: m = p.matcher("\u2345_____d-----\n");
0472: assertTrue(m.matches());
0473:
0474: // Test ranges including the "-" character
0475: p = Pattern.compile("[\\*-/]_+[---]!+[--AP]");
0476: m = p.matcher("-_-!!A");
0477: assertTrue(m.matches());
0478: m = p.matcher("\u002b_-!!!-");
0479: assertTrue(m.matches());
0480: m = p.matcher("!_-!@");
0481: assertFalse(m.matches());
0482: m = p.matcher(",______-!!!!!!!P");
0483: assertTrue(m.matches());
0484:
0485: // Test nested ranges
0486: p = Pattern.compile("[pm[t]][a-z]+[[r]lp]");
0487: m = p.matcher("prop");
0488: assertTrue(m.matches());
0489: m = p.matcher("tsar");
0490: assertTrue(m.matches());
0491: m = p.matcher("pong");
0492: assertFalse(m.matches());
0493: m = p.matcher("moor");
0494: assertTrue(m.matches());
0495:
0496: // Test character class intersection with &&
0497: // TODO: figure out what x&&y or any class with a null intersection
0498: // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match
0499: // nothing" and doesn't mean "match anything" so I'm stumped.
0500: p = Pattern
0501: .compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+");
0502: m = p.matcher("h--q--x--");
0503: assertTrue(m.matches());
0504: m = p.matcher("hog--q-x-");
0505: assertTrue(m.matches());
0506: m = p.matcher("ape--q-x-");
0507: assertFalse(m.matches());
0508: m = p.matcher("mop--q-x----");
0509: assertTrue(m.matches());
0510:
0511: // Test error cases with &&
0512: p = Pattern.compile("[&&[xyz]]");
0513: m = p.matcher("&");
0514: // System.out.println(m.matches());
0515: m = p.matcher("x");
0516: // System.out.println(m.matches());
0517: m = p.matcher("y");
0518: // System.out.println(m.matches());
0519: p = Pattern.compile("[[xyz]&[axy]]");
0520: m = p.matcher("x");
0521: // System.out.println(m.matches());
0522: m = p.matcher("z");
0523: // System.out.println(m.matches());
0524: m = p.matcher("&");
0525: // System.out.println(m.matches());
0526: p = Pattern.compile("[abc[123]&&[345]def]");
0527: m = p.matcher("a");
0528: // System.out.println(m.matches());
0529:
0530: p = Pattern.compile("[[xyz]&&]");
0531:
0532: p = Pattern.compile("[[abc]&]");
0533:
0534: try {
0535: p = Pattern.compile("[[abc]&&");
0536: fail("PatternSyntaxException expected");
0537: } catch (PatternSyntaxException e) {
0538: }
0539:
0540: p = Pattern.compile("[[abc]\\&&[xyz]]");
0541:
0542: p = Pattern.compile("[[abc]&\\&[xyz]]");
0543:
0544: // Test 3-way intersection
0545: p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]");
0546: m = p.matcher("g");
0547: assertTrue(m.matches());
0548: m = p.matcher("m");
0549: assertFalse(m.matches());
0550:
0551: // Test nested intersection
0552: p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]");
0553: m = p.matcher("g");
0554: assertTrue(m.matches());
0555: m = p.matcher("m");
0556: assertFalse(m.matches());
0557:
0558: // Test character class subtraction with && and ^
0559: p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]");
0560: m = p.matcher("pop");
0561: assertTrue(m.matches());
0562: m = p.matcher("tag");
0563: assertTrue(m.matches());
0564: m = p.matcher("eat");
0565: assertFalse(m.matches());
0566: m = p.matcher("tax");
0567: assertFalse(m.matches());
0568: m = p.matcher("zip");
0569: assertTrue(m.matches());
0570:
0571: // Test . (DOT), with and without DOTALL
0572: // Note: DOT not allowed in character classes
0573: p = Pattern.compile(".+/x.z");
0574: m = p.matcher("!$/xyz");
0575: assertTrue(m.matches());
0576: m = p.matcher("%\n\r/x\nz");
0577: assertFalse(m.matches());
0578: p = Pattern.compile(".+/x.z", Pattern.DOTALL);
0579: m = p.matcher("%\n\r/x\nz");
0580: assertTrue(m.matches());
0581:
0582: // Test \d (digit)
0583: p = Pattern.compile("\\d+[a-z][\\dx]");
0584: m = p.matcher("42a6");
0585: assertTrue(m.matches());
0586: m = p.matcher("21zx");
0587: assertTrue(m.matches());
0588: m = p.matcher("ab6");
0589: assertFalse(m.matches());
0590: m = p.matcher("56912f9");
0591: assertTrue(m.matches());
0592:
0593: // Test \D (not a digit)
0594: p = Pattern.compile("\\D+[a-z]-[\\D3]");
0595: m = p.matcher("za-p");
0596: assertTrue(m.matches());
0597: m = p.matcher("%!e-3");
0598: assertTrue(m.matches());
0599: m = p.matcher("9a-x");
0600: assertFalse(m.matches());
0601: m = p.matcher("\u1234pp\ny-3");
0602: assertTrue(m.matches());
0603:
0604: // Test \s (whitespace)
0605: p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>");
0606: m = p.matcher("<cat \t1\fx>");
0607: assertTrue(m.matches());
0608: m = p.matcher("<cat \t1\f >");
0609: assertFalse(m.matches());
0610: m = p
0611: .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz");
0612: assertTrue(m.find());
0613: assertTrue(m.find());
0614: assertFalse(m.find());
0615:
0616: // Test \S (not whitespace)
0617: p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>");
0618: m = p.matcher("<f $0**\n** 221>");
0619: assertTrue(m.matches());
0620: m = p.matcher("<x 441\t221>");
0621: assertTrue(m.matches());
0622: m = p.matcher("<z \t9\ng 221>");
0623: assertFalse(m.matches());
0624: m = p.matcher("<z 60\ngg\u1234\f221>");
0625: assertTrue(m.matches());
0626: p = Pattern
0627: .compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>");
0628: m = p.matcher("<f $0**\n** 221x>");
0629: assertTrue(m.matches());
0630: m = p.matcher("<x 441\t221z>");
0631: assertTrue(m.matches());
0632: m = p.matcher("<x 441\t221 >");
0633: assertFalse(m.matches());
0634: m = p.matcher("<x 441\t221c>");
0635: assertFalse(m.matches());
0636: m = p.matcher("<z \t9\ng 221x>");
0637: assertFalse(m.matches());
0638: m = p.matcher("<z 60\ngg\u1234\f221\u0001>");
0639: assertTrue(m.matches());
0640:
0641: // Test \w (ascii word)
0642: p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;");
0643: m = p.matcher("<f1 99;!foo5/a$7;");
0644: assertTrue(m.matches());
0645: m = p.matcher("<f$ 99;!foo5/a$7;");
0646: assertFalse(m.matches());
0647: m = p
0648: .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;");
0649: assertTrue(m.matches());
0650:
0651: // Test \W (not an ascii word)
0652: p = Pattern
0653: .compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;");
0654: m = p.matcher("<$foo3\n99;_bar\t0;");
0655: assertTrue(m.matches());
0656: m = p.matcher("<hh 99;_g 0;");
0657: assertFalse(m.matches());
0658: m = p.matcher("<*xx\t00;^zz\f11;");
0659: assertTrue(m.matches());
0660:
0661: // Test x|y pattern
0662: // TODO
0663: }
0664:
0665: public void testPOSIXGroups() throws PatternSyntaxException {
0666: Pattern p;
0667: Matcher m;
0668:
0669: // Test POSIX groups using \p and \P (in the group and not in the group)
0670: // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct,
0671: // Graph, Print, Blank, Space, Cntrl
0672: // Test \p{Lower}
0673: /*
0674: * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>");
0675: * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m = p.matcher("<p4%:Z\tq>");
0676: * assertTrue(m.matches()); m = p.matcher("<A6#:e e>");
0677: * assertFalse(m.matches());
0678: */
0679: p = Pattern.compile("\\p{Lower}+");
0680: m = p.matcher("abcdefghijklmnopqrstuvwxyz");
0681: assertTrue(m.matches());
0682:
0683: // Invalid uses of \p{Lower}
0684: try {
0685: p = Pattern.compile("\\p");
0686: fail("PatternSyntaxException expected");
0687: } catch (PatternSyntaxException e) {
0688: }
0689:
0690: try {
0691: p = Pattern.compile("\\p;");
0692: fail("PatternSyntaxException expected");
0693: } catch (PatternSyntaxException e) {
0694: }
0695:
0696: try {
0697: p = Pattern.compile("\\p{");
0698: fail("PatternSyntaxException expected");
0699: } catch (PatternSyntaxException e) {
0700: }
0701:
0702: try {
0703: p = Pattern.compile("\\p{;");
0704: fail("PatternSyntaxException expected");
0705: } catch (PatternSyntaxException e) {
0706: }
0707:
0708: try {
0709: p = Pattern.compile("\\p{Lower");
0710: fail("PatternSyntaxException expected");
0711: } catch (PatternSyntaxException e) {
0712: }
0713:
0714: try {
0715: p = Pattern.compile("\\p{Lower;");
0716: fail("PatternSyntaxException expected");
0717: } catch (PatternSyntaxException e) {
0718: }
0719:
0720: // Test \p{Upper}
0721: /*
0722: * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>");
0723: * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m = p.matcher("<P4%:z\tQ>");
0724: * assertTrue(m.matches()); m = p.matcher("<a6#:E E>");
0725: * assertFalse(m.matches());
0726: */
0727: p = Pattern.compile("\\p{Upper}+");
0728: m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
0729: assertTrue(m.matches());
0730:
0731: // Invalid uses of \p{Upper}
0732: try {
0733: p = Pattern.compile("\\p{Upper");
0734: fail("PatternSyntaxException expected");
0735: } catch (PatternSyntaxException e) {
0736: }
0737:
0738: try {
0739: p = Pattern.compile("\\p{Upper;");
0740: fail("PatternSyntaxException expected");
0741: } catch (PatternSyntaxException e) {
0742: }
0743:
0744: // Test \p{ASCII}
0745: /*
0746: * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>");
0747: * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m =
0748: * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m =
0749: * p.matcher("<\u00846#:E E>"); assertFalse(m.matches())
0750: */
0751: int i;
0752: p = Pattern.compile("\\p{ASCII}");
0753: for (i = 0; i < 0x80; i++) {
0754: m = p.matcher(Character.toString((char) i));
0755: assertTrue(m.matches());
0756: }
0757: for (; i < 0xff; i++) {
0758: m = p.matcher(Character.toString((char) i));
0759: assertFalse(m.matches());
0760: }
0761:
0762: // Invalid uses of \p{ASCII}
0763: try {
0764: p = Pattern.compile("\\p{ASCII");
0765: fail("PatternSyntaxException expected");
0766: } catch (PatternSyntaxException e) {
0767: }
0768:
0769: try {
0770: p = Pattern.compile("\\p{ASCII;");
0771: fail("PatternSyntaxException expected");
0772: } catch (PatternSyntaxException e) {
0773: }
0774:
0775: // Test \p{Alpha}
0776: // TODO
0777:
0778: // Test \p{Digit}
0779: // TODO
0780:
0781: // Test \p{XDigit}
0782: // TODO
0783:
0784: // Test \p{Alnum}
0785: // TODO
0786:
0787: // Test \p{Punct}
0788: // TODO
0789:
0790: // Test \p{Graph}
0791: // TODO
0792:
0793: // Test \p{Print}
0794: // TODO
0795:
0796: // Test \p{Blank}
0797: // TODO
0798:
0799: // Test \p{Space}
0800: // TODO
0801:
0802: // Test \p{Cntrl}
0803: // TODO
0804: }
0805:
0806: public void testUnicodeCategories() throws PatternSyntaxException {
0807: // Test Unicode categories using \p and \P
0808: // One letter codes: L, M, N, P, S, Z, C
0809: // Two letter codes: Lu, Nd, Sc, Sm, ...
0810: // See java.lang.Character and Unicode standard for complete list
0811: // TODO
0812: // Test \p{L}
0813: // TODO
0814:
0815: // Test \p{N}
0816: // TODO
0817:
0818: // ... etc
0819:
0820: // Test two letter codes:
0821: // From unicode.org:
0822: // Lu
0823: // Ll
0824: // Lt
0825: // Lm
0826: // Lo
0827: // Mn
0828: // Mc
0829: // Me
0830: // Nd
0831: // Nl
0832: // No
0833: // Pc
0834: // Pd
0835: // Ps
0836: // Pe
0837: // Pi
0838: // Pf
0839: // Po
0840: // Sm
0841: // Sc
0842: // Sk
0843: // So
0844: // Zs
0845: // Zl
0846: // Zp
0847: // Cc
0848: // Cf
0849: // Cs
0850: // Co
0851: // Cn
0852: }
0853:
0854: public void testUnicodeBlocks() throws PatternSyntaxException {
0855: Pattern p;
0856: Matcher m;
0857: int i, j;
0858:
0859: // Test Unicode blocks using \p and \P
0860: // FIXME:
0861: // Note that LatinExtended-B and ArabicPresentations-B are unrecognized
0862: // by the reference JDK.
0863: for (i = 0; i < UBlocks.length; i++) {
0864: /*
0865: * p = Pattern.compile("\\p{"+UBlocks[i].name+"}");
0866: *
0867: * if (UBlocks[i].low > 0) { m =
0868: * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
0869: * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <=
0870: * UBlocks[i].high; j++) { m =
0871: * p.matcher(Character.toString((char)j)); assertTrue(m.matches()); }
0872: * if (UBlocks[i].high < 0xFFFF) { m =
0873: * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
0874: * assertFalse(m.matches()); }
0875: *
0876: * p = Pattern.compile("\\P{"+UBlocks[i].name+"}");
0877: *
0878: * if (UBlocks[i].low > 0) { m =
0879: * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
0880: * assertTrue(m.matches()); } for (j=UBlocks[i].low; j <
0881: * UBlocks[i].high; j++) { m =
0882: * p.matcher(Character.toString((char)j)); assertFalse(m.matches()); }
0883: * if (UBlocks[i].high < 0xFFFF) { m =
0884: * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
0885: * assertTrue(m.matches()); }
0886: */
0887:
0888: p = Pattern.compile("\\p{In" + UBlocks[i].name + "}");
0889:
0890: if (UBlocks[i].low > 0) {
0891: m = p.matcher(Character
0892: .toString((char) (UBlocks[i].low - 1)));
0893: assertFalse(m.matches());
0894: }
0895: for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) {
0896: m = p.matcher(Character.toString((char) j));
0897: assertTrue(m.matches());
0898: }
0899: if (UBlocks[i].high < 0xFFFF) {
0900: m = p.matcher(Character
0901: .toString((char) (UBlocks[i].high + 1)));
0902: assertFalse(m.matches());
0903: }
0904:
0905: p = Pattern.compile("\\P{In" + UBlocks[i].name + "}");
0906:
0907: if (UBlocks[i].low > 0) {
0908: m = p.matcher(Character
0909: .toString((char) (UBlocks[i].low - 1)));
0910: assertTrue(m.matches());
0911: }
0912: for (j = UBlocks[i].low; j < UBlocks[i].high; j++) {
0913: m = p.matcher(Character.toString((char) j));
0914: assertFalse(m.matches());
0915: }
0916: if (UBlocks[i].high < 0xFFFF) {
0917: m = p.matcher(Character
0918: .toString((char) (UBlocks[i].high + 1)));
0919: assertTrue(m.matches());
0920: }
0921: }
0922: }
0923:
0924: public void testCapturingGroups() throws PatternSyntaxException {
0925: // Test simple capturing groups
0926: // TODO
0927:
0928: // Test grouping without capture (?:...)
0929: // TODO
0930:
0931: // Test combination of grouping and capture
0932: // TODO
0933:
0934: // Test \<num> sequence with capturing and non-capturing groups
0935: // TODO
0936:
0937: // Test \<num> with <num> out of range
0938: // TODO
0939: }
0940:
0941: public void testRepeats() {
0942: // Test ?
0943: // TODO
0944:
0945: // Test *
0946: // TODO
0947:
0948: // Test +
0949: // TODO
0950:
0951: // Test {<num>}, including 0, 1 and more
0952: // TODO
0953:
0954: // Test {<num>,}, including 0, 1 and more
0955: // TODO
0956:
0957: // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?)
0958: // TODO
0959: }
0960:
0961: public void testAnchors() throws PatternSyntaxException {
0962: // Test ^, default and MULTILINE
0963: // TODO
0964:
0965: // Test $, default and MULTILINE
0966: // TODO
0967:
0968: // Test \b (word boundary)
0969: // TODO
0970:
0971: // Test \B (not a word boundary)
0972: // TODO
0973:
0974: // Test \A (beginning of string)
0975: // TODO
0976:
0977: // Test \Z (end of string)
0978: // TODO
0979:
0980: // Test \z (end of string)
0981: // TODO
0982:
0983: // Test \G
0984: // TODO
0985:
0986: // Test positive lookahead using (?=...)
0987: // TODO
0988:
0989: // Test negative lookahead using (?!...)
0990: // TODO
0991:
0992: // Test positive lookbehind using (?<=...)
0993: // TODO
0994:
0995: // Test negative lookbehind using (?<!...)
0996: // TODO
0997: }
0998:
0999: public void testMisc() throws PatternSyntaxException {
1000: Pattern p;
1001: Matcher m;
1002:
1003: // Test (?>...)
1004: // TODO
1005:
1006: // Test (?onflags-offflags)
1007: // Valid flags are i,m,d,s,u,x
1008: // TODO
1009:
1010: // Test (?onflags-offflags:...)
1011: // TODO
1012:
1013: // Test \Q, \E
1014: p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+");
1015: m = p.matcher("abc;[a-z]+;\\Q(foo.*);411");
1016: assertTrue(m.matches());
1017: m = p.matcher("abc;def;foo42;555");
1018: assertFalse(m.matches());
1019: m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123");
1020: assertFalse(m.matches());
1021:
1022: p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+");
1023: m = p.matcher("abc;foo5-(...);123");
1024: assertTrue(m.matches());
1025: assertEquals("foo5-(...)", m.group(1));
1026: m = p.matcher("abc;foo9-(xxx);789");
1027: assertFalse(m.matches());
1028:
1029: p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+");
1030: m = p.matcher("abc;bar0-def$-;123");
1031: assertTrue(m.matches());
1032:
1033: // FIXME:
1034: // This should work the same as the pattern above but fails with the
1035: // the reference JDK
1036: p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+");
1037: m = p.matcher("abc;bar0-def$-;123");
1038: // assertTrue(m.matches());
1039:
1040: // FIXME:
1041: // This should work too .. it looks as if just about anything that
1042: // has more
1043: // than one character between \Q and \E is broken in the the reference
1044: // JDK
1045: p = Pattern
1046: .compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+");
1047: m = p.matcher("abc;bar0-def[99]-]0x[;123");
1048: // assertTrue(m.matches());
1049:
1050: // This is the same as above but with explicit escapes .. and this
1051: // does work
1052: // on the the reference JDK
1053: p = Pattern
1054: .compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+");
1055: m = p.matcher("abc;bar0-def[99]-]0x[;123");
1056: assertTrue(m.matches());
1057:
1058: // Test #<comment text>
1059: // TODO
1060: }
1061:
1062: public void testCompile1() throws PatternSyntaxException {
1063: Pattern pattern = Pattern
1064: .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*");
1065: String name = "iso-8859-1";
1066: assertTrue(pattern.matcher(name).matches());
1067: }
1068:
1069: public void testCompile2() throws PatternSyntaxException {
1070: String findString = "\\Qimport\\E";
1071:
1072: Pattern pattern = Pattern.compile(findString, 0);
1073: Matcher matcher = pattern.matcher(new String(
1074: "import a.A;\n\n import b.B;\nclass C {}"));
1075:
1076: assertTrue(matcher.find(0));
1077: }
1078:
1079: public void testCompile3() throws PatternSyntaxException {
1080: Pattern p;
1081: Matcher m;
1082: p = Pattern.compile("a$");
1083: m = p.matcher("a\n");
1084: assertTrue(m.find());
1085: assertEquals("a", m.group());
1086: assertFalse(m.find());
1087:
1088: p = Pattern.compile("(a$)");
1089: m = p.matcher("a\n");
1090: assertTrue(m.find());
1091: assertEquals("a", m.group());
1092: assertEquals("a", m.group(1));
1093: assertFalse(m.find());
1094:
1095: p = Pattern.compile("^.*$", Pattern.MULTILINE);
1096:
1097: m = p.matcher("a\n");
1098: assertTrue(m.find());
1099: // System.out.println("["+m.group()+"]");
1100: assertEquals("a", m.group());
1101: assertFalse(m.find());
1102:
1103: m = p.matcher("a\nb\n");
1104: assertTrue(m.find());
1105: // System.out.println("["+m.group()+"]");
1106: assertEquals("a", m.group());
1107: assertTrue(m.find());
1108: // System.out.println("["+m.group()+"]");
1109: assertEquals("b", m.group());
1110: assertFalse(m.find());
1111:
1112: m = p.matcher("a\nb");
1113: assertTrue(m.find());
1114: // System.out.println("["+m.group()+"]");
1115: assertEquals("a", m.group());
1116: assertTrue(m.find());
1117: assertEquals("b", m.group());
1118: assertFalse(m.find());
1119:
1120: m = p.matcher("\naa\r\nbb\rcc\n\n");
1121: assertTrue(m.find());
1122: // System.out.println("["+m.group()+"]");
1123: assertTrue(m.group().equals(""));
1124: assertTrue(m.find());
1125: // System.out.println("["+m.group()+"]");
1126: assertEquals("aa", m.group());
1127: assertTrue(m.find());
1128: // System.out.println("["+m.group()+"]");
1129: assertEquals("bb", m.group());
1130: assertTrue(m.find());
1131: // System.out.println("["+m.group()+"]");
1132: assertEquals("cc", m.group());
1133: assertTrue(m.find());
1134: // System.out.println("["+m.group()+"]");
1135: assertTrue(m.group().equals(""));
1136: assertFalse(m.find());
1137:
1138: m = p.matcher("a");
1139: assertTrue(m.find());
1140: assertEquals("a", m.group());
1141: assertFalse(m.find());
1142:
1143: m = p.matcher("");
1144: // FIXME: This matches the reference behaviour but is
1145: // inconsistent with matching "a" - ie. the end of the
1146: // target string should match against $ always but this
1147: // appears to work with the null string only when not in
1148: // multiline mode (see below)
1149: assertFalse(m.find());
1150:
1151: p = Pattern.compile("^.*$");
1152: m = p.matcher("");
1153: assertTrue(m.find());
1154: assertTrue(m.group().equals(""));
1155: assertFalse(m.find());
1156: }
1157:
1158: public void testCompile4() throws PatternSyntaxException {
1159: String findString = "\\Qpublic\\E";
1160: StringBuffer text = new StringBuffer(
1161: " public class Class {\n"
1162: + " public class Class {");
1163:
1164: Pattern pattern = Pattern.compile(findString, 0);
1165: Matcher matcher = pattern.matcher(text);
1166:
1167: boolean found = matcher.find();
1168: assertTrue(found);
1169: assertEquals(4, matcher.start());
1170: if (found) {
1171: // modify text
1172: text.delete(0, text.length());
1173: text.append("Text have been changed.");
1174: matcher.reset(text);
1175: }
1176:
1177: found = matcher.find();
1178: assertFalse(found);
1179: }
1180:
1181: public void testCompile5() throws PatternSyntaxException {
1182: Pattern p = Pattern.compile("^[0-9]");
1183: String s[] = p.split("12", -1);
1184: assertEquals("", s[0]);
1185: assertEquals("2", s[1]);
1186: assertEquals(2, s.length);
1187: }
1188:
1189: // public void testCompile6() {
1190: // String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
1191: // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
1192: // try {
1193: // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
1194: // assertTrue(true);
1195: // } catch (PatternSyntaxException e) {
1196: // System.out.println(e.getMessage());
1197: // assertTrue(false);
1198: // }
1199: // }
1200:
1201: private static class UBInfo {
1202: public UBInfo(int low, int high, String name) {
1203: this .name = name;
1204: this .low = low;
1205: this .high = high;
1206: }
1207:
1208: public String name;
1209:
1210: public int low, high;
1211: }
1212:
1213: // A table representing the unicode categories
1214: // private static UBInfo[] UCategories = {
1215: // Lu
1216: // Ll
1217: // Lt
1218: // Lm
1219: // Lo
1220: // Mn
1221: // Mc
1222: // Me
1223: // Nd
1224: // Nl
1225: // No
1226: // Pc
1227: // Pd
1228: // Ps
1229: // Pe
1230: // Pi
1231: // Pf
1232: // Po
1233: // Sm
1234: // Sc
1235: // Sk
1236: // So
1237: // Zs
1238: // Zl
1239: // Zp
1240: // Cc
1241: // Cf
1242: // Cs
1243: // Co
1244: // Cn
1245: // };
1246:
1247: // A table representing the unicode character blocks
1248: private static UBInfo[] UBlocks = {
1249: /* 0000; 007F; Basic Latin */
1250: new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN
1251: /* 0080; 00FF; Latin-1 Supplement */
1252: new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT
1253: /* 0100; 017F; Latin Extended-A */
1254: new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A
1255: /* 0180; 024F; Latin Extended-B */
1256: // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), //
1257: // Character.UnicodeBlock.LATIN_EXTENDED_B
1258: /* 0250; 02AF; IPA Extensions */
1259: new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS
1260: /* 02B0; 02FF; Spacing Modifier Letters */
1261: new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS
1262: /* 0300; 036F; Combining Diacritical Marks */
1263: new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
1264: /* 0370; 03FF; Greek */
1265: new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK
1266: /* 0400; 04FF; Cyrillic */
1267: new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC
1268: /* 0530; 058F; Armenian */
1269: new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN
1270: /* 0590; 05FF; Hebrew */
1271: new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW
1272: /* 0600; 06FF; Arabic */
1273: new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC
1274: /* 0700; 074F; Syriac */
1275: new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC
1276: /* 0780; 07BF; Thaana */
1277: new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA
1278: /* 0900; 097F; Devanagari */
1279: new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI
1280: /* 0980; 09FF; Bengali */
1281: new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI
1282: /* 0A00; 0A7F; Gurmukhi */
1283: new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI
1284: /* 0A80; 0AFF; Gujarati */
1285: new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI
1286: /* 0B00; 0B7F; Oriya */
1287: new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA
1288: /* 0B80; 0BFF; Tamil */
1289: new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL
1290: /* 0C00; 0C7F; Telugu */
1291: new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU
1292: /* 0C80; 0CFF; Kannada */
1293: new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA
1294: /* 0D00; 0D7F; Malayalam */
1295: new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM
1296: /* 0D80; 0DFF; Sinhala */
1297: new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA
1298: /* 0E00; 0E7F; Thai */
1299: new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI
1300: /* 0E80; 0EFF; Lao */
1301: new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO
1302: /* 0F00; 0FFF; Tibetan */
1303: new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN
1304: /* 1000; 109F; Myanmar */
1305: new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR
1306: /* 10A0; 10FF; Georgian */
1307: new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN
1308: /* 1100; 11FF; Hangul Jamo */
1309: new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO
1310: /* 1200; 137F; Ethiopic */
1311: new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC
1312: /* 13A0; 13FF; Cherokee */
1313: new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE
1314: /* 1400; 167F; Unified Canadian Aboriginal Syllabics */
1315: new UBInfo(0x1400, 0x167F,
1316: "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1317: /* 1680; 169F; Ogham */
1318: new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM
1319: /* 16A0; 16FF; Runic */
1320: new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC
1321: /* 1780; 17FF; Khmer */
1322: new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER
1323: /* 1800; 18AF; Mongolian */
1324: new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN
1325: /* 1E00; 1EFF; Latin Extended Additional */
1326: new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
1327: /* 1F00; 1FFF; Greek Extended */
1328: new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED
1329: /* 2000; 206F; General Punctuation */
1330: new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION
1331: /* 2070; 209F; Superscripts and Subscripts */
1332: new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
1333: /* 20A0; 20CF; Currency Symbols */
1334: new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS
1335: /* 20D0; 20FF; Combining Marks for Symbols */
1336: new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
1337: /* 2100; 214F; Letterlike Symbols */
1338: new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS
1339: /* 2150; 218F; Number Forms */
1340: new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS
1341: /* 2190; 21FF; Arrows */
1342: new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS
1343: /* 2200; 22FF; Mathematical Operators */
1344: new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS
1345: /* 2300; 23FF; Miscellaneous Technical */
1346: new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
1347: /* 2400; 243F; Control Pictures */
1348: new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES
1349: /* 2440; 245F; Optical Character Recognition */
1350: new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
1351: /* 2460; 24FF; Enclosed Alphanumerics */
1352: new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
1353: /* 2500; 257F; Box Drawing */
1354: new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING
1355: /* 2580; 259F; Block Elements */
1356: new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS
1357: /* 25A0; 25FF; Geometric Shapes */
1358: new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES
1359: /* 2600; 26FF; Miscellaneous Symbols */
1360: new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
1361: /* 2700; 27BF; Dingbats */
1362: new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS
1363: /* 2800; 28FF; Braille Patterns */
1364: new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS
1365: /* 2E80; 2EFF; CJK Radicals Supplement */
1366: new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
1367: /* 2F00; 2FDF; Kangxi Radicals */
1368: new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS
1369: /* 2FF0; 2FFF; Ideographic Description Characters */
1370: new UBInfo(0x2FF0, 0x2FFF,
1371: "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1372: /* 3000; 303F; CJK Symbols and Punctuation */
1373: new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
1374: /* 3040; 309F; Hiragana */
1375: new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA
1376: /* 30A0; 30FF; Katakana */
1377: new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA
1378: /* 3100; 312F; Bopomofo */
1379: new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO
1380: /* 3130; 318F; Hangul Compatibility Jamo */
1381: new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
1382: /* 3190; 319F; Kanbun */
1383: new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN
1384: /* 31A0; 31BF; Bopomofo Extended */
1385: new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED
1386: /* 3200; 32FF; Enclosed CJK Letters and Months */
1387: new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
1388: /* 3300; 33FF; CJK Compatibility */
1389: new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY
1390: /* 3400; 4DB5; CJK Unified Ideographs Extension A */
1391: new UBInfo(0x3400, 0x4DB5, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1392: /* 4E00; 9FFF; CJK Unified Ideographs */
1393: new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
1394: /* A000; A48F; Yi Syllables */
1395: new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES
1396: /* A490; A4CF; Yi Radicals */
1397: new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS
1398: /* AC00; D7A3; Hangul Syllables */
1399: new UBInfo(0xAC00, 0xD7A3, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES
1400: /* D800; DB7F; High Surrogates */
1401: /* DB80; DBFF; High Private Use Surrogates */
1402: /* DC00; DFFF; Low Surrogates */
1403: /* E000; F8FF; Private Use */
1404: /* F900; FAFF; CJK Compatibility Ideographs */
1405: new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
1406: /* FB00; FB4F; Alphabetic Presentation Forms */
1407: new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS
1408: /* FB50; FDFF; Arabic Presentation Forms-A */
1409: new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
1410: /* FE20; FE2F; Combining Half Marks */
1411: new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS
1412: /* FE30; FE4F; CJK Compatibility Forms */
1413: new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
1414: /* FE50; FE6F; Small Form Variants */
1415: new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS
1416: /* FE70; FEFE; Arabic Presentation Forms-B */
1417: // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), //
1418: // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
1419: /* FEFF; FEFF; Specials */
1420: new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS
1421: /* FF00; FFEF; Halfwidth and Fullwidth Forms */
1422: new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
1423: /* FFF0; FFFD; Specials */
1424: new UBInfo(0xFFF0, 0xFFFD, "Specials") // Character.UnicodeBlock.SPECIALS
1425: };
1426: }
|