Source Code Cross Referenced for Pattern2Test.java in  » Apache-Harmony-Java-SE » org-package » org » apache » harmony » tests » java » util » regex » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Apache Harmony Java SE » org package » org.apache.harmony.tests.java.util.regex 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /* Licensed to the Apache Software Foundation (ASF) under one or more
0002:         * contributor license agreements.  See the NOTICE file distributed with
0003:         * this work for additional information regarding copyright ownership.
0004:         * The ASF licenses this file to You under the Apache License, Version 2.0
0005:         * (the "License"); you may not use this file except in compliance with
0006:         * the License.  You may obtain a copy of the License at
0007:         * 
0008:         *     http://www.apache.org/licenses/LICENSE-2.0
0009:         * 
0010:         * Unless required by applicable law or agreed to in writing, software
0011:         * distributed under the License is distributed on an "AS IS" BASIS,
0012:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013:         * See the License for the specific language governing permissions and
0014:         * limitations under the License.
0015:         */
0016:
0017:        package org.apache.harmony.tests.java.util.regex;
0018:
0019:        import java.util.regex.Matcher;
0020:        import java.util.regex.Pattern;
0021:        import java.util.regex.PatternSyntaxException;
0022:
0023:        import junit.framework.TestCase;
0024:
0025:        /**
0026:         * Tests simple Pattern compilation and Matcher methods
0027:         */
0028:        @SuppressWarnings("nls")
0029:        public class Pattern2Test extends TestCase {
0030:            public void testSimpleMatch() throws PatternSyntaxException {
0031:                Pattern p = Pattern.compile("foo.*");
0032:
0033:                Matcher m1 = p.matcher("foo123");
0034:                assertTrue(m1.matches());
0035:                assertTrue(m1.find(0));
0036:                assertTrue(m1.lookingAt());
0037:
0038:                Matcher m2 = p.matcher("fox");
0039:                assertFalse(m2.matches());
0040:                assertFalse(m2.find(0));
0041:                assertFalse(m2.lookingAt());
0042:
0043:                assertTrue(Pattern.matches("foo.*", "foo123"));
0044:                assertFalse(Pattern.matches("foo.*", "fox"));
0045:
0046:                assertFalse(Pattern.matches("bar", "foobar"));
0047:
0048:                assertTrue(Pattern.matches("", ""));
0049:            }
0050:
0051:            public void testCursors() {
0052:                Pattern p;
0053:                Matcher m;
0054:
0055:                try {
0056:                    p = Pattern.compile("foo");
0057:
0058:                    m = p.matcher("foobar");
0059:                    assertTrue(m.find());
0060:                    assertEquals(0, m.start());
0061:                    assertEquals(3, m.end());
0062:                    assertFalse(m.find());
0063:
0064:                    // Note: also testing reset here
0065:                    m.reset();
0066:                    assertTrue(m.find());
0067:                    assertEquals(0, m.start());
0068:                    assertEquals(3, m.end());
0069:                    assertFalse(m.find());
0070:
0071:                    m.reset("barfoobar");
0072:                    assertTrue(m.find());
0073:                    assertEquals(3, m.start());
0074:                    assertEquals(6, m.end());
0075:                    assertFalse(m.find());
0076:
0077:                    m.reset("barfoo");
0078:                    assertTrue(m.find());
0079:                    assertEquals(3, m.start());
0080:                    assertEquals(6, m.end());
0081:                    assertFalse(m.find());
0082:
0083:                    m.reset("foobarfoobarfoo");
0084:                    assertTrue(m.find());
0085:                    assertEquals(0, m.start());
0086:                    assertEquals(3, m.end());
0087:                    assertTrue(m.find());
0088:                    assertEquals(6, m.start());
0089:                    assertEquals(9, m.end());
0090:                    assertTrue(m.find());
0091:                    assertEquals(12, m.start());
0092:                    assertEquals(15, m.end());
0093:                    assertFalse(m.find());
0094:                    assertTrue(m.find(0));
0095:                    assertEquals(0, m.start());
0096:                    assertEquals(3, m.end());
0097:                    assertTrue(m.find(4));
0098:                    assertEquals(6, m.start());
0099:                    assertEquals(9, m.end());
0100:                } catch (PatternSyntaxException e) {
0101:                    System.out.println(e.getMessage());
0102:                    fail();
0103:                }
0104:            }
0105:
0106:            public void testGroups() throws PatternSyntaxException {
0107:                Pattern p;
0108:                Matcher m;
0109:
0110:                p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)");
0111:
0112:                m = p.matcher("p1#q3p2q42p5p71p63#q888");
0113:                assertTrue(m.find());
0114:                assertEquals(0, m.start());
0115:                assertEquals(5, m.end());
0116:                assertEquals(2, m.groupCount());
0117:                assertEquals(0, m.start(0));
0118:                assertEquals(5, m.end(0));
0119:                assertEquals(0, m.start(1));
0120:                assertEquals(2, m.end(1));
0121:                assertEquals(3, m.start(2));
0122:                assertEquals(5, m.end(2));
0123:                assertEquals("p1#q3", m.group());
0124:                assertEquals("p1#q3", m.group(0));
0125:                assertEquals("p1", m.group(1));
0126:                assertEquals("q3", m.group(2));
0127:
0128:                assertTrue(m.find());
0129:                assertEquals(5, m.start());
0130:                assertEquals(10, m.end());
0131:                assertEquals(2, m.groupCount());
0132:                assertEquals(10, m.end(0));
0133:                assertEquals(5, m.start(1));
0134:                assertEquals(7, m.end(1));
0135:                assertEquals(7, m.start(2));
0136:                assertEquals(10, m.end(2));
0137:                assertEquals("p2q42", m.group());
0138:                assertEquals("p2q42", m.group(0));
0139:                assertEquals("p2", m.group(1));
0140:                assertEquals("q42", m.group(2));
0141:
0142:                assertTrue(m.find());
0143:                assertEquals(15, m.start());
0144:                assertEquals(23, m.end());
0145:                assertEquals(2, m.groupCount());
0146:                assertEquals(15, m.start(0));
0147:                assertEquals(23, m.end(0));
0148:                assertEquals(15, m.start(1));
0149:                assertEquals(18, m.end(1));
0150:                assertEquals(19, m.start(2));
0151:                assertEquals(23, m.end(2));
0152:                assertEquals("p63#q888", m.group());
0153:                assertEquals("p63#q888", m.group(0));
0154:                assertEquals("p63", m.group(1));
0155:                assertEquals("q888", m.group(2));
0156:                assertFalse(m.find());
0157:            }
0158:
0159:            public void testReplace() throws PatternSyntaxException {
0160:                Pattern p;
0161:                Matcher m;
0162:
0163:                // Note: examples from book,
0164:                // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171
0165:                p = Pattern.compile("a*b");
0166:
0167:                m = p.matcher("aabfooaabfooabfoob");
0168:                assertTrue(m.replaceAll("-").equals("-foo-foo-foo-"));
0169:                assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob"));
0170:
0171:                /*
0172:                 * p = Pattern.compile ("\\p{Blank}");
0173:                 * 
0174:                 * m = p.matcher ("fee fie foe fum"); assertTrue
0175:                 * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue
0176:                 * (m.replaceAll("-").equals ("fee-fie-foe-fum"));
0177:                 */
0178:
0179:                p = Pattern.compile("([bB])yte");
0180:
0181:                m = p.matcher("Byte for byte");
0182:                assertTrue(m.replaceFirst("$1ite").equals("Bite for byte"));
0183:                assertTrue(m.replaceAll("$1ite").equals("Bite for bite"));
0184:
0185:                p = Pattern.compile("\\d\\d\\d\\d([- ])");
0186:
0187:                m = p.matcher("card #1234-5678-1234");
0188:                assertTrue(m.replaceFirst("xxxx$1").equals(
0189:                        "card #xxxx-5678-1234"));
0190:                assertTrue(m.replaceAll("xxxx$1")
0191:                        .equals("card #xxxx-xxxx-1234"));
0192:
0193:                p = Pattern.compile("(up|left)( *)(right|down)");
0194:
0195:                m = p.matcher("left right, up down");
0196:                assertTrue(m.replaceFirst("$3$2$1").equals(
0197:                        "right left, up down"));
0198:                assertTrue(m.replaceAll("$3$2$1").equals("right left, down up"));
0199:
0200:                p = Pattern.compile("([CcPp][hl]e[ea]se)");
0201:
0202:                m = p.matcher("I want cheese. Please.");
0203:                assertTrue(m.replaceFirst("<b> $1 </b>").equals(
0204:                        "I want <b> cheese </b>. Please."));
0205:                assertTrue(m.replaceAll("<b> $1 </b>").equals(
0206:                        "I want <b> cheese </b>. <b> Please </b>."));
0207:            }
0208:
0209:            public void testEscapes() throws PatternSyntaxException {
0210:                Pattern p;
0211:                Matcher m;
0212:
0213:                // Test \\ sequence
0214:                p = Pattern.compile("([a-z]+)\\\\([a-z]+);");
0215:                m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;");
0216:                assertTrue(m.find());
0217:                assertEquals("fred", m.group(1));
0218:                assertEquals("ginger", m.group(2));
0219:                assertTrue(m.find());
0220:                assertEquals("abbott", m.group(1));
0221:                assertEquals("costello", m.group(2));
0222:                assertTrue(m.find());
0223:                assertEquals("jekell", m.group(1));
0224:                assertEquals("hyde", m.group(2));
0225:                assertFalse(m.find());
0226:
0227:                // Test \n, \t, \r, \f, \e, \a sequences
0228:                p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)");
0229:                m = p
0230:                        .matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh");
0231:                assertTrue(m.find());
0232:                assertEquals("aa", m.group(1));
0233:                assertEquals("bb", m.group(2));
0234:                assertTrue(m.find());
0235:                assertEquals("cc", m.group(1));
0236:                assertEquals("dd", m.group(2));
0237:                assertTrue(m.find());
0238:                assertEquals("ee", m.group(1));
0239:                assertEquals("ff", m.group(2));
0240:                assertTrue(m.find());
0241:                assertEquals("gg", m.group(1));
0242:                assertEquals("hh", m.group(2));
0243:                assertFalse(m.find());
0244:
0245:                // Test \\u and \\x sequences
0246:p = Pattern.compile("([0-9]+)[\\u0020:\\x21];");
0247:                m = p.matcher("11:;22 ;33-;44!;");
0248:                assertTrue(m.find());
0249:                assertEquals("11", m.group(1));
0250:                assertTrue(m.find());
0251:                assertEquals("22", m.group(1));
0252:                assertTrue(m.find());
0253:                assertEquals("44", m.group(1));
0254:                assertFalse(m.find());
0255:
0256:                // Test invalid unicode sequences
0257:                try {
0258:                    p = Pattern.compile("\\u");
0259:                    fail("PatternSyntaxException expected");
0260:                } catch (PatternSyntaxException e) {
0261:                }
0262:
0263:                try {
0264:                    p = Pattern.compile("\\u;");
0265:                    fail("PatternSyntaxException expected");
0266:                } catch (PatternSyntaxException e) {
0267:                }
0268:
0269:                try {
0270:                    p = Pattern.compile("\\u002");
0271:                    fail("PatternSyntaxException expected");
0272:                } catch (PatternSyntaxException e) {
0273:                }
0274:
0275:                try {
0276:                    p = Pattern.compile("\\u002;");
0277:                    fail("PatternSyntaxException expected");
0278:                } catch (PatternSyntaxException e) {
0279:                }
0280:
0281:                // Test invalid hex sequences
0282:                try {
0283:                    p = Pattern.compile("\\x");
0284:                    fail("PatternSyntaxException expected");
0285:                } catch (PatternSyntaxException e) {
0286:                }
0287:
0288:                try {
0289:                    p = Pattern.compile("\\x;");
0290:                    fail("PatternSyntaxException expected");
0291:                } catch (PatternSyntaxException e) {
0292:                }
0293:
0294:                try {
0295:                    p = Pattern.compile("\\xa");
0296:                    fail("PatternSyntaxException expected");
0297:                } catch (PatternSyntaxException e) {
0298:                }
0299:
0300:                try {
0301:                    p = Pattern.compile("\\xa;");
0302:                    fail("PatternSyntaxException expected");
0303:                } catch (PatternSyntaxException e) {
0304:                }
0305:
0306:                // Test \0 (octal) sequences (1, 2 and 3 digit)
0307:                p = Pattern.compile("([0-9]+)[\\07\\040\\0160];");
0308:                m = p.matcher("11\u0007;22:;33 ;44p;");
0309:                assertTrue(m.find());
0310:                assertEquals("11", m.group(1));
0311:                assertTrue(m.find());
0312:                assertEquals("33", m.group(1));
0313:                assertTrue(m.find());
0314:                assertEquals("44", m.group(1));
0315:                assertFalse(m.find());
0316:
0317:                // Test invalid octal sequences
0318:                try {
0319:                    p = Pattern.compile("\\08");
0320:                    fail("PatternSyntaxException expected");
0321:                } catch (PatternSyntaxException e) {
0322:                }
0323:
0324:                // originally contributed test did not check the result
0325:                // TODO: check what RI does here
0326:                // try {
0327:                // p = Pattern.compile("\\0477");
0328:                // fail("PatternSyntaxException expected");
0329:                // } catch (PatternSyntaxException e) {
0330:                // }
0331:
0332:                try {
0333:                    p = Pattern.compile("\\0");
0334:                    fail("PatternSyntaxException expected");
0335:                } catch (PatternSyntaxException e) {
0336:                }
0337:
0338:                try {
0339:                    p = Pattern.compile("\\0;");
0340:                    fail("PatternSyntaxException expected");
0341:                } catch (PatternSyntaxException e) {
0342:                }
0343:
0344:                // Test \c (control character) sequence
0345:                p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];");
0346:                m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;");
0347:                assertTrue(m.find());
0348:                assertEquals("11", m.group(1));
0349:                assertTrue(m.find());
0350:                assertEquals("33", m.group(1));
0351:                assertTrue(m.find());
0352:                assertEquals("55", m.group(1));
0353:                assertTrue(m.find());
0354:                assertEquals("66", m.group(1));
0355:                assertFalse(m.find());
0356:
0357:                // More thorough control escape test
0358:                // Ensure that each escape matches exactly the corresponding
0359:                // character
0360:                // code and no others (well, from 0-255 at least)
0361:                int i, j;
0362:                for (i = 0; i < 26; i++) {
0363:                    p = Pattern.compile("\\c"
0364:                            + Character.toString((char) ('A' + i)));
0365:                    int match_char = -1;
0366:                    for (j = 0; j < 255; j++) {
0367:                        m = p.matcher(Character.toString((char) j));
0368:                        if (m.matches()) {
0369:                            assertEquals(-1, match_char);
0370:                            match_char = j;
0371:                        }
0372:                    }
0373:                    assertTrue(match_char == i + 1);
0374:                }
0375:
0376:                // Test invalid control escapes
0377:                try {
0378:                    p = Pattern.compile("\\c");
0379:                    fail("PatternSyntaxException expected");
0380:                } catch (PatternSyntaxException e) {
0381:                }
0382:
0383:                // originally contributed test did not check the result
0384:                // TODO: check what RI does here
0385:                // try {
0386:                // p = Pattern.compile("\\c;");
0387:                // fail("PatternSyntaxException expected");
0388:                // } catch (PatternSyntaxException e) {
0389:                // }
0390:                //
0391:                // try {
0392:                // p = Pattern.compile("\\ca;");
0393:                // fail("PatternSyntaxException expected");
0394:                // } catch (PatternSyntaxException e) {
0395:                // }
0396:                //
0397:                // try {
0398:                // p = Pattern.compile("\\c4;");
0399:                // fail("PatternSyntaxException expected");
0400:                // } catch (PatternSyntaxException e) {
0401:                // }
0402:            }
0403:
0404:            public void testCharacterClasses() throws PatternSyntaxException {
0405:                Pattern p;
0406:                Matcher m;
0407:
0408:                // Test one character range
0409:                p = Pattern.compile("[p].*[l]");
0410:                m = p.matcher("paul");
0411:                assertTrue(m.matches());
0412:                m = p.matcher("pool");
0413:                assertTrue(m.matches());
0414:                m = p.matcher("pong");
0415:                assertFalse(m.matches());
0416:                m = p.matcher("pl");
0417:                assertTrue(m.matches());
0418:
0419:                // Test two character range
0420:                p = Pattern.compile("[pm].*[lp]");
0421:                m = p.matcher("prop");
0422:                assertTrue(m.matches());
0423:                m = p.matcher("mall");
0424:                assertTrue(m.matches());
0425:                m = p.matcher("pong");
0426:                assertFalse(m.matches());
0427:                m = p.matcher("pill");
0428:                assertTrue(m.matches());
0429:
0430:                // Test range including [ and ]
0431:                p = Pattern.compile("[<\\[].*[\\]>]");
0432:                m = p.matcher("<foo>");
0433:                assertTrue(m.matches());
0434:                m = p.matcher("[bar]");
0435:                assertTrue(m.matches());
0436:                m = p.matcher("{foobar]");
0437:                assertFalse(m.matches());
0438:                m = p.matcher("<pill]");
0439:                assertTrue(m.matches());
0440:
0441:                // Test range using ^
0442:                p = Pattern.compile("[^bc][a-z]+[tr]");
0443:                m = p.matcher("pat");
0444:                assertTrue(m.matches());
0445:                m = p.matcher("liar");
0446:                assertTrue(m.matches());
0447:                m = p.matcher("car");
0448:                assertFalse(m.matches());
0449:                m = p.matcher("gnat");
0450:                assertTrue(m.matches());
0451:
0452:                // Test character range using -
0453:                p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]");
0454:                m = p.matcher("d__F-8");
0455:                assertTrue(m.matches());
0456:                m = p.matcher("c_a-q");
0457:                assertTrue(m.matches());
0458:                m = p.matcher("a__R-a");
0459:                assertFalse(m.matches());
0460:                m = p.matcher("r_____d-----5");
0461:                assertTrue(m.matches());
0462:
0463:                // Test range using unicode characters and unicode and hex escapes
0464:                p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]");
0465:                m = p.matcher("\u2000_q-\u0007");
0466:                assertTrue(m.matches());
0467:                m = p.matcher("\u1234_z-\u0001");
0468:                assertTrue(m.matches());
0469:                m = p.matcher("r_p-q");
0470:                assertFalse(m.matches());
0471:                m = p.matcher("\u2345_____d-----\n");
0472:                assertTrue(m.matches());
0473:
0474:                // Test ranges including the "-" character
0475:                p = Pattern.compile("[\\*-/]_+[---]!+[--AP]");
0476:                m = p.matcher("-_-!!A");
0477:                assertTrue(m.matches());
0478:                m = p.matcher("\u002b_-!!!-");
0479:                assertTrue(m.matches());
0480:                m = p.matcher("!_-!@");
0481:                assertFalse(m.matches());
0482:                m = p.matcher(",______-!!!!!!!P");
0483:                assertTrue(m.matches());
0484:
0485:                // Test nested ranges
0486:                p = Pattern.compile("[pm[t]][a-z]+[[r]lp]");
0487:                m = p.matcher("prop");
0488:                assertTrue(m.matches());
0489:                m = p.matcher("tsar");
0490:                assertTrue(m.matches());
0491:                m = p.matcher("pong");
0492:                assertFalse(m.matches());
0493:                m = p.matcher("moor");
0494:                assertTrue(m.matches());
0495:
0496:                // Test character class intersection with &&
0497:                // TODO: figure out what x&&y or any class with a null intersection
0498:                // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match
0499:                // nothing" and doesn't mean "match anything" so I'm stumped.
0500:                p = Pattern
0501:                        .compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+");
0502:                m = p.matcher("h--q--x--");
0503:                assertTrue(m.matches());
0504:                m = p.matcher("hog--q-x-");
0505:                assertTrue(m.matches());
0506:                m = p.matcher("ape--q-x-");
0507:                assertFalse(m.matches());
0508:                m = p.matcher("mop--q-x----");
0509:                assertTrue(m.matches());
0510:
0511:                // Test error cases with &&
0512:                p = Pattern.compile("[&&[xyz]]");
0513:                m = p.matcher("&");
0514:                // System.out.println(m.matches());
0515:                m = p.matcher("x");
0516:                // System.out.println(m.matches());
0517:                m = p.matcher("y");
0518:                // System.out.println(m.matches());
0519:                p = Pattern.compile("[[xyz]&[axy]]");
0520:                m = p.matcher("x");
0521:                // System.out.println(m.matches());
0522:                m = p.matcher("z");
0523:                // System.out.println(m.matches());
0524:                m = p.matcher("&");
0525:                // System.out.println(m.matches());
0526:                p = Pattern.compile("[abc[123]&&[345]def]");
0527:                m = p.matcher("a");
0528:                // System.out.println(m.matches());
0529:
0530:                p = Pattern.compile("[[xyz]&&]");
0531:
0532:                p = Pattern.compile("[[abc]&]");
0533:
0534:                try {
0535:                    p = Pattern.compile("[[abc]&&");
0536:                    fail("PatternSyntaxException expected");
0537:                } catch (PatternSyntaxException e) {
0538:                }
0539:
0540:                p = Pattern.compile("[[abc]\\&&[xyz]]");
0541:
0542:                p = Pattern.compile("[[abc]&\\&[xyz]]");
0543:
0544:                // Test 3-way intersection
0545:                p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]");
0546:                m = p.matcher("g");
0547:                assertTrue(m.matches());
0548:                m = p.matcher("m");
0549:                assertFalse(m.matches());
0550:
0551:                // Test nested intersection
0552:                p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]");
0553:                m = p.matcher("g");
0554:                assertTrue(m.matches());
0555:                m = p.matcher("m");
0556:                assertFalse(m.matches());
0557:
0558:                // Test character class subtraction with && and ^
0559:                p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]");
0560:                m = p.matcher("pop");
0561:                assertTrue(m.matches());
0562:                m = p.matcher("tag");
0563:                assertTrue(m.matches());
0564:                m = p.matcher("eat");
0565:                assertFalse(m.matches());
0566:                m = p.matcher("tax");
0567:                assertFalse(m.matches());
0568:                m = p.matcher("zip");
0569:                assertTrue(m.matches());
0570:
0571:                // Test . (DOT), with and without DOTALL
0572:                // Note: DOT not allowed in character classes
0573:                p = Pattern.compile(".+/x.z");
0574:                m = p.matcher("!$/xyz");
0575:                assertTrue(m.matches());
0576:                m = p.matcher("%\n\r/x\nz");
0577:                assertFalse(m.matches());
0578:                p = Pattern.compile(".+/x.z", Pattern.DOTALL);
0579:                m = p.matcher("%\n\r/x\nz");
0580:                assertTrue(m.matches());
0581:
0582:                // Test \d (digit)
0583:                p = Pattern.compile("\\d+[a-z][\\dx]");
0584:                m = p.matcher("42a6");
0585:                assertTrue(m.matches());
0586:                m = p.matcher("21zx");
0587:                assertTrue(m.matches());
0588:                m = p.matcher("ab6");
0589:                assertFalse(m.matches());
0590:                m = p.matcher("56912f9");
0591:                assertTrue(m.matches());
0592:
0593:                // Test \D (not a digit)
0594:                p = Pattern.compile("\\D+[a-z]-[\\D3]");
0595:                m = p.matcher("za-p");
0596:                assertTrue(m.matches());
0597:                m = p.matcher("%!e-3");
0598:                assertTrue(m.matches());
0599:                m = p.matcher("9a-x");
0600:                assertFalse(m.matches());
0601:                m = p.matcher("\u1234pp\ny-3");
0602:                assertTrue(m.matches());
0603:
0604:                // Test \s (whitespace)
0605:                p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>");
0606:                m = p.matcher("<cat \t1\fx>");
0607:                assertTrue(m.matches());
0608:                m = p.matcher("<cat \t1\f >");
0609:                assertFalse(m.matches());
0610:                m = p
0611:                        .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz");
0612:                assertTrue(m.find());
0613:                assertTrue(m.find());
0614:                assertFalse(m.find());
0615:
0616:                // Test \S (not whitespace)
0617:                p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>");
0618:                m = p.matcher("<f $0**\n** 221>");
0619:                assertTrue(m.matches());
0620:                m = p.matcher("<x 441\t221>");
0621:                assertTrue(m.matches());
0622:                m = p.matcher("<z \t9\ng 221>");
0623:                assertFalse(m.matches());
0624:                m = p.matcher("<z 60\ngg\u1234\f221>");
0625:                assertTrue(m.matches());
0626:                p = Pattern
0627:                        .compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>");
0628:                m = p.matcher("<f $0**\n** 221x>");
0629:                assertTrue(m.matches());
0630:                m = p.matcher("<x 441\t221z>");
0631:                assertTrue(m.matches());
0632:                m = p.matcher("<x 441\t221 >");
0633:                assertFalse(m.matches());
0634:                m = p.matcher("<x 441\t221c>");
0635:                assertFalse(m.matches());
0636:                m = p.matcher("<z \t9\ng 221x>");
0637:                assertFalse(m.matches());
0638:                m = p.matcher("<z 60\ngg\u1234\f221\u0001>");
0639:                assertTrue(m.matches());
0640:
0641:                // Test \w (ascii word)
0642:                p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;");
0643:                m = p.matcher("<f1 99;!foo5/a$7;");
0644:                assertTrue(m.matches());
0645:                m = p.matcher("<f$ 99;!foo5/a$7;");
0646:                assertFalse(m.matches());
0647:                m = p
0648:                        .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;");
0649:                assertTrue(m.matches());
0650:
0651:                // Test \W (not an ascii word)
0652:                p = Pattern
0653:                        .compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;");
0654:                m = p.matcher("<$foo3\n99;_bar\t0;");
0655:                assertTrue(m.matches());
0656:                m = p.matcher("<hh 99;_g 0;");
0657:                assertFalse(m.matches());
0658:                m = p.matcher("<*xx\t00;^zz\f11;");
0659:                assertTrue(m.matches());
0660:
0661:                // Test x|y pattern
0662:                // TODO
0663:            }
0664:
0665:            public void testPOSIXGroups() throws PatternSyntaxException {
0666:                Pattern p;
0667:                Matcher m;
0668:
0669:                // Test POSIX groups using \p and \P (in the group and not in the group)
0670:                // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct,
0671:                // Graph, Print, Blank, Space, Cntrl
0672:                // Test \p{Lower}
0673:                /*
0674:                 * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>");
0675:                 * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m = p.matcher("<p4%:Z\tq>");
0676:                 * assertTrue(m.matches()); m = p.matcher("<A6#:e e>");
0677:                 * assertFalse(m.matches());
0678:                 */
0679:                p = Pattern.compile("\\p{Lower}+");
0680:                m = p.matcher("abcdefghijklmnopqrstuvwxyz");
0681:                assertTrue(m.matches());
0682:
0683:                // Invalid uses of \p{Lower}
0684:                try {
0685:                    p = Pattern.compile("\\p");
0686:                    fail("PatternSyntaxException expected");
0687:                } catch (PatternSyntaxException e) {
0688:                }
0689:
0690:                try {
0691:                    p = Pattern.compile("\\p;");
0692:                    fail("PatternSyntaxException expected");
0693:                } catch (PatternSyntaxException e) {
0694:                }
0695:
0696:                try {
0697:                    p = Pattern.compile("\\p{");
0698:                    fail("PatternSyntaxException expected");
0699:                } catch (PatternSyntaxException e) {
0700:                }
0701:
0702:                try {
0703:                    p = Pattern.compile("\\p{;");
0704:                    fail("PatternSyntaxException expected");
0705:                } catch (PatternSyntaxException e) {
0706:                }
0707:
0708:                try {
0709:                    p = Pattern.compile("\\p{Lower");
0710:                    fail("PatternSyntaxException expected");
0711:                } catch (PatternSyntaxException e) {
0712:                }
0713:
0714:                try {
0715:                    p = Pattern.compile("\\p{Lower;");
0716:                    fail("PatternSyntaxException expected");
0717:                } catch (PatternSyntaxException e) {
0718:                }
0719:
0720:                // Test \p{Upper}
0721:                /*
0722:                 * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>");
0723:                 * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m = p.matcher("<P4%:z\tQ>");
0724:                 * assertTrue(m.matches()); m = p.matcher("<a6#:E E>");
0725:                 * assertFalse(m.matches());
0726:                 */
0727:                p = Pattern.compile("\\p{Upper}+");
0728:                m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
0729:                assertTrue(m.matches());
0730:
0731:                // Invalid uses of \p{Upper}
0732:                try {
0733:                    p = Pattern.compile("\\p{Upper");
0734:                    fail("PatternSyntaxException expected");
0735:                } catch (PatternSyntaxException e) {
0736:                }
0737:
0738:                try {
0739:                    p = Pattern.compile("\\p{Upper;");
0740:                    fail("PatternSyntaxException expected");
0741:                } catch (PatternSyntaxException e) {
0742:                }
0743:
0744:                // Test \p{ASCII}
0745:                /*
0746:                 * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>");
0747:                 * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m =
0748:                 * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m =
0749:                 * p.matcher("<\u00846#:E E>"); assertFalse(m.matches())
0750:                 */
0751:                int i;
0752:                p = Pattern.compile("\\p{ASCII}");
0753:                for (i = 0; i < 0x80; i++) {
0754:                    m = p.matcher(Character.toString((char) i));
0755:                    assertTrue(m.matches());
0756:                }
0757:                for (; i < 0xff; i++) {
0758:                    m = p.matcher(Character.toString((char) i));
0759:                    assertFalse(m.matches());
0760:                }
0761:
0762:                // Invalid uses of \p{ASCII}
0763:                try {
0764:                    p = Pattern.compile("\\p{ASCII");
0765:                    fail("PatternSyntaxException expected");
0766:                } catch (PatternSyntaxException e) {
0767:                }
0768:
0769:                try {
0770:                    p = Pattern.compile("\\p{ASCII;");
0771:                    fail("PatternSyntaxException expected");
0772:                } catch (PatternSyntaxException e) {
0773:                }
0774:
0775:                // Test \p{Alpha}
0776:                // TODO
0777:
0778:                // Test \p{Digit}
0779:                // TODO
0780:
0781:                // Test \p{XDigit}
0782:                // TODO
0783:
0784:                // Test \p{Alnum}
0785:                // TODO
0786:
0787:                // Test \p{Punct}
0788:                // TODO
0789:
0790:                // Test \p{Graph}
0791:                // TODO
0792:
0793:                // Test \p{Print}
0794:                // TODO
0795:
0796:                // Test \p{Blank}
0797:                // TODO
0798:
0799:                // Test \p{Space}
0800:                // TODO
0801:
0802:                // Test \p{Cntrl}
0803:                // TODO
0804:            }
0805:
0806:            public void testUnicodeCategories() throws PatternSyntaxException {
0807:                // Test Unicode categories using \p and \P
0808:                // One letter codes: L, M, N, P, S, Z, C
0809:                // Two letter codes: Lu, Nd, Sc, Sm, ...
0810:                // See java.lang.Character and Unicode standard for complete list
0811:                // TODO
0812:                // Test \p{L}
0813:                // TODO
0814:
0815:                // Test \p{N}
0816:                // TODO
0817:
0818:                // ... etc
0819:
0820:                // Test two letter codes:
0821:                // From unicode.org:
0822:                // Lu
0823:                // Ll
0824:                // Lt
0825:                // Lm
0826:                // Lo
0827:                // Mn
0828:                // Mc
0829:                // Me
0830:                // Nd
0831:                // Nl
0832:                // No
0833:                // Pc
0834:                // Pd
0835:                // Ps
0836:                // Pe
0837:                // Pi
0838:                // Pf
0839:                // Po
0840:                // Sm
0841:                // Sc
0842:                // Sk
0843:                // So
0844:                // Zs
0845:                // Zl
0846:                // Zp
0847:                // Cc
0848:                // Cf
0849:                // Cs
0850:                // Co
0851:                // Cn
0852:            }
0853:
0854:            public void testUnicodeBlocks() throws PatternSyntaxException {
0855:                Pattern p;
0856:                Matcher m;
0857:                int i, j;
0858:
0859:                // Test Unicode blocks using \p and \P
0860:                // FIXME:
0861:                // Note that LatinExtended-B and ArabicPresentations-B are unrecognized
0862:                // by the reference JDK.
0863:                for (i = 0; i < UBlocks.length; i++) {
0864:                    /*
0865:                     * p = Pattern.compile("\\p{"+UBlocks[i].name+"}");
0866:                     * 
0867:                     * if (UBlocks[i].low > 0) { m =
0868:                     * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
0869:                     * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <=
0870:                     * UBlocks[i].high; j++) { m =
0871:                     * p.matcher(Character.toString((char)j)); assertTrue(m.matches()); }
0872:                     * if (UBlocks[i].high < 0xFFFF) { m =
0873:                     * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
0874:                     * assertFalse(m.matches()); }
0875:                     * 
0876:                     * p = Pattern.compile("\\P{"+UBlocks[i].name+"}");
0877:                     * 
0878:                     * if (UBlocks[i].low > 0) { m =
0879:                     * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
0880:                     * assertTrue(m.matches()); } for (j=UBlocks[i].low; j <
0881:                     * UBlocks[i].high; j++) { m =
0882:                     * p.matcher(Character.toString((char)j)); assertFalse(m.matches()); }
0883:                     * if (UBlocks[i].high < 0xFFFF) { m =
0884:                     * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
0885:                     * assertTrue(m.matches()); }
0886:                     */
0887:
0888:                    p = Pattern.compile("\\p{In" + UBlocks[i].name + "}");
0889:
0890:                    if (UBlocks[i].low > 0) {
0891:                        m = p.matcher(Character
0892:                                .toString((char) (UBlocks[i].low - 1)));
0893:                        assertFalse(m.matches());
0894:                    }
0895:                    for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) {
0896:                        m = p.matcher(Character.toString((char) j));
0897:                        assertTrue(m.matches());
0898:                    }
0899:                    if (UBlocks[i].high < 0xFFFF) {
0900:                        m = p.matcher(Character
0901:                                .toString((char) (UBlocks[i].high + 1)));
0902:                        assertFalse(m.matches());
0903:                    }
0904:
0905:                    p = Pattern.compile("\\P{In" + UBlocks[i].name + "}");
0906:
0907:                    if (UBlocks[i].low > 0) {
0908:                        m = p.matcher(Character
0909:                                .toString((char) (UBlocks[i].low - 1)));
0910:                        assertTrue(m.matches());
0911:                    }
0912:                    for (j = UBlocks[i].low; j < UBlocks[i].high; j++) {
0913:                        m = p.matcher(Character.toString((char) j));
0914:                        assertFalse(m.matches());
0915:                    }
0916:                    if (UBlocks[i].high < 0xFFFF) {
0917:                        m = p.matcher(Character
0918:                                .toString((char) (UBlocks[i].high + 1)));
0919:                        assertTrue(m.matches());
0920:                    }
0921:                }
0922:            }
0923:
0924:            public void testCapturingGroups() throws PatternSyntaxException {
0925:                // Test simple capturing groups
0926:                // TODO
0927:
0928:                // Test grouping without capture (?:...)
0929:                // TODO
0930:
0931:                // Test combination of grouping and capture
0932:                // TODO
0933:
0934:                // Test \<num> sequence with capturing and non-capturing groups
0935:                // TODO
0936:
0937:                // Test \<num> with <num> out of range
0938:                // TODO
0939:            }
0940:
0941:            public void testRepeats() {
0942:                // Test ?
0943:                // TODO
0944:
0945:                // Test *
0946:                // TODO
0947:
0948:                // Test +
0949:                // TODO
0950:
0951:                // Test {<num>}, including 0, 1 and more
0952:                // TODO
0953:
0954:                // Test {<num>,}, including 0, 1 and more
0955:                // TODO
0956:
0957:                // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?)
0958:                // TODO
0959:            }
0960:
0961:            public void testAnchors() throws PatternSyntaxException {
0962:                // Test ^, default and MULTILINE
0963:                // TODO
0964:
0965:                // Test $, default and MULTILINE
0966:                // TODO
0967:
0968:                // Test \b (word boundary)
0969:                // TODO
0970:
0971:                // Test \B (not a word boundary)
0972:                // TODO
0973:
0974:                // Test \A (beginning of string)
0975:                // TODO
0976:
0977:                // Test \Z (end of string)
0978:                // TODO
0979:
0980:                // Test \z (end of string)
0981:                // TODO
0982:
0983:                // Test \G
0984:                // TODO
0985:
0986:                // Test positive lookahead using (?=...)
0987:                // TODO
0988:
0989:                // Test negative lookahead using (?!...)
0990:                // TODO
0991:
0992:                // Test positive lookbehind using (?<=...)
0993:                // TODO
0994:
0995:                // Test negative lookbehind using (?<!...)
0996:                // TODO
0997:            }
0998:
0999:            public void testMisc() throws PatternSyntaxException {
1000:                Pattern p;
1001:                Matcher m;
1002:
1003:                // Test (?>...)
1004:                // TODO
1005:
1006:                // Test (?onflags-offflags)
1007:                // Valid flags are i,m,d,s,u,x
1008:                // TODO
1009:
1010:                // Test (?onflags-offflags:...)
1011:                // TODO
1012:
1013:                // Test \Q, \E
1014:                p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+");
1015:                m = p.matcher("abc;[a-z]+;\\Q(foo.*);411");
1016:                assertTrue(m.matches());
1017:                m = p.matcher("abc;def;foo42;555");
1018:                assertFalse(m.matches());
1019:                m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123");
1020:                assertFalse(m.matches());
1021:
1022:                p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+");
1023:                m = p.matcher("abc;foo5-(...);123");
1024:                assertTrue(m.matches());
1025:                assertEquals("foo5-(...)", m.group(1));
1026:                m = p.matcher("abc;foo9-(xxx);789");
1027:                assertFalse(m.matches());
1028:
1029:                p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+");
1030:                m = p.matcher("abc;bar0-def$-;123");
1031:                assertTrue(m.matches());
1032:
1033:                // FIXME:
1034:                // This should work the same as the pattern above but fails with the
1035:                // the reference JDK
1036:                p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+");
1037:                m = p.matcher("abc;bar0-def$-;123");
1038:                // assertTrue(m.matches());
1039:
1040:                // FIXME:
1041:                // This should work too .. it looks as if just about anything that
1042:                // has more
1043:                // than one character between \Q and \E is broken in the the reference
1044:                // JDK
1045:                p = Pattern
1046:                        .compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+");
1047:                m = p.matcher("abc;bar0-def[99]-]0x[;123");
1048:                // assertTrue(m.matches());
1049:
1050:                // This is the same as above but with explicit escapes .. and this
1051:                // does work
1052:                // on the the reference JDK
1053:                p = Pattern
1054:                        .compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+");
1055:                m = p.matcher("abc;bar0-def[99]-]0x[;123");
1056:                assertTrue(m.matches());
1057:
1058:                // Test #<comment text>
1059:                // TODO
1060:            }
1061:
1062:            public void testCompile1() throws PatternSyntaxException {
1063:                Pattern pattern = Pattern
1064:                        .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*");
1065:                String name = "iso-8859-1";
1066:                assertTrue(pattern.matcher(name).matches());
1067:            }
1068:
1069:            public void testCompile2() throws PatternSyntaxException {
1070:                String findString = "\\Qimport\\E";
1071:
1072:                Pattern pattern = Pattern.compile(findString, 0);
1073:                Matcher matcher = pattern.matcher(new String(
1074:                        "import a.A;\n\n import b.B;\nclass C {}"));
1075:
1076:                assertTrue(matcher.find(0));
1077:            }
1078:
1079:            public void testCompile3() throws PatternSyntaxException {
1080:                Pattern p;
1081:                Matcher m;
1082:                p = Pattern.compile("a$");
1083:                m = p.matcher("a\n");
1084:                assertTrue(m.find());
1085:                assertEquals("a", m.group());
1086:                assertFalse(m.find());
1087:
1088:                p = Pattern.compile("(a$)");
1089:                m = p.matcher("a\n");
1090:                assertTrue(m.find());
1091:                assertEquals("a", m.group());
1092:                assertEquals("a", m.group(1));
1093:                assertFalse(m.find());
1094:
1095:                p = Pattern.compile("^.*$", Pattern.MULTILINE);
1096:
1097:                m = p.matcher("a\n");
1098:                assertTrue(m.find());
1099:                // System.out.println("["+m.group()+"]");
1100:                assertEquals("a", m.group());
1101:                assertFalse(m.find());
1102:
1103:                m = p.matcher("a\nb\n");
1104:                assertTrue(m.find());
1105:                // System.out.println("["+m.group()+"]");
1106:                assertEquals("a", m.group());
1107:                assertTrue(m.find());
1108:                // System.out.println("["+m.group()+"]");
1109:                assertEquals("b", m.group());
1110:                assertFalse(m.find());
1111:
1112:                m = p.matcher("a\nb");
1113:                assertTrue(m.find());
1114:                // System.out.println("["+m.group()+"]");
1115:                assertEquals("a", m.group());
1116:                assertTrue(m.find());
1117:                assertEquals("b", m.group());
1118:                assertFalse(m.find());
1119:
1120:                m = p.matcher("\naa\r\nbb\rcc\n\n");
1121:                assertTrue(m.find());
1122:                // System.out.println("["+m.group()+"]");
1123:                assertTrue(m.group().equals(""));
1124:                assertTrue(m.find());
1125:                // System.out.println("["+m.group()+"]");
1126:                assertEquals("aa", m.group());
1127:                assertTrue(m.find());
1128:                // System.out.println("["+m.group()+"]");
1129:                assertEquals("bb", m.group());
1130:                assertTrue(m.find());
1131:                // System.out.println("["+m.group()+"]");
1132:                assertEquals("cc", m.group());
1133:                assertTrue(m.find());
1134:                // System.out.println("["+m.group()+"]");
1135:                assertTrue(m.group().equals(""));
1136:                assertFalse(m.find());
1137:
1138:                m = p.matcher("a");
1139:                assertTrue(m.find());
1140:                assertEquals("a", m.group());
1141:                assertFalse(m.find());
1142:
1143:                m = p.matcher("");
1144:                // FIXME: This matches the reference behaviour but is
1145:                // inconsistent with matching "a" - ie. the end of the
1146:                // target string should match against $ always but this
1147:                // appears to work with the null string only when not in
1148:                // multiline mode (see below)
1149:                assertFalse(m.find());
1150:
1151:                p = Pattern.compile("^.*$");
1152:                m = p.matcher("");
1153:                assertTrue(m.find());
1154:                assertTrue(m.group().equals(""));
1155:                assertFalse(m.find());
1156:            }
1157:
1158:            public void testCompile4() throws PatternSyntaxException {
1159:                String findString = "\\Qpublic\\E";
1160:                StringBuffer text = new StringBuffer(
1161:                        "    public class Class {\n"
1162:                                + "    public class Class {");
1163:
1164:                Pattern pattern = Pattern.compile(findString, 0);
1165:                Matcher matcher = pattern.matcher(text);
1166:
1167:                boolean found = matcher.find();
1168:                assertTrue(found);
1169:                assertEquals(4, matcher.start());
1170:                if (found) {
1171:                    // modify text
1172:                    text.delete(0, text.length());
1173:                    text.append("Text have been changed.");
1174:                    matcher.reset(text);
1175:                }
1176:
1177:                found = matcher.find();
1178:                assertFalse(found);
1179:            }
1180:
1181:            public void testCompile5() throws PatternSyntaxException {
1182:                Pattern p = Pattern.compile("^[0-9]");
1183:                String s[] = p.split("12", -1);
1184:                assertEquals("", s[0]);
1185:                assertEquals("2", s[1]);
1186:                assertEquals(2, s.length);
1187:            }
1188:
1189:            // public void testCompile6() {
1190:            // String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
1191:            // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
1192:            // try {
1193:            // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
1194:            // assertTrue(true);
1195:            // } catch (PatternSyntaxException e) {
1196:            // System.out.println(e.getMessage());
1197:            // assertTrue(false);
1198:            // }
1199:            // }
1200:
1201:            private static class UBInfo {
1202:                public UBInfo(int low, int high, String name) {
1203:                    this .name = name;
1204:                    this .low = low;
1205:                    this .high = high;
1206:                }
1207:
1208:                public String name;
1209:
1210:                public int low, high;
1211:            }
1212:
1213:            // A table representing the unicode categories
1214:            // private static UBInfo[] UCategories = {
1215:            // Lu
1216:            // Ll
1217:            // Lt
1218:            // Lm
1219:            // Lo
1220:            // Mn
1221:            // Mc
1222:            // Me
1223:            // Nd
1224:            // Nl
1225:            // No
1226:            // Pc
1227:            // Pd
1228:            // Ps
1229:            // Pe
1230:            // Pi
1231:            // Pf
1232:            // Po
1233:            // Sm
1234:            // Sc
1235:            // Sk
1236:            // So
1237:            // Zs
1238:            // Zl
1239:            // Zp
1240:            // Cc
1241:            // Cf
1242:            // Cs
1243:            // Co
1244:            // Cn
1245:            // };
1246:
1247:            // A table representing the unicode character blocks
1248:            private static UBInfo[] UBlocks = {
1249:                    /* 0000; 007F; Basic Latin */
1250:                    new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN
1251:                    /* 0080; 00FF; Latin-1 Supplement */
1252:                    new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT
1253:                    /* 0100; 017F; Latin Extended-A */
1254:                    new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A
1255:                    /* 0180; 024F; Latin Extended-B */
1256:                    // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), //
1257:                    // Character.UnicodeBlock.LATIN_EXTENDED_B
1258:                    /* 0250; 02AF; IPA Extensions */
1259:                    new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS
1260:                    /* 02B0; 02FF; Spacing Modifier Letters */
1261:                    new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS
1262:                    /* 0300; 036F; Combining Diacritical Marks */
1263:                    new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
1264:                    /* 0370; 03FF; Greek */
1265:                    new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK
1266:                    /* 0400; 04FF; Cyrillic */
1267:                    new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC
1268:                    /* 0530; 058F; Armenian */
1269:                    new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN
1270:                    /* 0590; 05FF; Hebrew */
1271:                    new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW
1272:                    /* 0600; 06FF; Arabic */
1273:                    new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC
1274:                    /* 0700; 074F; Syriac */
1275:                    new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC
1276:                    /* 0780; 07BF; Thaana */
1277:                    new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA
1278:                    /* 0900; 097F; Devanagari */
1279:                    new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI
1280:                    /* 0980; 09FF; Bengali */
1281:                    new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI
1282:                    /* 0A00; 0A7F; Gurmukhi */
1283:                    new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI
1284:                    /* 0A80; 0AFF; Gujarati */
1285:                    new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI
1286:                    /* 0B00; 0B7F; Oriya */
1287:                    new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA
1288:                    /* 0B80; 0BFF; Tamil */
1289:                    new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL
1290:                    /* 0C00; 0C7F; Telugu */
1291:                    new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU
1292:                    /* 0C80; 0CFF; Kannada */
1293:                    new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA
1294:                    /* 0D00; 0D7F; Malayalam */
1295:                    new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM
1296:                    /* 0D80; 0DFF; Sinhala */
1297:                    new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA
1298:                    /* 0E00; 0E7F; Thai */
1299:                    new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI
1300:                    /* 0E80; 0EFF; Lao */
1301:                    new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO
1302:                    /* 0F00; 0FFF; Tibetan */
1303:                    new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN
1304:                    /* 1000; 109F; Myanmar */
1305:                    new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR
1306:                    /* 10A0; 10FF; Georgian */
1307:                    new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN
1308:                    /* 1100; 11FF; Hangul Jamo */
1309:                    new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO
1310:                    /* 1200; 137F; Ethiopic */
1311:                    new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC
1312:                    /* 13A0; 13FF; Cherokee */
1313:                    new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE
1314:                    /* 1400; 167F; Unified Canadian Aboriginal Syllabics */
1315:                    new UBInfo(0x1400, 0x167F,
1316:                            "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1317:                    /* 1680; 169F; Ogham */
1318:                    new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM
1319:                    /* 16A0; 16FF; Runic */
1320:                    new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC
1321:                    /* 1780; 17FF; Khmer */
1322:                    new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER
1323:                    /* 1800; 18AF; Mongolian */
1324:                    new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN
1325:                    /* 1E00; 1EFF; Latin Extended Additional */
1326:                    new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
1327:                    /* 1F00; 1FFF; Greek Extended */
1328:                    new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED
1329:                    /* 2000; 206F; General Punctuation */
1330:                    new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION
1331:                    /* 2070; 209F; Superscripts and Subscripts */
1332:                    new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
1333:                    /* 20A0; 20CF; Currency Symbols */
1334:                    new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS
1335:                    /* 20D0; 20FF; Combining Marks for Symbols */
1336:                    new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
1337:                    /* 2100; 214F; Letterlike Symbols */
1338:                    new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS
1339:                    /* 2150; 218F; Number Forms */
1340:                    new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS
1341:                    /* 2190; 21FF; Arrows */
1342:                    new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS
1343:                    /* 2200; 22FF; Mathematical Operators */
1344:                    new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS
1345:                    /* 2300; 23FF; Miscellaneous Technical */
1346:                    new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
1347:                    /* 2400; 243F; Control Pictures */
1348:                    new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES
1349:                    /* 2440; 245F; Optical Character Recognition */
1350:                    new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
1351:                    /* 2460; 24FF; Enclosed Alphanumerics */
1352:                    new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
1353:                    /* 2500; 257F; Box Drawing */
1354:                    new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING
1355:                    /* 2580; 259F; Block Elements */
1356:                    new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS
1357:                    /* 25A0; 25FF; Geometric Shapes */
1358:                    new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES
1359:                    /* 2600; 26FF; Miscellaneous Symbols */
1360:                    new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
1361:                    /* 2700; 27BF; Dingbats */
1362:                    new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS
1363:                    /* 2800; 28FF; Braille Patterns */
1364:                    new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS
1365:                    /* 2E80; 2EFF; CJK Radicals Supplement */
1366:                    new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
1367:                    /* 2F00; 2FDF; Kangxi Radicals */
1368:                    new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS
1369:                    /* 2FF0; 2FFF; Ideographic Description Characters */
1370:                    new UBInfo(0x2FF0, 0x2FFF,
1371:                            "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1372:                    /* 3000; 303F; CJK Symbols and Punctuation */
1373:                    new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
1374:                    /* 3040; 309F; Hiragana */
1375:                    new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA
1376:                    /* 30A0; 30FF; Katakana */
1377:                    new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA
1378:                    /* 3100; 312F; Bopomofo */
1379:                    new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO
1380:                    /* 3130; 318F; Hangul Compatibility Jamo */
1381:                    new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
1382:                    /* 3190; 319F; Kanbun */
1383:                    new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN
1384:                    /* 31A0; 31BF; Bopomofo Extended */
1385:                    new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED
1386:                    /* 3200; 32FF; Enclosed CJK Letters and Months */
1387:                    new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
1388:                    /* 3300; 33FF; CJK Compatibility */
1389:                    new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY
1390:                    /* 3400; 4DB5; CJK Unified Ideographs Extension A */
1391:                    new UBInfo(0x3400, 0x4DB5, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1392:                    /* 4E00; 9FFF; CJK Unified Ideographs */
1393:                    new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
1394:                    /* A000; A48F; Yi Syllables */
1395:                    new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES
1396:                    /* A490; A4CF; Yi Radicals */
1397:                    new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS
1398:                    /* AC00; D7A3; Hangul Syllables */
1399:                    new UBInfo(0xAC00, 0xD7A3, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES
1400:                    /* D800; DB7F; High Surrogates */
1401:                    /* DB80; DBFF; High Private Use Surrogates */
1402:                    /* DC00; DFFF; Low Surrogates */
1403:                    /* E000; F8FF; Private Use */
1404:                    /* F900; FAFF; CJK Compatibility Ideographs */
1405:                    new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
1406:                    /* FB00; FB4F; Alphabetic Presentation Forms */
1407:                    new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS
1408:                    /* FB50; FDFF; Arabic Presentation Forms-A */
1409:                    new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
1410:                    /* FE20; FE2F; Combining Half Marks */
1411:                    new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS
1412:                    /* FE30; FE4F; CJK Compatibility Forms */
1413:                    new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
1414:                    /* FE50; FE6F; Small Form Variants */
1415:                    new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS
1416:                    /* FE70; FEFE; Arabic Presentation Forms-B */
1417:                    // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), //
1418:                    // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
1419:                    /* FEFF; FEFF; Specials */
1420:                    new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS
1421:                    /* FF00; FFEF; Halfwidth and Fullwidth Forms */
1422:                    new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
1423:                    /* FFF0; FFFD; Specials */
1424:                    new UBInfo(0xFFF0, 0xFFFD, "Specials") // Character.UnicodeBlock.SPECIALS
1425:            };
1426:        }
www.java2s.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.