Source Code Cross Referenced for BreakIteratorTest.java in  » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » rbbi » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.rbbi 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:        package com.ibm.icu.dev.test.rbbi;
008:
009:        import com.ibm.icu.dev.test.*;
010:        import com.ibm.icu.text.BreakIterator;
011:        import java.text.StringCharacterIterator;
012:        import java.util.Locale;
013:        import java.util.Vector;
014:
015:        public class BreakIteratorTest extends TestFmwk {
            // Iterators shared by the tests in this class; each one is assigned in init().

            /** Assigned from BreakIterator.getCharacterInstance() in init(). */
            private BreakIterator characterBreak;
            /** Assigned from BreakIterator.getWordInstance() in init(). */
            private BreakIterator wordBreak;
            /** Assigned from BreakIterator.getLineInstance() in init(). */
            private BreakIterator lineBreak;
            /** Assigned from BreakIterator.getSentenceInstance() in init(). */
            private BreakIterator sentenceBreak;
            /** Assigned from BreakIterator.getTitleInstance() in init(). */
            private BreakIterator titleBreak;
021:
022:            public static void main(String[] args) throws Exception {
023:                new BreakIteratorTest().run(args);
024:            }
025:
026:            public BreakIteratorTest() {
027:
028:            }
029:
030:            protected void init() {
031:                characterBreak = BreakIterator.getCharacterInstance();
032:                wordBreak = BreakIterator.getWordInstance();
033:                lineBreak = BreakIterator.getLineInstance();
034:                //logln("Creating sentence iterator...");
035:                sentenceBreak = BreakIterator.getSentenceInstance();
036:                //logln("Finished creating sentence iterator...");
037:                titleBreak = BreakIterator.getTitleInstance();
038:            }
039:
040:            //=========================================================================
041:            // general test subroutines
042:            //=========================================================================
043:
044:            private void generalIteratorTest(BreakIterator bi,
045:                    Vector expectedResult) {
046:                StringBuffer buffer = new StringBuffer();
047:                String text;
048:                for (int i = 0; i < expectedResult.size(); i++) {
049:                    text = (String) expectedResult.elementAt(i);
050:                    buffer.append(text);
051:                }
052:                text = buffer.toString();
053:
054:                bi.setText(text);
055:
056:                Vector nextResults = _testFirstAndNext(bi, text);
057:                Vector previousResults = _testLastAndPrevious(bi, text);
058:
059:                logln("comparing forward and backward...");
060:                int errs = getErrorCount();
061:                compareFragmentLists("forward iteration", "backward iteration",
062:                        nextResults, previousResults);
063:                if (getErrorCount() == errs) {
064:                    logln("comparing expected and actual...");
065:                    compareFragmentLists("expected result", "actual result",
066:                            expectedResult, nextResults);
067:                }
068:
069:                int[] boundaries = new int[expectedResult.size() + 3];
070:                boundaries[0] = BreakIterator.DONE;
071:                boundaries[1] = 0;
072:                for (int i = 0; i < expectedResult.size(); i++)
073:                    boundaries[i + 2] = boundaries[i + 1]
074:                            + ((String) expectedResult.elementAt(i)).length();
075:                boundaries[boundaries.length - 1] = BreakIterator.DONE;
076:
077:                _testFollowing(bi, text, boundaries);
078:                _testPreceding(bi, text, boundaries);
079:                _testIsBoundary(bi, text, boundaries);
080:
081:                doMultipleSelectionTest(bi, text);
082:            }
083:
084:            private Vector _testFirstAndNext(BreakIterator bi, String text) {
085:                int p = bi.first();
086:                int lastP = p;
087:                Vector result = new Vector();
088:
089:                if (p != 0)
090:                    errln("first() returned " + p + " instead of 0");
091:                while (p != BreakIterator.DONE) {
092:                    p = bi.next();
093:                    if (p != BreakIterator.DONE) {
094:                        if (p <= lastP)
095:                            errln("next() failed to move forward: next() on position "
096:                                    + lastP + " yielded " + p);
097:
098:                        result.addElement(text.substring(lastP, p));
099:                    } else {
100:                        if (lastP != text.length())
101:                            errln("next() returned DONE prematurely: offset was "
102:                                    + lastP + " instead of " + text.length());
103:                    }
104:                    lastP = p;
105:                }
106:                return result;
107:            }
108:
109:            private Vector _testLastAndPrevious(BreakIterator bi, String text) {
110:                int p = bi.last();
111:                int lastP = p;
112:                Vector result = new Vector();
113:
114:                if (p != text.length())
115:                    errln("last() returned " + p + " instead of "
116:                            + text.length());
117:                while (p != BreakIterator.DONE) {
118:                    p = bi.previous();
119:                    if (p != BreakIterator.DONE) {
120:                        if (p >= lastP)
121:                            errln("previous() failed to move backward: previous() on position "
122:                                    + lastP + " yielded " + p);
123:
124:                        result.insertElementAt(text.substring(p, lastP), 0);
125:                    } else {
126:                        if (lastP != 0)
127:                            errln("previous() returned DONE prematurely: offset was "
128:                                    + lastP + " instead of 0");
129:                    }
130:                    lastP = p;
131:                }
132:                return result;
133:            }
134:
135:            private void compareFragmentLists(String f1Name, String f2Name,
136:                    Vector f1, Vector f2) {
137:                int p1 = 0;
138:                int p2 = 0;
139:                String s1;
140:                String s2;
141:                int t1 = 0;
142:                int t2 = 0;
143:
144:                while (p1 < f1.size() && p2 < f2.size()) {
145:                    s1 = (String) f1.elementAt(p1);
146:                    s2 = (String) f2.elementAt(p2);
147:                    t1 += s1.length();
148:                    t2 += s2.length();
149:
150:                    if (s1.equals(s2)) {
151:                        debugLogln("   >" + s1 + "<");
152:                        ++p1;
153:                        ++p2;
154:                    } else {
155:                        int tempT1 = t1;
156:                        int tempT2 = t2;
157:                        int tempP1 = p1;
158:                        int tempP2 = p2;
159:
160:                        while (tempT1 != tempT2 && tempP1 < f1.size()
161:                                && tempP2 < f2.size()) {
162:                            while (tempT1 < tempT2 && tempP1 < f1.size()) {
163:                                tempT1 += ((String) f1.elementAt(tempP1))
164:                                        .length();
165:                                ++tempP1;
166:                            }
167:                            while (tempT2 < tempT1 && tempP2 < f2.size()) {
168:                                tempT2 += ((String) f2.elementAt(tempP2))
169:                                        .length();
170:                                ++tempP2;
171:                            }
172:                        }
173:                        logln("*** " + f1Name + " has:");
174:                        while (p1 <= tempP1 && p1 < f1.size()) {
175:                            s1 = (String) f1.elementAt(p1);
176:                            t1 += s1.length();
177:                            debugLogln(" *** >" + s1 + "<");
178:                            ++p1;
179:                        }
180:                        logln("***** " + f2Name + " has:");
181:                        while (p2 <= tempP2 && p2 < f2.size()) {
182:                            s2 = (String) f2.elementAt(p2);
183:                            t2 += s2.length();
184:                            debugLogln(" ***** >" + s2 + "<");
185:                            ++p2;
186:                        }
187:                        errln("Discrepancy between " + f1Name + " and "
188:                                + f2Name);
189:                    }
190:                }
191:            }
192:
193:            private void _testFollowing(BreakIterator bi, String text,
194:                    int[] boundaries) {
195:                logln("testFollowing():");
196:                int p = 2;
197:                for (int i = 0; i <= text.length(); i++) {
198:                    if (i == boundaries[p])
199:                        ++p;
200:
201:                    int b = bi.following(i);
202:                    logln("bi.following(" + i + ") -> " + b);
203:                    if (b != boundaries[p])
204:                        errln("Wrong result from following() for " + i
205:                                + ": expected " + boundaries[p] + ", got " + b);
206:                }
207:            }
208:
209:            private void _testPreceding(BreakIterator bi, String text,
210:                    int[] boundaries) {
211:                logln("testPreceding():");
212:                int p = 0;
213:                for (int i = 0; i <= text.length(); i++) {
214:                    int b = bi.preceding(i);
215:                    logln("bi.preceding(" + i + ") -> " + b);
216:                    if (b != boundaries[p])
217:                        errln("Wrong result from preceding() for " + i
218:                                + ": expected " + boundaries[p] + ", got " + b);
219:
220:                    if (i == boundaries[p + 1])
221:                        ++p;
222:                }
223:            }
224:
225:            private void _testIsBoundary(BreakIterator bi, String text,
226:                    int[] boundaries) {
227:                logln("testIsBoundary():");
228:                int p = 1;
229:                boolean isB;
230:                for (int i = 0; i <= text.length(); i++) {
231:                    isB = bi.isBoundary(i);
232:                    logln("bi.isBoundary(" + i + ") -> " + isB);
233:
234:                    if (i == boundaries[p]) {
235:                        if (!isB)
236:                            errln("Wrong result from isBoundary() for " + i
237:                                    + ": expected true, got false");
238:                        ++p;
239:                    } else {
240:                        if (isB)
241:                            errln("Wrong result from isBoundary() for " + i
242:                                    + ": expected false, got true");
243:                    }
244:                }
245:            }
246:
247:            private void doMultipleSelectionTest(BreakIterator iterator,
248:                    String testText) {
249:                logln("Multiple selection test...");
250:                BreakIterator testIterator = (BreakIterator) iterator.clone();
251:                int offset = iterator.first();
252:                int testOffset;
253:                int count = 0;
254:
255:                do {
256:                    testOffset = testIterator.first();
257:                    testOffset = testIterator.next(count);
258:                    logln("next(" + count + ") -> " + testOffset);
259:                    if (offset != testOffset)
260:                        errln("next(n) and next() not returning consistent results: for step "
261:                                + count
262:                                + ", next(n) returned "
263:                                + testOffset
264:                                + " and next() had " + offset);
265:
266:                    if (offset != BreakIterator.DONE) {
267:                        count++;
268:                        offset = iterator.next();
269:                    }
270:                } while (offset != BreakIterator.DONE);
271:
272:                // now do it backwards...
273:                offset = iterator.last();
274:                count = 0;
275:
276:                do {
277:                    testOffset = testIterator.last();
278:                    testOffset = testIterator.next(count);
279:                    logln("next(" + count + ") -> " + testOffset);
280:                    if (offset != testOffset)
281:                        errln("next(n) and next() not returning consistent results: for step "
282:                                + count
283:                                + ", next(n) returned "
284:                                + testOffset
285:                                + " and next() had " + offset);
286:
287:                    if (offset != BreakIterator.DONE) {
288:                        count--;
289:                        offset = iterator.previous();
290:                    }
291:                } while (offset != BreakIterator.DONE);
292:            }
293:
294:            private void doOtherInvariantTest(BreakIterator tb, String testChars) {
295:                StringBuffer work = new StringBuffer("a\r\na");
296:                int errorCount = 0;
297:
298:                // a break should never occur between CR and LF
299:                for (int i = 0; i < testChars.length(); i++) {
300:                    work.setCharAt(0, testChars.charAt(i));
301:                    for (int j = 0; j < testChars.length(); j++) {
302:                        work.setCharAt(3, testChars.charAt(j));
303:                        tb.setText(work.toString());
304:                        for (int k = tb.first(); k != BreakIterator.DONE; k = tb
305:                                .next())
306:                            if (k == 2) {
307:                                errln("Break between CR and LF in string U+"
308:                                        + Integer.toHexString((int) (work
309:                                                .charAt(0)))
310:                                        + ", U+d U+a U+"
311:                                        + Integer.toHexString((int) (work
312:                                                .charAt(3))));
313:                                errorCount++;
314:                                if (errorCount >= 75)
315:                                    return;
316:                            }
317:                    }
318:                }
319:
320:                // a break should never occur before a non-spacing mark, unless it's preceded
321:                // by a line terminator
322:                work.setLength(0);
323:                work.append("aaaa");
324:                for (int i = 0; i < testChars.length(); i++) {
325:                    char c = testChars.charAt(i);
326:                    if (c == '\n' || c == '\r' || c == '\u2029'
327:                            || c == '\u2028' || c == '\u0003')
328:                        continue;
329:                    work.setCharAt(1, c);
330:                    for (int j = 0; j < testChars.length(); j++) {
331:                        c = testChars.charAt(j);
332:                        if (Character.getType(c) != Character.NON_SPACING_MARK
333:                                && Character.getType(c) != Character.ENCLOSING_MARK)
334:                            continue;
335:                        work.setCharAt(2, c);
336:                        tb.setText(work.toString());
337:                        for (int k = tb.first(); k != BreakIterator.DONE; k = tb
338:                                .next())
339:                            if (k == 2) {
340:                                errln("Break between U+"
341:                                        + Integer.toHexString((int) (work
342:                                                .charAt(1)))
343:                                        + " and U+"
344:                                        + Integer.toHexString((int) (work
345:                                                .charAt(2))));
346:                                errorCount++;
347:                                if (errorCount >= 75)
348:                                    return;
349:                            }
350:                    }
351:                }
352:            }
353:
354:            public void debugLogln(String s) {
355:                final String zeros = "0000";
356:                String temp;
357:                StringBuffer out = new StringBuffer();
358:                for (int i = 0; i < s.length(); i++) {
359:                    char c = s.charAt(i);
360:                    if (c >= ' ' && c < '\u007f')
361:                        out.append(c);
362:                    else {
363:                        out.append("\\u");
364:                        temp = Integer.toHexString((int) c);
365:                        out.append(zeros.substring(0, 4 - temp.length()));
366:                        out.append(temp);
367:                    }
368:                }
369:                logln(out.toString());
370:            }
371:
372:            //=========================================================================
373:            // tests
374:            //=========================================================================
375:
376:            /**
377:             * @bug 4097779
378:             */
379:            public void TestBug4097779() {
380:                Vector wordSelectionData = new Vector();
381:
382:                wordSelectionData.addElement("aa\u0300a");
383:                wordSelectionData.addElement(" ");
384:
385:                generalIteratorTest(wordBreak, wordSelectionData);
386:            }
387:
388:            /**
389:             * @bug 4098467
390:             */
391:            public void TestBug4098467Words() {
392:                Vector wordSelectionData = new Vector();
393:
394:                // What follows is a string of Korean characters (I found it in the Yellow Pages
395:                // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
396:                // it correctly), first as precomposed syllables, and then as conjoining jamo.
397:                // Both sequences should be semantically identical and break the same way.
398:                // precomposed syllables...
399:                wordSelectionData.addElement("\uc0c1\ud56d");
400:                wordSelectionData.addElement(" ");
401:                wordSelectionData.addElement("\ud55c\uc778");
402:                wordSelectionData.addElement(" ");
403:                wordSelectionData.addElement("\uc5f0\ud569");
404:                wordSelectionData.addElement(" ");
405:                wordSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c");
406:                wordSelectionData.addElement(" ");
407:                // conjoining jamo...
408:                wordSelectionData
409:                        .addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc");
410:                wordSelectionData.addElement(" ");
411:                wordSelectionData
412:                        .addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab");
413:                wordSelectionData.addElement(" ");
414:                wordSelectionData
415:                        .addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8");
416:                wordSelectionData.addElement(" ");
417:                wordSelectionData
418:                        .addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
419:                wordSelectionData.addElement(" ");
420:
421:                generalIteratorTest(wordBreak, wordSelectionData);
422:            }
423:
424:            /**
425:             * @bug 4111338
426:             */
427:            public void TestBug4111338() {
428:                Vector sentenceSelectionData = new Vector();
429:
430:                // test for bug #4111338: Don't break sentences at the boundary between CJK
431:                // and other letters
432:                sentenceSelectionData
433:                        .addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
434:                                + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
435:                                + "\u611d\u57b6\u2510\u5d46\".\u2029");
436:                sentenceSelectionData
437:                        .addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
438:                                + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
439:                                + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
440:                sentenceSelectionData
441:                        .addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
442:                                + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
443:                                + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
444:                sentenceSelectionData
445:                        .addElement("He said, \"I can go there.\"\u2029");
446:
447:                generalIteratorTest(sentenceBreak, sentenceSelectionData);
448:            }
449:
450:            /**
451:             * @bug 4143071
452:             */
453:            public void TestBug4143071() {
454:                Vector sentenceSelectionData = new Vector();
455:
456:                // Make sure sentences that end with digits work right
457:                sentenceSelectionData
458:                        .addElement("Today is the 27th of May, 1998.  ");
459:                sentenceSelectionData
460:                        .addElement("Tomorrow will be 28 May 1998.  ");
461:                sentenceSelectionData
462:                        .addElement("The day after will be the 30th.\u2029");
463:
464:                generalIteratorTest(sentenceBreak, sentenceSelectionData);
465:            }
466:
467:            /**
468:             * @bug 4152416
469:             */
470:            public void TestBug4152416() {
471:                Vector sentenceSelectionData = new Vector();
472:
473:                // Make sure sentences ending with a capital letter are treated correctly
474:                sentenceSelectionData
475:                        .addElement("The type of all primitive "
476:                                + "<code>boolean</code> values accessed in the target VM.  ");
477:                sentenceSelectionData.addElement("Calls to xxx will return an "
478:                        + "implementor of this interface.\u2029");
479:
480:                generalIteratorTest(sentenceBreak, sentenceSelectionData);
481:            }
482:
483:            /**
484:             * @bug 4152117
485:             */
486:            public void TestBug4152117() {
487:                Vector sentenceSelectionData = new Vector();
488:
489:                // Make sure sentence breaking is handling punctuation correctly
490:                // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
491:                // IT DOESN'T CROP UP]
492:                sentenceSelectionData
493:                        .addElement("Constructs a randomly generated "
494:                                + "BigInteger, uniformly distributed over the range <tt>0</tt> "
495:                                + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ");
496:                sentenceSelectionData
497:                        .addElement("The uniformity of the distribution "
498:                                + "assumes that a fair source of random bits is provided in "
499:                                + "<tt>rnd</tt>.  ");
500:                sentenceSelectionData
501:                        .addElement("Note that this constructor always "
502:                                + "constructs a non-negative BigInteger.\u2029");
503:
504:                generalIteratorTest(sentenceBreak, sentenceSelectionData);
505:            }
506:
507:            public void TestLineBreak() {
508:                Vector lineSelectionData = new Vector();
509:
510:                lineSelectionData.addElement("Multi-");
511:                lineSelectionData.addElement("Level ");
512:                lineSelectionData.addElement("example ");
513:                lineSelectionData.addElement("of ");
514:                lineSelectionData.addElement("a ");
515:                lineSelectionData.addElement("semi-");
516:                lineSelectionData.addElement("idiotic ");
517:                lineSelectionData.addElement("non-");
518:                lineSelectionData.addElement("sensical ");
519:                lineSelectionData.addElement("(non-");
520:                lineSelectionData.addElement("important) ");
521:                lineSelectionData.addElement("sentence. ");
522:
523:                lineSelectionData.addElement("Hi  ");
524:                lineSelectionData.addElement("Hello ");
525:                lineSelectionData.addElement("How\n");
526:                lineSelectionData.addElement("are\r");
527:                lineSelectionData.addElement("you\u2028");
528:                lineSelectionData.addElement("fine.\t");
529:                lineSelectionData.addElement("good.  ");
530:
531:                lineSelectionData.addElement("Now\r");
532:                lineSelectionData.addElement("is\n");
533:                lineSelectionData.addElement("the\r\n");
534:                lineSelectionData.addElement("time\n");
535:                lineSelectionData.addElement("\r");
536:                lineSelectionData.addElement("for\r");
537:                lineSelectionData.addElement("\r");
538:                lineSelectionData.addElement("all");
539:
540:                generalIteratorTest(lineBreak, lineSelectionData);
541:            }
542:
543:            /**
544:             * @bug 4068133
545:             */
546:            public void TestBug4068133() {
547:                Vector lineSelectionData = new Vector();
548:
549:                lineSelectionData.addElement("\u96f6");
550:                lineSelectionData.addElement("\u4e00\u3002");
551:                lineSelectionData.addElement("\u4e8c\u3001");
552:                lineSelectionData.addElement("\u4e09\u3002\u3001");
553:                lineSelectionData.addElement("\u56db\u3001\u3002\u3001");
554:                lineSelectionData.addElement("\u4e94,");
555:                lineSelectionData.addElement("\u516d.");
556:                lineSelectionData.addElement("\u4e03.\u3001,\u3002");
557:                lineSelectionData.addElement("\u516b");
558:
559:                generalIteratorTest(lineBreak, lineSelectionData);
560:            }
561:
562:            /**
563:             * @bug 4086052
564:             */
565:            public void TestBug4086052() {
566:                Vector lineSelectionData = new Vector();
567:
568:                lineSelectionData.addElement("foo\u00a0bar ");
569:                //        lineSelectionData.addElement("foo\ufeffbar");
570:
571:                generalIteratorTest(lineBreak, lineSelectionData);
572:            }
573:
574:            /**
575:             * @bug 4097920
576:             */
577:            public void TestBug4097920() {
578:                Vector lineSelectionData = new Vector();
579:
580:                lineSelectionData.addElement("dog,cat,mouse ");
581:                lineSelectionData.addElement("(one)");
582:                lineSelectionData.addElement("(two)\n");
583:                generalIteratorTest(lineBreak, lineSelectionData);
584:            }
585:
586:            /**
587:             * @bug 4117554
588:             */
589:            public void TestBug4117554Lines() {
590:                Vector lineSelectionData = new Vector();
591:
592:                // Fullwidth .!? should be treated as postJwrd
593:                lineSelectionData.addElement("\u4e01\uff0e");
594:                lineSelectionData.addElement("\u4e02\uff01");
595:                lineSelectionData.addElement("\u4e03\uff1f");
596:
597:                generalIteratorTest(lineBreak, lineSelectionData);
598:            }
599:
600:            public void TestLettersAndDigits() {
601:                // a character sequence such as "X11" or "30F3" or "native2ascii" should
602:                // be kept together as a single word
603:                Vector lineSelectionData = new Vector();
604:
605:                lineSelectionData.addElement("X11 ");
606:                lineSelectionData.addElement("30F3 ");
607:                lineSelectionData.addElement("native2ascii");
608:
609:                generalIteratorTest(lineBreak, lineSelectionData);
610:            }
611:
            // Two-character sequences (a base letter followed by one combining mark),
            // used as single expected fragments in TestCharacterBreak().

            /** "S" followed by U+0300. */
            private static final String graveS = "S\u0300";
            /** "i" followed by U+0317. */
            private static final String acuteBelowI = "i\u0317";
            /** "e" followed by U+0301. */
            private static final String acuteE = "e\u0301";
            /** "a" followed by U+0302. */
            private static final String circumflexA = "a\u0302";
            /** "e" followed by U+0303. */
            private static final String tildeE = "e\u0303";
617:
618:            public void TestCharacterBreak() {
619:                Vector expectedClusters = new Vector();
620:                // Each base letter with its combining mark is one element, as is the CR LF pair.
621:                expectedClusters.add(graveS);
622:                expectedClusters.add(acuteBelowI);
623:                expectedClusters.add("m");
624:                expectedClusters.add("p");
625:                expectedClusters.add("l");
626:                expectedClusters.add(acuteE);
627:                expectedClusters.add(" ");
628:                expectedClusters.add("s");
629:                expectedClusters.add(circumflexA);
630:                expectedClusters.add("m");
631:                expectedClusters.add("p");
632:                expectedClusters.add("l");
633:                expectedClusters.add(tildeE);
634:                expectedClusters.add(".");
635:                expectedClusters.add("w");
636:                expectedClusters.add(circumflexA);
637:                expectedClusters.add("w");
638:                expectedClusters.add("a");
639:                expectedClusters.add("f");
640:                expectedClusters.add("q");
641:                expectedClusters.add("\n");
642:                expectedClusters.add("\r");
643:                expectedClusters.add("\r\n");
644:                expectedClusters.add("\n");
645:
646:                generalIteratorTest(characterBreak, expectedClusters);
647:            }
648:
649:            /**
650:             * @bug 4098467
651:             */
652:            public void TestBug4098467Characters() {
653:                Vector expectedClusters = new Vector();
654:
655:                // The same Korean phrase twice: first as precomposed Hangul syllables,
656:                // then as conjoining jamo. The original author transcribed it from a
657:                // Yellow Pages ad for the Korean Presbyterian Church of San Francisco.
658:                // Both spellings are meant to be equivalent and to break identically,
659:                // so each syllable (in either form) is one element.
660:                expectedClusters.add("\uc0c1");
661:                expectedClusters.add("\ud56d");
662:                expectedClusters.add(" ");
663:                expectedClusters.add("\ud55c");
664:                expectedClusters.add("\uc778");
665:                expectedClusters.add(" ");
666:                expectedClusters.add("\uc5f0");
667:                expectedClusters.add("\ud569");
668:                expectedClusters.add(" ");
669:                expectedClusters.add("\uc7a5");
670:                expectedClusters.add("\ub85c");
671:                expectedClusters.add("\uad50");
672:                expectedClusters.add("\ud68c");
673:                expectedClusters.add(" ");
674:                // ...and the conjoining-jamo spelling of the same syllables
675:                expectedClusters.add("\u1109\u1161\u11bc");
676:                expectedClusters.add("\u1112\u1161\u11bc");
677:                expectedClusters.add(" ");
678:                expectedClusters.add("\u1112\u1161\u11ab");
679:                expectedClusters.add("\u110b\u1175\u11ab");
680:                expectedClusters.add(" ");
681:                expectedClusters.add("\u110b\u1167\u11ab");
682:                expectedClusters.add("\u1112\u1161\u11b8");
683:                expectedClusters.add(" ");
684:                expectedClusters.add("\u110c\u1161\u11bc");
685:                expectedClusters.add("\u1105\u1169");
686:                expectedClusters.add("\u1100\u116d");
687:                expectedClusters.add("\u1112\u116c");
688:
689:                generalIteratorTest(characterBreak, expectedClusters);
690:            }
691:
692:            public void TestTitleBreak() {
693:                // Leading whitespace is its own element; every word keeps its trailing space.
694:                Vector expectedBreaks = new Vector();
695:                expectedBreaks.add("   ");
696:                expectedBreaks.add("This ");
697:                expectedBreaks.add("is ");
698:                expectedBreaks.add("a ");
699:                expectedBreaks.add("simple ");
700:                expectedBreaks.add("sample ");
701:                expectedBreaks.add("sentence. ");
702:                expectedBreaks.add("This ");
703:                generalIteratorTest(titleBreak, expectedBreaks);
704:            }
705:
706:            /**
707:             * @bug 4153072
708:             */
709:            public void TestBug4153072() {
710:                // The iterator only sees "Hello, World!": three characters are cut off each end.
711:                String text = "...Hello, World!...";
712:                int begin = 3;
713:                int limit = text.length() - 3;
714:                BreakIterator wordIter = BreakIterator.getWordInstance();
715:                wordIter.setText(new StringCharacterIterator(text, begin, limit, begin));
716:                // Offsets before begin must throw; the offset equal to begin must not.
717:                for (int offset = -1; offset <= begin; offset++) {
718:                    try {
719:                        wordIter.isBoundary(offset);
720:                        if (offset < begin) {
721:                            errln("Didn't get exception with offset = " + offset
722:                                    + " and begin index = " + begin);
723:                        }
724:                    } catch (IllegalArgumentException e) {
725:                        if (offset >= begin) {
726:                            errln("Got exception with offset = " + offset
727:                                    + " and begin index = " + begin);
728:                        }
729:                    }
730:                }
731:            }
732:
733:            public void TestBug4146175Lines() {
734:                Vector expectedBreaks = new Vector();
735:                // The fullwidth comma (U+FF0C) shares an element with the Japanese
736:                // character before it, so no break is expected between the two.
737:                expectedBreaks.add("\u7d42\uff0c");
738:                expectedBreaks.add("\u308f");
739:
740:                generalIteratorTest(lineBreak, expectedBreaks);
741:            }
742:
743:            private static final String cannedTestChars = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
744:                    + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
745:                    + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
746:                    + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
747:                    + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
748:                    + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164"; // base character pool; TestSentenceInvariants appends extra characters before use
749:
750:            public void TestSentenceInvariants() {
751:                // Shared character pool plus ASCII and CJK punctuation, hiragana and U+FEFF.
752:                String sample = cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff";
753:                doOtherInvariantTest(BreakIterator.getSentenceInstance(), sample);
754:            }
755:
756:            public void TestEmptyString() {
757:                // Data set consisting of a single empty string.
758:                Vector onlyEmpty = new Vector();
759:                onlyEmpty.add("");
760:
761:                generalIteratorTest(lineBreak, onlyEmpty);
762:            }
763:
764:            public void TestGetAvailableLocales() {
765:                // Only checks that each list is non-empty; the contents are not inspected.
766:                Locale[] locales = BreakIterator.getAvailableLocales();
767:                if (locales.length == 0) {
768:                    errln("getAvailableLocales() returned an empty list!");
769:                }
770:
771:                com.ibm.icu.util.ULocale[] ulocales = BreakIterator
772:                        .getAvailableULocales();
773:                if (ulocales.length != 0) {
774:                    logln("getAvailableULocales() returned " + ulocales.length
775:                            + " locales");
776:                } else {
777:                    errln("getAvailableULocales() returned an empty list!");
778:                }
779:            }
780:
781:            /**
782:             * @bug 4068137
783:             */
784:            public void TestEndBehavior() {
785:                BreakIterator words = BreakIterator.getWordInstance();
786:                words.setText("boo.");
787:
788:                // Boundaries expected at 0, at 3 (before the period) and at 4 (end of text).
789:                if (words.first() != 0)
790:                    errln("Didn't get break at beginning of string.");
791:                if (words.next() != 3)
792:                    errln("Didn't get break before period in \"boo.\"");
793:                if (words.current() != 4 && words.next() != 4)
794:                    errln("Didn't get break at end of string.");
795:            }
796:
797:            // The following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
798:            /**
799:             * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
800:             * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
801:             **/
802:            /**
803:             * test methods preceding, following and isBoundary
804:             **/
805:            public void TestPreceding() {
806:                BreakIterator wordIter = BreakIterator.getWordInstance(Locale.getDefault());
807:                wordIter.setText("aaa bbb ccc");
808:                wordIter.first();
809:                // p1..p4 are the first four boundaries reported after the start of the text.
810:                int p1 = wordIter.next();
811:                int p2 = wordIter.next();
812:                int p3 = wordIter.next();
813:                int p4 = wordIter.next();
814:                int f = wordIter.following(p2 + 1);
815:                int p = wordIter.preceding(p2 + 1);
816:                if (f != p3) {
817:                    errln("IntlTestTextBoundary::TestPreceding: f!=p3");
818:                }
819:                if (p != p2) {
820:                    errln("IntlTestTextBoundary::TestPreceding: p!=p2");
821:                }
822:                if (p1 + 1 != p2) {
823:                    errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
824:                }
825:                if (p3 + 1 != p4) {
826:                    errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
827:                }
828:                // p2 and p3 must be boundaries, while the offset just past p2 must not be.
829:                boolean boundariesOk = wordIter.isBoundary(p2) && !wordIter.isBoundary(p2 + 1) && wordIter.isBoundary(p3);
830:                if (!boundariesOk) {
831:                    errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
832:                }
833:            }
834:
835:            /**
836:             * @bug 4450804
837:             */
838:            public void TestLineBreakContractions() {
839:                Vector expected = new Vector();
840:                expected.add("These ");
841:                expected.add("are ");
842:                expected.add("'foobles'. ");
843:                expected.add("Don't ");
844:                expected.add("you ");
845:                expected.add("like ");
846:                expected.add("them?");
847:                generalIteratorTest(lineBreak, expected);
848:            }
849:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.