001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.rbbi;
008:
009: import com.ibm.icu.dev.test.*;
010: import com.ibm.icu.text.BreakIterator;
011: import java.text.StringCharacterIterator;
012: import java.util.Locale;
013: import java.util.Vector;
014:
015: public class BreakIteratorTest extends TestFmwk {
// Break iterators shared by the test methods of this class, one per break
// type. All five are assigned in init().
016: private BreakIterator characterBreak;
017: private BreakIterator wordBreak;
018: private BreakIterator lineBreak;
019: private BreakIterator sentenceBreak;
020: private BreakIterator titleBreak;
021:
022: public static void main(String[] args) throws Exception {
023: new BreakIteratorTest().run(args);
024: }
025:
/**
 * Creates the test object. No iterator is built here; the iterator fields
 * are filled in by init().
 */
public BreakIteratorTest() {
    super();
}
029:
030: protected void init() {
031: characterBreak = BreakIterator.getCharacterInstance();
032: wordBreak = BreakIterator.getWordInstance();
033: lineBreak = BreakIterator.getLineInstance();
034: //logln("Creating sentence iterator...");
035: sentenceBreak = BreakIterator.getSentenceInstance();
036: //logln("Finished creating sentence iterator...");
037: titleBreak = BreakIterator.getTitleInstance();
038: }
039:
040: //=========================================================================
041: // general test subroutines
042: //=========================================================================
043:
044: private void generalIteratorTest(BreakIterator bi,
045: Vector expectedResult) {
046: StringBuffer buffer = new StringBuffer();
047: String text;
048: for (int i = 0; i < expectedResult.size(); i++) {
049: text = (String) expectedResult.elementAt(i);
050: buffer.append(text);
051: }
052: text = buffer.toString();
053:
054: bi.setText(text);
055:
056: Vector nextResults = _testFirstAndNext(bi, text);
057: Vector previousResults = _testLastAndPrevious(bi, text);
058:
059: logln("comparing forward and backward...");
060: int errs = getErrorCount();
061: compareFragmentLists("forward iteration", "backward iteration",
062: nextResults, previousResults);
063: if (getErrorCount() == errs) {
064: logln("comparing expected and actual...");
065: compareFragmentLists("expected result", "actual result",
066: expectedResult, nextResults);
067: }
068:
069: int[] boundaries = new int[expectedResult.size() + 3];
070: boundaries[0] = BreakIterator.DONE;
071: boundaries[1] = 0;
072: for (int i = 0; i < expectedResult.size(); i++)
073: boundaries[i + 2] = boundaries[i + 1]
074: + ((String) expectedResult.elementAt(i)).length();
075: boundaries[boundaries.length - 1] = BreakIterator.DONE;
076:
077: _testFollowing(bi, text, boundaries);
078: _testPreceding(bi, text, boundaries);
079: _testIsBoundary(bi, text, boundaries);
080:
081: doMultipleSelectionTest(bi, text);
082: }
083:
084: private Vector _testFirstAndNext(BreakIterator bi, String text) {
085: int p = bi.first();
086: int lastP = p;
087: Vector result = new Vector();
088:
089: if (p != 0)
090: errln("first() returned " + p + " instead of 0");
091: while (p != BreakIterator.DONE) {
092: p = bi.next();
093: if (p != BreakIterator.DONE) {
094: if (p <= lastP)
095: errln("next() failed to move forward: next() on position "
096: + lastP + " yielded " + p);
097:
098: result.addElement(text.substring(lastP, p));
099: } else {
100: if (lastP != text.length())
101: errln("next() returned DONE prematurely: offset was "
102: + lastP + " instead of " + text.length());
103: }
104: lastP = p;
105: }
106: return result;
107: }
108:
109: private Vector _testLastAndPrevious(BreakIterator bi, String text) {
110: int p = bi.last();
111: int lastP = p;
112: Vector result = new Vector();
113:
114: if (p != text.length())
115: errln("last() returned " + p + " instead of "
116: + text.length());
117: while (p != BreakIterator.DONE) {
118: p = bi.previous();
119: if (p != BreakIterator.DONE) {
120: if (p >= lastP)
121: errln("previous() failed to move backward: previous() on position "
122: + lastP + " yielded " + p);
123:
124: result.insertElementAt(text.substring(p, lastP), 0);
125: } else {
126: if (lastP != 0)
127: errln("previous() returned DONE prematurely: offset was "
128: + lastP + " instead of 0");
129: }
130: lastP = p;
131: }
132: return result;
133: }
134:
135: private void compareFragmentLists(String f1Name, String f2Name,
136: Vector f1, Vector f2) {
137: int p1 = 0;
138: int p2 = 0;
139: String s1;
140: String s2;
141: int t1 = 0;
142: int t2 = 0;
143:
144: while (p1 < f1.size() && p2 < f2.size()) {
145: s1 = (String) f1.elementAt(p1);
146: s2 = (String) f2.elementAt(p2);
147: t1 += s1.length();
148: t2 += s2.length();
149:
150: if (s1.equals(s2)) {
151: debugLogln(" >" + s1 + "<");
152: ++p1;
153: ++p2;
154: } else {
155: int tempT1 = t1;
156: int tempT2 = t2;
157: int tempP1 = p1;
158: int tempP2 = p2;
159:
160: while (tempT1 != tempT2 && tempP1 < f1.size()
161: && tempP2 < f2.size()) {
162: while (tempT1 < tempT2 && tempP1 < f1.size()) {
163: tempT1 += ((String) f1.elementAt(tempP1))
164: .length();
165: ++tempP1;
166: }
167: while (tempT2 < tempT1 && tempP2 < f2.size()) {
168: tempT2 += ((String) f2.elementAt(tempP2))
169: .length();
170: ++tempP2;
171: }
172: }
173: logln("*** " + f1Name + " has:");
174: while (p1 <= tempP1 && p1 < f1.size()) {
175: s1 = (String) f1.elementAt(p1);
176: t1 += s1.length();
177: debugLogln(" *** >" + s1 + "<");
178: ++p1;
179: }
180: logln("***** " + f2Name + " has:");
181: while (p2 <= tempP2 && p2 < f2.size()) {
182: s2 = (String) f2.elementAt(p2);
183: t2 += s2.length();
184: debugLogln(" ***** >" + s2 + "<");
185: ++p2;
186: }
187: errln("Discrepancy between " + f1Name + " and "
188: + f2Name);
189: }
190: }
191: }
192:
193: private void _testFollowing(BreakIterator bi, String text,
194: int[] boundaries) {
195: logln("testFollowing():");
196: int p = 2;
197: for (int i = 0; i <= text.length(); i++) {
198: if (i == boundaries[p])
199: ++p;
200:
201: int b = bi.following(i);
202: logln("bi.following(" + i + ") -> " + b);
203: if (b != boundaries[p])
204: errln("Wrong result from following() for " + i
205: + ": expected " + boundaries[p] + ", got " + b);
206: }
207: }
208:
209: private void _testPreceding(BreakIterator bi, String text,
210: int[] boundaries) {
211: logln("testPreceding():");
212: int p = 0;
213: for (int i = 0; i <= text.length(); i++) {
214: int b = bi.preceding(i);
215: logln("bi.preceding(" + i + ") -> " + b);
216: if (b != boundaries[p])
217: errln("Wrong result from preceding() for " + i
218: + ": expected " + boundaries[p] + ", got " + b);
219:
220: if (i == boundaries[p + 1])
221: ++p;
222: }
223: }
224:
225: private void _testIsBoundary(BreakIterator bi, String text,
226: int[] boundaries) {
227: logln("testIsBoundary():");
228: int p = 1;
229: boolean isB;
230: for (int i = 0; i <= text.length(); i++) {
231: isB = bi.isBoundary(i);
232: logln("bi.isBoundary(" + i + ") -> " + isB);
233:
234: if (i == boundaries[p]) {
235: if (!isB)
236: errln("Wrong result from isBoundary() for " + i
237: + ": expected true, got false");
238: ++p;
239: } else {
240: if (isB)
241: errln("Wrong result from isBoundary() for " + i
242: + ": expected false, got true");
243: }
244: }
245: }
246:
247: private void doMultipleSelectionTest(BreakIterator iterator,
248: String testText) {
249: logln("Multiple selection test...");
250: BreakIterator testIterator = (BreakIterator) iterator.clone();
251: int offset = iterator.first();
252: int testOffset;
253: int count = 0;
254:
255: do {
256: testOffset = testIterator.first();
257: testOffset = testIterator.next(count);
258: logln("next(" + count + ") -> " + testOffset);
259: if (offset != testOffset)
260: errln("next(n) and next() not returning consistent results: for step "
261: + count
262: + ", next(n) returned "
263: + testOffset
264: + " and next() had " + offset);
265:
266: if (offset != BreakIterator.DONE) {
267: count++;
268: offset = iterator.next();
269: }
270: } while (offset != BreakIterator.DONE);
271:
272: // now do it backwards...
273: offset = iterator.last();
274: count = 0;
275:
276: do {
277: testOffset = testIterator.last();
278: testOffset = testIterator.next(count);
279: logln("next(" + count + ") -> " + testOffset);
280: if (offset != testOffset)
281: errln("next(n) and next() not returning consistent results: for step "
282: + count
283: + ", next(n) returned "
284: + testOffset
285: + " and next() had " + offset);
286:
287: if (offset != BreakIterator.DONE) {
288: count--;
289: offset = iterator.previous();
290: }
291: } while (offset != BreakIterator.DONE);
292: }
293:
294: private void doOtherInvariantTest(BreakIterator tb, String testChars) {
295: StringBuffer work = new StringBuffer("a\r\na");
296: int errorCount = 0;
297:
298: // a break should never occur between CR and LF
299: for (int i = 0; i < testChars.length(); i++) {
300: work.setCharAt(0, testChars.charAt(i));
301: for (int j = 0; j < testChars.length(); j++) {
302: work.setCharAt(3, testChars.charAt(j));
303: tb.setText(work.toString());
304: for (int k = tb.first(); k != BreakIterator.DONE; k = tb
305: .next())
306: if (k == 2) {
307: errln("Break between CR and LF in string U+"
308: + Integer.toHexString((int) (work
309: .charAt(0)))
310: + ", U+d U+a U+"
311: + Integer.toHexString((int) (work
312: .charAt(3))));
313: errorCount++;
314: if (errorCount >= 75)
315: return;
316: }
317: }
318: }
319:
320: // a break should never occur before a non-spacing mark, unless it's preceded
321: // by a line terminator
322: work.setLength(0);
323: work.append("aaaa");
324: for (int i = 0; i < testChars.length(); i++) {
325: char c = testChars.charAt(i);
326: if (c == '\n' || c == '\r' || c == '\u2029'
327: || c == '\u2028' || c == '\u0003')
328: continue;
329: work.setCharAt(1, c);
330: for (int j = 0; j < testChars.length(); j++) {
331: c = testChars.charAt(j);
332: if (Character.getType(c) != Character.NON_SPACING_MARK
333: && Character.getType(c) != Character.ENCLOSING_MARK)
334: continue;
335: work.setCharAt(2, c);
336: tb.setText(work.toString());
337: for (int k = tb.first(); k != BreakIterator.DONE; k = tb
338: .next())
339: if (k == 2) {
340: errln("Break between U+"
341: + Integer.toHexString((int) (work
342: .charAt(1)))
343: + " and U+"
344: + Integer.toHexString((int) (work
345: .charAt(2))));
346: errorCount++;
347: if (errorCount >= 75)
348: return;
349: }
350: }
351: }
352: }
353:
354: public void debugLogln(String s) {
355: final String zeros = "0000";
356: String temp;
357: StringBuffer out = new StringBuffer();
358: for (int i = 0; i < s.length(); i++) {
359: char c = s.charAt(i);
360: if (c >= ' ' && c < '\u007f')
361: out.append(c);
362: else {
363: out.append("\\u");
364: temp = Integer.toHexString((int) c);
365: out.append(zeros.substring(0, 4 - temp.length()));
366: out.append(temp);
367: }
368: }
369: logln(out.toString());
370: }
371:
372: //=========================================================================
373: // tests
374: //=========================================================================
375:
376: /**
377: * @bug 4097779
378: */
379: public void TestBug4097779() {
380: Vector wordSelectionData = new Vector();
381:
382: wordSelectionData.addElement("aa\u0300a");
383: wordSelectionData.addElement(" ");
384:
385: generalIteratorTest(wordBreak, wordSelectionData);
386: }
387:
388: /**
389: * @bug 4098467
390: */
391: public void TestBug4098467Words() {
392: Vector wordSelectionData = new Vector();
393:
394: // What follows is a string of Korean characters (I found it in the Yellow Pages
395: // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
396: // it correctly), first as precomposed syllables, and then as conjoining jamo.
397: // Both sequences should be semantically identical and break the same way.
398: // precomposed syllables...
399: wordSelectionData.addElement("\uc0c1\ud56d");
400: wordSelectionData.addElement(" ");
401: wordSelectionData.addElement("\ud55c\uc778");
402: wordSelectionData.addElement(" ");
403: wordSelectionData.addElement("\uc5f0\ud569");
404: wordSelectionData.addElement(" ");
405: wordSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c");
406: wordSelectionData.addElement(" ");
407: // conjoining jamo...
408: wordSelectionData
409: .addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc");
410: wordSelectionData.addElement(" ");
411: wordSelectionData
412: .addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab");
413: wordSelectionData.addElement(" ");
414: wordSelectionData
415: .addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8");
416: wordSelectionData.addElement(" ");
417: wordSelectionData
418: .addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
419: wordSelectionData.addElement(" ");
420:
421: generalIteratorTest(wordBreak, wordSelectionData);
422: }
423:
424: /**
425: * @bug 4111338
426: */
427: public void TestBug4111338() {
428: Vector sentenceSelectionData = new Vector();
429:
430: // test for bug #4111338: Don't break sentences at the boundary between CJK
431: // and other letters
432: sentenceSelectionData
433: .addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
434: + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
435: + "\u611d\u57b6\u2510\u5d46\".\u2029");
436: sentenceSelectionData
437: .addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
438: + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
439: + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
440: sentenceSelectionData
441: .addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
442: + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
443: + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
444: sentenceSelectionData
445: .addElement("He said, \"I can go there.\"\u2029");
446:
447: generalIteratorTest(sentenceBreak, sentenceSelectionData);
448: }
449:
450: /**
451: * @bug 4143071
452: */
453: public void TestBug4143071() {
454: Vector sentenceSelectionData = new Vector();
455:
456: // Make sure sentences that end with digits work right
457: sentenceSelectionData
458: .addElement("Today is the 27th of May, 1998. ");
459: sentenceSelectionData
460: .addElement("Tomorrow will be 28 May 1998. ");
461: sentenceSelectionData
462: .addElement("The day after will be the 30th.\u2029");
463:
464: generalIteratorTest(sentenceBreak, sentenceSelectionData);
465: }
466:
467: /**
468: * @bug 4152416
469: */
470: public void TestBug4152416() {
471: Vector sentenceSelectionData = new Vector();
472:
473: // Make sure sentences ending with a capital letter are treated correctly
474: sentenceSelectionData
475: .addElement("The type of all primitive "
476: + "<code>boolean</code> values accessed in the target VM. ");
477: sentenceSelectionData.addElement("Calls to xxx will return an "
478: + "implementor of this interface.\u2029");
479:
480: generalIteratorTest(sentenceBreak, sentenceSelectionData);
481: }
482:
483: /**
484: * @bug 4152117
485: */
486: public void TestBug4152117() {
487: Vector sentenceSelectionData = new Vector();
488:
489: // Make sure sentence breaking is handling punctuation correctly
490: // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
491: // IT DOESN'T CROP UP]
492: sentenceSelectionData
493: .addElement("Constructs a randomly generated "
494: + "BigInteger, uniformly distributed over the range <tt>0</tt> "
495: + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
496: sentenceSelectionData
497: .addElement("The uniformity of the distribution "
498: + "assumes that a fair source of random bits is provided in "
499: + "<tt>rnd</tt>. ");
500: sentenceSelectionData
501: .addElement("Note that this constructor always "
502: + "constructs a non-negative BigInteger.\u2029");
503:
504: generalIteratorTest(sentenceBreak, sentenceSelectionData);
505: }
506:
507: public void TestLineBreak() {
508: Vector lineSelectionData = new Vector();
509:
510: lineSelectionData.addElement("Multi-");
511: lineSelectionData.addElement("Level ");
512: lineSelectionData.addElement("example ");
513: lineSelectionData.addElement("of ");
514: lineSelectionData.addElement("a ");
515: lineSelectionData.addElement("semi-");
516: lineSelectionData.addElement("idiotic ");
517: lineSelectionData.addElement("non-");
518: lineSelectionData.addElement("sensical ");
519: lineSelectionData.addElement("(non-");
520: lineSelectionData.addElement("important) ");
521: lineSelectionData.addElement("sentence. ");
522:
523: lineSelectionData.addElement("Hi ");
524: lineSelectionData.addElement("Hello ");
525: lineSelectionData.addElement("How\n");
526: lineSelectionData.addElement("are\r");
527: lineSelectionData.addElement("you\u2028");
528: lineSelectionData.addElement("fine.\t");
529: lineSelectionData.addElement("good. ");
530:
531: lineSelectionData.addElement("Now\r");
532: lineSelectionData.addElement("is\n");
533: lineSelectionData.addElement("the\r\n");
534: lineSelectionData.addElement("time\n");
535: lineSelectionData.addElement("\r");
536: lineSelectionData.addElement("for\r");
537: lineSelectionData.addElement("\r");
538: lineSelectionData.addElement("all");
539:
540: generalIteratorTest(lineBreak, lineSelectionData);
541: }
542:
543: /**
544: * @bug 4068133
545: */
546: public void TestBug4068133() {
547: Vector lineSelectionData = new Vector();
548:
549: lineSelectionData.addElement("\u96f6");
550: lineSelectionData.addElement("\u4e00\u3002");
551: lineSelectionData.addElement("\u4e8c\u3001");
552: lineSelectionData.addElement("\u4e09\u3002\u3001");
553: lineSelectionData.addElement("\u56db\u3001\u3002\u3001");
554: lineSelectionData.addElement("\u4e94,");
555: lineSelectionData.addElement("\u516d.");
556: lineSelectionData.addElement("\u4e03.\u3001,\u3002");
557: lineSelectionData.addElement("\u516b");
558:
559: generalIteratorTest(lineBreak, lineSelectionData);
560: }
561:
562: /**
563: * @bug 4086052
564: */
565: public void TestBug4086052() {
566: Vector lineSelectionData = new Vector();
567:
568: lineSelectionData.addElement("foo\u00a0bar ");
569: // lineSelectionData.addElement("foo\ufeffbar");
570:
571: generalIteratorTest(lineBreak, lineSelectionData);
572: }
573:
574: /**
575: * @bug 4097920
576: */
577: public void TestBug4097920() {
578: Vector lineSelectionData = new Vector();
579:
580: lineSelectionData.addElement("dog,cat,mouse ");
581: lineSelectionData.addElement("(one)");
582: lineSelectionData.addElement("(two)\n");
583: generalIteratorTest(lineBreak, lineSelectionData);
584: }
585:
586: /**
587: * @bug 4117554
588: */
589: public void TestBug4117554Lines() {
590: Vector lineSelectionData = new Vector();
591:
592: // Fullwidth .!? should be treated as postJwrd
593: lineSelectionData.addElement("\u4e01\uff0e");
594: lineSelectionData.addElement("\u4e02\uff01");
595: lineSelectionData.addElement("\u4e03\uff1f");
596:
597: generalIteratorTest(lineBreak, lineSelectionData);
598: }
599:
600: public void TestLettersAndDigits() {
601: // a character sequence such as "X11" or "30F3" or "native2ascii" should
602: // be kept together as a single word
603: Vector lineSelectionData = new Vector();
604:
605: lineSelectionData.addElement("X11 ");
606: lineSelectionData.addElement("30F3 ");
607: lineSelectionData.addElement("native2ascii");
608:
609: generalIteratorTest(lineBreak, lineSelectionData);
610: }
611:
// Two-character strings (a letter plus one following mark). TestCharacterBreak
// lists each of them as a single expected fragment.
612: private static final String graveS = "S\u0300";
613: private static final String acuteBelowI = "i\u0317";
614: private static final String acuteE = "e\u0301";
615: private static final String circumflexA = "a\u0302";
616: private static final String tildeE = "e\u0303";
617:
618: public void TestCharacterBreak() {
619: Vector characterSelectionData = new Vector();
620:
621: characterSelectionData.addElement(graveS);
622: characterSelectionData.addElement(acuteBelowI);
623: characterSelectionData.addElement("m");
624: characterSelectionData.addElement("p");
625: characterSelectionData.addElement("l");
626: characterSelectionData.addElement(acuteE);
627: characterSelectionData.addElement(" ");
628: characterSelectionData.addElement("s");
629: characterSelectionData.addElement(circumflexA);
630: characterSelectionData.addElement("m");
631: characterSelectionData.addElement("p");
632: characterSelectionData.addElement("l");
633: characterSelectionData.addElement(tildeE);
634: characterSelectionData.addElement(".");
635: characterSelectionData.addElement("w");
636: characterSelectionData.addElement(circumflexA);
637: characterSelectionData.addElement("w");
638: characterSelectionData.addElement("a");
639: characterSelectionData.addElement("f");
640: characterSelectionData.addElement("q");
641: characterSelectionData.addElement("\n");
642: characterSelectionData.addElement("\r");
643: characterSelectionData.addElement("\r\n");
644: characterSelectionData.addElement("\n");
645:
646: generalIteratorTest(characterBreak, characterSelectionData);
647: }
648:
649: /**
650: * @bug 4098467
651: */
652: public void TestBug4098467Characters() {
653: Vector characterSelectionData = new Vector();
654:
655: // What follows is a string of Korean characters (I found it in the Yellow Pages
656: // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
657: // it correctly), first as precomposed syllables, and then as conjoining jamo.
658: // Both sequences should be semantically identical and break the same way.
659: // precomposed syllables...
660: characterSelectionData.addElement("\uc0c1");
661: characterSelectionData.addElement("\ud56d");
662: characterSelectionData.addElement(" ");
663: characterSelectionData.addElement("\ud55c");
664: characterSelectionData.addElement("\uc778");
665: characterSelectionData.addElement(" ");
666: characterSelectionData.addElement("\uc5f0");
667: characterSelectionData.addElement("\ud569");
668: characterSelectionData.addElement(" ");
669: characterSelectionData.addElement("\uc7a5");
670: characterSelectionData.addElement("\ub85c");
671: characterSelectionData.addElement("\uad50");
672: characterSelectionData.addElement("\ud68c");
673: characterSelectionData.addElement(" ");
674: // conjoining jamo...
675: characterSelectionData.addElement("\u1109\u1161\u11bc");
676: characterSelectionData.addElement("\u1112\u1161\u11bc");
677: characterSelectionData.addElement(" ");
678: characterSelectionData.addElement("\u1112\u1161\u11ab");
679: characterSelectionData.addElement("\u110b\u1175\u11ab");
680: characterSelectionData.addElement(" ");
681: characterSelectionData.addElement("\u110b\u1167\u11ab");
682: characterSelectionData.addElement("\u1112\u1161\u11b8");
683: characterSelectionData.addElement(" ");
684: characterSelectionData.addElement("\u110c\u1161\u11bc");
685: characterSelectionData.addElement("\u1105\u1169");
686: characterSelectionData.addElement("\u1100\u116d");
687: characterSelectionData.addElement("\u1112\u116c");
688:
689: generalIteratorTest(characterBreak, characterSelectionData);
690: }
691:
692: public void TestTitleBreak() {
693: Vector titleData = new Vector();
694: titleData.addElement(" ");
695: titleData.addElement("This ");
696: titleData.addElement("is ");
697: titleData.addElement("a ");
698: titleData.addElement("simple ");
699: titleData.addElement("sample ");
700: titleData.addElement("sentence. ");
701: titleData.addElement("This ");
702:
703: generalIteratorTest(titleBreak, titleData);
704: }
705:
/**
 * @bug 4153072
 */
709: public void TestBug4153072() {
710: BreakIterator iter = BreakIterator.getWordInstance();
711: String str = "...Hello, World!...";
712: int begin = 3;
713: int end = str.length() - 3;
714: // not used boolean gotException = false;
715:
716: iter
717: .setText(new StringCharacterIterator(str, begin, end,
718: begin));
719: for (int index = -1; index < begin + 1; ++index) {
720: try {
721: iter.isBoundary(index);
722: if (index < begin)
723: errln("Didn't get exception with offset = " + index
724: + " and begin index = " + begin);
725: } catch (IllegalArgumentException e) {
726: if (index >= begin)
727: errln("Got exception with offset = " + index
728: + " and begin index = " + begin);
729: }
730: }
731: }
732:
733: public void TestBug4146175Lines() {
734: Vector lineSelectionData = new Vector();
735:
736: // the fullwidth comma should stick to the preceding Japanese character
737: lineSelectionData.addElement("\u7d42\uff0c");
738: lineSelectionData.addElement("\u308f");
739:
740: generalIteratorTest(lineBreak, lineSelectionData);
741: }
742:
// Base set of characters for the invariant tests. TestSentenceInvariants
// appends a few more characters and passes the result to
// doOtherInvariantTest, which tries them in every pairing.
743: private static final String cannedTestChars = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
744: + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
745: + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
746: + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
747: + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
748: + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
749:
750: public void TestSentenceInvariants() {
751: BreakIterator e = BreakIterator.getSentenceInstance();
752: doOtherInvariantTest(e, cannedTestChars
753: + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
754: }
755:
756: public void TestEmptyString() {
757: String text = "";
758: Vector x = new Vector();
759: x.addElement(text);
760:
761: generalIteratorTest(lineBreak, x);
762: }
763:
764: public void TestGetAvailableLocales() {
765: Locale[] locList = BreakIterator.getAvailableLocales();
766:
767: if (locList.length == 0)
768: errln("getAvailableLocales() returned an empty list!");
769: // I have no idea how to test this function...
770:
771: com.ibm.icu.util.ULocale[] ulocList = BreakIterator
772: .getAvailableULocales();
773: if (ulocList.length == 0) {
774: errln("getAvailableULocales() returned an empty list!");
775: } else {
776: logln("getAvailableULocales() returned " + ulocList.length
777: + " locales");
778: }
779: }
780:
781: /**
782: * @bug 4068137
783: */
784: public void TestEndBehavior() {
785: String testString = "boo.";
786: BreakIterator wb = BreakIterator.getWordInstance();
787: wb.setText(testString);
788:
789: if (wb.first() != 0)
790: errln("Didn't get break at beginning of string.");
791: if (wb.next() != 3)
792: errln("Didn't get break before period in \"boo.\"");
793: if (wb.current() != 4 && wb.next() != 4)
794: errln("Didn't get break at end of string.");
795: }
796:
// The following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
/**
 * Tests the methods preceding, following and isBoundary.
 * <p>
 * Port From: ICU4C v1.8.1 : textbounds : IntlTestTextBoundary<br>
 * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
 */
805: public void TestPreceding() {
806: String words3 = "aaa bbb ccc";
807: BreakIterator e = BreakIterator.getWordInstance(Locale
808: .getDefault());
809: e.setText(words3);
810: e.first();
811: int p1 = e.next();
812: int p2 = e.next();
813: int p3 = e.next();
814: int p4 = e.next();
815:
816: int f = e.following(p2 + 1);
817: int p = e.preceding(p2 + 1);
818: if (f != p3)
819: errln("IntlTestTextBoundary::TestPreceding: f!=p3");
820: if (p != p2)
821: errln("IntlTestTextBoundary::TestPreceding: p!=p2");
822:
823: if (p1 + 1 != p2)
824: errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
825:
826: if (p3 + 1 != p4)
827: errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
828:
829: if (!e.isBoundary(p2) || e.isBoundary(p2 + 1)
830: || !e.isBoundary(p3)) {
831: errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
832: }
833: }
834:
835: /**
836: * Bug 4450804
837: */
838: public void TestLineBreakContractions() {
839: Vector expected = new Vector();
840: expected.add("These ");
841: expected.add("are ");
842: expected.add("'foobles'. ");
843: expected.add("Don't ");
844: expected.add("you ");
845: expected.add("like ");
846: expected.add("them?");
847: generalIteratorTest(lineBreak, expected);
848: }
849: }
|