/*
 *******************************************************************************
 * Copyright (C) 1996-2006, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package com.ibm.icu.dev.test.util;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.StringTokenizer;
import com.ibm.icu.text.UnicodeSet;

/**
 * Testing class for StringTokenizer class
 * @author Syn Wee Quek
 * @since oct 26 2002
 */
public final class StringTokenizerTest extends TestFmwk {
    // constructor ===================================================

    /**
     * Constructor
     */
    public StringTokenizerTest() {
    }

    // public methods --------------------------------------------------------

    /**
     * Testing constructors
     */
    public void TestConstructors() {
        String str = "this\tis\na\rstring\ftesting\tStringTokenizer\nconstructors!";
        String delimiter = " \t\n\r\f";
        String expected[] = { "this", "is", "a", "string", "testing",
                              "StringTokenizer", "constructors!" };
        StringTokenizer defaultst = new StringTokenizer(str);
        StringTokenizer stdelimiter = new StringTokenizer(str, delimiter);
        StringTokenizer stdelimiterreturn = new StringTokenizer(str, delimiter, false);
        UnicodeSet delimiterset = new UnicodeSet("[" + delimiter + "]", false);
        StringTokenizer stdelimiterset = new StringTokenizer(str, delimiterset);
        StringTokenizer stdelimitersetreturn = new StringTokenizer(str, delimiterset, false);
        for (int i = 0; i < expected.length; i++) {
            if (!(defaultst.nextElement().equals(expected[i])
                  && stdelimiter.nextElement().equals(expected[i])
                  && stdelimiterreturn.nextElement().equals(expected[i])
                  && stdelimiterset.nextElement().equals(expected[i])
                  && stdelimitersetreturn.nextElement().equals(expected[i]))) {
                errln("Constructor with default delimiter gives wrong results");
            }
        }

        String expected1[] = { "this", "\t", "is", "\n", "a", "\r",
                               "string", "\f", "testing", "\t", "StringTokenizer",
                               "\n", "constructors!" };
        stdelimiterreturn = new StringTokenizer(str, delimiter, true);
        stdelimitersetreturn = new StringTokenizer(str, delimiterset, true);
        for (int i = 0; i < expected1.length; i++) {
            if (!(stdelimiterreturn.nextElement().equals(expected1[i])
                  && stdelimitersetreturn.nextElement().equals(expected1[i]))) {
                errln("Constructor with default delimiter and delimiter tokens gives wrong results");
            }
        }

        stdelimiter = new StringTokenizer(str, (String) null);
        stdelimiterreturn = new StringTokenizer(str, (String) null, false);
        delimiterset = null;
        stdelimiterset = new StringTokenizer(str, delimiterset);
        stdelimitersetreturn = new StringTokenizer(str, delimiterset, false);

        if (!(stdelimiter.nextElement().equals(str)
              && stdelimiterreturn.nextElement().equals(str)
              && stdelimiterset.nextElement().equals(str)
              && stdelimitersetreturn.nextElement().equals(str))) {
            errln("Constructor with null delimiter gives wrong results");
        }

        delimiter = "";
        stdelimiter = new StringTokenizer(str, delimiter);
        stdelimiterreturn = new StringTokenizer(str, delimiter, false);
        delimiterset = new UnicodeSet();
        stdelimiterset = new StringTokenizer(str, delimiterset);
        stdelimitersetreturn = new StringTokenizer(str, delimiterset, false);

        if (!(stdelimiter.nextElement().equals(str)
              && stdelimiterreturn.nextElement().equals(str)
              && stdelimiterset.nextElement().equals(str)
              && stdelimitersetreturn.nextElement().equals(str))) {
            errln("Constructor with empty delimiter gives wrong results");
        }

        try {
            defaultst = new StringTokenizer(null);
            errln("null string should throw an exception");
        } catch (Exception e) {
            logln("PASS: Constructor with null string failed as expected");
        }
        try {
            stdelimiter = new StringTokenizer(null, delimiter);
            errln("null string should throw an exception");
        } catch (Exception e) {
            logln("PASS: Constructor with null string failed as expected");
        }
        try {
            stdelimiterreturn = new StringTokenizer(null, delimiter, false);
            errln("null string should throw an exception");
        } catch (Exception e) {
            logln("PASS: Constructor with null string failed as expected");
        }
        try {
            stdelimiterset = new StringTokenizer(null, delimiterset);
            errln("null string should throw an exception");
        } catch (Exception e) {
            logln("PASS: Constructor with null string failed as expected");
        }
        try {
            stdelimitersetreturn = new StringTokenizer(null, delimiterset, false);
            errln("null string should throw an exception");
        } catch (Exception e) {
            logln("PASS: Constructor with null string failed as expected");
        }
    }

    /**
     * Testing supplementary and unpaired surrogate characters. Delimiters are
     * matched by code point, so a surrogate pair delimiter does not match its
     * unpaired halves, and an unpaired lead surrogate delimiter does not split
     * a well-formed surrogate pair.
     */
    public void TestSupplementary() {
        String str = "bmp string \ud800 with an unmatched surrogate character";
        String delimiter = "\ud800\udc00";
        String expected[] = { str };

        StringTokenizer tokenizer = new StringTokenizer(str, delimiter);
        if (!tokenizer.nextElement().equals(expected[0])) {
            errln("Error parsing \"" + Utility.hex(str) + "\"");
        }
        if (tokenizer.hasMoreElements()) {
            errln("Number of tokens exceeded expected");
        }
        delimiter = "\ud800";
        String expected1[] = { "bmp string ",
                               " with an unmatched surrogate character" };
        tokenizer = new StringTokenizer(str, delimiter);
        int i = 0;
        while (tokenizer.hasMoreElements()) {
            if (!tokenizer.nextElement().equals(expected1[i++])) {
                errln("Error parsing \"" + Utility.hex(str) + "\"");
            }
        }
        if (tokenizer.hasMoreElements()) {
            errln("Number of tokens exceeded expected");
        }

        str = "string \ud800\udc00 with supplementary character";
        delimiter = "\ud800";
        String expected2[] = { str };
        tokenizer = new StringTokenizer(str, delimiter);
        if (!tokenizer.nextElement().equals(expected2[0])) {
            errln("Error parsing \"" + Utility.hex(str) + "\"");
        }
        if (tokenizer.hasMoreElements()) {
            errln("Number of tokens exceeded expected");
        }

        delimiter = "\ud800\udc00";
        String expected3[] = { "string ", " with supplementary character" };
        tokenizer = new StringTokenizer(str, delimiter);
        i = 0;
        while (tokenizer.hasMoreElements()) {
            if (!tokenizer.nextElement().equals(expected3[i++])) {
                errln("Error parsing \"" + Utility.hex(str) + "\"");
            }
        }
        if (tokenizer.hasMoreElements()) {
            errln("Number of tokens exceeded expected");
        }

        str = "\ud800 \ud800\udc00 \ud800 \ud800\udc00";
        delimiter = "\ud800";
        String expected4[] = { " \ud800\udc00 ", " \ud800\udc00" };
        tokenizer = new StringTokenizer(str, delimiter);
        i = 0;
        while (tokenizer.hasMoreElements()) {
            if (!tokenizer.nextElement().equals(expected4[i++])) {
                errln("Error parsing \"" + Utility.hex(str) + "\"");
            }
        }
        if (tokenizer.hasMoreElements()) {
            errln("Number of tokens exceeded expected");
        }

        delimiter = "\ud800\udc00";
        String expected5[] = { "\ud800 ", " \ud800 " };
        tokenizer = new StringTokenizer(str, delimiter);
        i = 0;
        while (tokenizer.hasMoreElements()) {
            if (!tokenizer.nextElement().equals(expected5[i++])) {
                errln("Error parsing \"" + Utility.hex(str) + "\"");
            }
        }
        if (tokenizer.hasMoreElements()) {
            errln("Number of tokens exceeded expected");
        }
    }

    /**
     * Testing the next-token API for non-delimiter tokens
     */
    public void TestNextNonDelimiterToken() {
        String str = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
        String expected[] = { ",", "1", "2", "3", "AHHHHH!", "5.5",
                              "6", "7", ",", "8\n" };
        String delimiter = " ";

        StringTokenizer tokenizer = new StringTokenizer(str, delimiter);
        int currtoken = 0;
        while (tokenizer.hasMoreElements()) {
            if (!tokenizer.nextElement().equals(expected[currtoken])) {
                errln("Error token mismatch, expected " + expected[currtoken]);
            }
            currtoken++;
        }

        if (currtoken != expected.length) {
            errln("Didn't get correct number of tokens");
        }

        tokenizer = new StringTokenizer("", delimiter);
        if (tokenizer.hasMoreElements()) {
            errln("Empty string should not have any tokens");
        }
        try {
            tokenizer.nextElement();
            errln("Empty string should not have any tokens");
        } catch (Exception e) {
            logln("PASS: empty string failed as expected");
        }

        tokenizer = new StringTokenizer(", ,", ", ");
        if (tokenizer.hasMoreElements()) {
            errln("String with only delimiters should not have any tokens");
        }
        try {
            tokenizer.nextElement();
            errln("String with only delimiters should not have any tokens");
        } catch (Exception e) {
            logln("PASS: String with only delimiters failed as expected");
        }

        tokenizer = new StringTokenizer("q, ,", ", ");
        if (!tokenizer.hasMoreElements()) {
            errln("String that does not begin with delimiters should have some tokens");
        }
        if (!tokenizer.nextElement().equals("q")) {
            errln("String that does not begin with delimiters should have some tokens");
        }
        try {
            tokenizer.nextElement();
            errln("String has only one token");
        } catch (Exception e) {
            logln("PASS: String with only one token failed as expected");
        }

        try {
            tokenizer = new StringTokenizer(null, delimiter);
            errln("StringTokenizer constructed with null source should throw a NullPointerException");
        } catch (Exception e) {
            logln("PASS: StringTokenizer constructed with null source failed as expected");
        }

        tokenizer = new StringTokenizer(str, "q");
        if (!tokenizer.nextElement().equals(str)) {
            errln("Should have received the same string when there are no delimiters");
        }
    }

    /**
     * Test compatibility with java.util.StringTokenizer, except that the ICU
     * tokenizer also handles surrogate pairs.
     */
    public void TestNoCoalesce() {
        String str = "This is a test\rto see if\nwhitespace is handled \n\r unusually\r\n by our tokenizer\n\n\n!!!plus some other odd ones like \ttab\ttab\ttab\nand form\ffeed\ffoo.\n";
        String delims = " \t\n\r\f\ud800\udc00";

        java.util.StringTokenizer jt = new java.util.StringTokenizer(str, delims, true);
        com.ibm.icu.util.StringTokenizer it = new com.ibm.icu.util.StringTokenizer(str, delims, true);
        int n = 0;
        while (jt.hasMoreTokens() && it.hasMoreTokens()) {
            assertEquals("[" + String.valueOf(n++) + "]", jt.nextToken(), it.nextToken());
        }
        assertFalse("java tokenizer has no more tokens", jt.hasMoreTokens());
        assertFalse("icu tokenizer has no more tokens", it.hasMoreTokens());

        String sur = "Even\ud800\udc00 works.\n\n";
        it = new com.ibm.icu.util.StringTokenizer(sur, delims, true); // no coalesce
        assertEquals("sur1", it.nextToken(), "Even");
        assertEquals("sur2", it.nextToken(), "\ud800\udc00");
        assertEquals("sur3", it.nextToken(), " ");
        assertEquals("sur4", it.nextToken(), "works.");
        assertEquals("sur5", it.nextToken(), "\n");
        assertEquals("sur6", it.nextToken(), "\n");
        assertFalse("sur7", it.hasMoreTokens());
    }

    /**
     * Testing the next-token API when delimiters are returned as tokens
     */
    public void TestNextDelimiterToken() {
        String str = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
        String expected[] = { " ", ",", " ", "1", " ", "2", " ", "3",
                              " ", "AHHHHH!", " ", "5.5", " ", "6", " ", "7",
                              " ", ",", " ", "8\n" };
        String delimiter = " ";

        StringTokenizer tokenizer = new StringTokenizer(str, delimiter, true, true);

        int currtoken = 0;
        while (tokenizer.hasMoreElements()) {
            if (!tokenizer.nextElement().equals(expected[currtoken])) {
                errln("Error token mismatch, expected " + expected[currtoken]);
            }
            currtoken++;
        }

        if (currtoken != expected.length) {
            errln("Didn't get correct number of tokens");
        }

        tokenizer = new StringTokenizer("", delimiter, true);
        if (tokenizer.hasMoreElements()) {
            errln("Empty string should not have any tokens");
        }
        try {
            tokenizer.nextElement();
            errln("Empty string should not have any tokens");
        } catch (Exception e) {
            logln("PASS: Empty string failed as expected");
        }

        tokenizer = new StringTokenizer(", ,", ", ", true, true);
        if (!tokenizer.hasMoreElements()) {
            errln("String with only delimiters should have tokens when delimiter is treated as tokens");
        }
        if (!tokenizer.nextElement().equals(", ,")) {
            errln("String with only delimiters should return itself when delimiter is treated as tokens");
        }

        tokenizer = new StringTokenizer("q, ,", ", ", true, true);

        if (!tokenizer.hasMoreElements()) {
            errln("String should have some tokens");
        }
        if (!tokenizer.nextElement().equals("q")
            || !tokenizer.nextElement().equals(", ,")) {
            errln("String tokens do not match expected results");
        }

        try {
            tokenizer = new StringTokenizer(null, delimiter, true);
            errln("StringTokenizer constructed with null source should throw a NullPointerException");
        } catch (Exception e) {
            logln("PASS: StringTokenizer constructed with null source failed as expected");
        }

        tokenizer = new StringTokenizer(str, "q", true);
        if (!tokenizer.nextElement().equals(str)) {
            errln("Should have received the same string when there are no delimiters");
        }
    }

    /**
     * Testing count tokens
     */
    public void TestCountTokens() {
        String str = "this\tis\na\rstring\ftesting\tStringTokenizer\nconstructors!";
        String delimiter = " \t\n\r\f";
        String expected[] = { "this", "is", "a", "string", "testing",
                              "StringTokenizer", "constructors!" };
        String expectedreturn[] = { "this", "\t", "is", "\n", "a",
                                    "\r", "string", "\f", "testing", "\t",
                                    "StringTokenizer", "\n", "constructors!" };
        StringTokenizer st = new StringTokenizer(str, delimiter);
        StringTokenizer streturn = new StringTokenizer(str, delimiter, true);
        if (st.countTokens() != expected.length) {
            errln("CountTokens failed for non-delimiter tokens");
        }
        if (streturn.countTokens() != expectedreturn.length) {
            errln("CountTokens failed for delimiter tokens");
        }
        for (int i = 0; i < expected.length; i++) {
            if (!st.nextElement().equals(expected[i])
                || st.countTokens() != expected.length - i - 1) {
                errln("CountTokens default delimiter gives wrong results");
            }
        }
        for (int i = 0; i < expectedreturn.length; i++) {
            if (!streturn.nextElement().equals(expectedreturn[i])
                || streturn.countTokens() != expectedreturn.length - i - 1) {
                errln("CountTokens with default delimiter and delimiter tokens gives wrong results");
            }
        }
    }

    /**
     * Next token with new delimiters
     */
    public void TestNextNewDelimiters() {
        String str = "abc0def1ghi2jkl3mno4pqr0stu1vwx2yza3bcd4efg0hij1klm2nop3qrs4tuv";
        String delimiter[] = { "0", "1", "2", "3", "4" };
        String expected[][] = { { "abc", "pqr", "efg" },
                                { "def", "stu", "hij" }, { "ghi", "vwx", "klm" },
                                { "jkl", "yza", "nop" }, { "mno", "bcd", "qrs" } };
        StringTokenizer st = new StringTokenizer(str);
        int size = expected[0].length;
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < delimiter.length; j++) {
                if (!st.nextToken(delimiter[j]).equals(expected[j][i])) {
                    errln("nextToken() with delimiters error " + i + " " + j);
                }
                if (st.countTokens() != expected[j].length - i) {
                    errln("countTokens() after nextToken() with delimiters error " + i + " " + j);
                }
            }
        }
        st = new StringTokenizer(str);
        String delimiter1[] = { "0", "2", "4" };
        String expected1[] = { "abc", "def1ghi", "jkl3mno", "pqr",
                               "stu1vwx", "yza3bcd", "efg", "hij1klm", "nop3qrs",
                               "tuv" };
        for (int i = 0; i < expected1.length; i++) {
            if (!st.nextToken(delimiter1[i % delimiter1.length])
                .equals(expected1[i])) {
                errln("nextToken() with delimiters error " + i);
            }
        }
    }

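    /**
     * Regression test for bug 4423: countTokens() must not disturb subsequent
     * hasMoreTokens()/nextToken() calls, and the precomputed token array must
     * grow correctly past its initial capacity.
     */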
    public void TestBug4423() {
        // bug 4423: a bad interaction between countTokens() and hasMoreTokens().
        String s1 = "This is a test";
        StringTokenizer tzr = new StringTokenizer(s1);
        int tokenCount = 0;

        int t = tzr.countTokens();
        if (t != 4) {
            errln("tzr.countTokens() returned " + t + ". Expected 4");
        }
        while (tzr.hasMoreTokens()) {
            String tok = tzr.nextToken();
            if (tok.length() == 0) {
                errln("token with length == 0");
            }
            tokenCount++;
        }
        if (tokenCount != 4) {
            errln("Incorrect number of tokens found = " + tokenCount);
        }

        // Precomputed tokens arrays can grow. Check for edge cases around the
        // boundary where growth is forced. The array grows in increments of 100 tokens.
        String s2 = "";
        for (int i = 1; i < 250; i++) {
            s2 = s2 + " " + i;
            StringTokenizer tzb = new StringTokenizer(s2);
            int t2 = tzb.countTokens();
            if (t2 != i) {
                errln("tzb.countTokens() returned " + t2 + ". Expected " + i);
                break;
            }
            int j = 0;
            while (tzb.hasMoreTokens()) {
                String tok = tzb.nextToken();
                j++;
                if (!tok.equals(Integer.toString(j))) {
                    errln("Wrong token string. Expected \"" + j + "\", got \"" + tok + "\".");
                    break;
                }
            }
            if (j != i) {
                errln("Wrong number of tokens. Expected " + i + ". Got " + j + ".");
                break;
            }
        }
    }

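    /**
     * Regression test for jitterbug 5207: countTokens() should agree with the
     * number of tokens actually returned when delimiters are returned as
     * tokens and not coalesced.
     */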
    public void TestCountTokensNoCoalesce() {
        // jitterbug 5207
        String str = "\"\"";
        String del = "\"";
        StringTokenizer st = new StringTokenizer(str, del, true);
        int count = 0;
        while (st.hasMoreTokens()) {
            String t = st.nextToken();
            logln("[" + count + "] '" + t + "'");
            ++count;
        }
        st = new StringTokenizer(str, del, true);
        int ncount = st.countTokens();
        int xcount = 0;
        while (st.hasMoreTokens()) {
            String t = st.nextToken();
            logln("[" + xcount + "] '" + t + "'");
            ++xcount;
        }
        if (count != ncount || count != xcount) {
            errln("inconsistent counts " + count + ", " + ncount + ", " + xcount);
        }
    }

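    /**
     * Command-line entry point for running this test class standalone.
     */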
    public static void main(String[] arg) {
        try {
            StringTokenizerTest test = new StringTokenizerTest();
            test.run(arg);
            // test.TestCaseCompare();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}