001: /**
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.dev.test.util;
007:
008: import com.ibm.icu.dev.test.TestFmwk;
009: import com.ibm.icu.impl.Trie;
010: import com.ibm.icu.impl.IntTrie;
011: import com.ibm.icu.impl.CharTrie;
012: import com.ibm.icu.impl.TrieBuilder;
013: import com.ibm.icu.impl.IntTrieBuilder;
014: import com.ibm.icu.impl.TrieIterator;
015: import com.ibm.icu.impl.UCharacterProperty;
016: import com.ibm.icu.text.UTF16;
017: import com.ibm.icu.util.RangeValueIterator;
018:
019: /**
020: * Testing class for Trie. Tests here will be simple, since both CharTrie and
021: * IntTrie are very similar and are heavily used in other parts of ICU4J.
022: * Codes using Tries are expected to have detailed tests.
023: * @author Syn Wee Quek
024: * @since release 2.1 Jan 01 2002
025: */
026: public final class TrieTest extends TestFmwk {
027: // constructor ---------------------------------------------------
028:
029: /**
030: * Constructor
031: */
032: public TrieTest() {
033: }
034:
035: // public methods -----------------------------------------------
036:
037: public static void main(String arg[]) {
038: TrieTest test = new TrieTest();
039: try {
040: test.run(arg);
041: } catch (Exception e) {
042: test.errln("Error testing trietest");
043: }
044: }
045:
046: /**
047: * Values for setting possibly overlapping, out-of-order ranges of values
048: */
049: private static final class SetRange {
050: SetRange(int start, int limit, int value, boolean overwrite) {
051: this .start = start;
052: this .limit = limit;
053: this .value = value;
054: this .overwrite = overwrite;
055: }
056:
057: int start, limit;
058: int value;
059: boolean overwrite;
060: }
061:
062: /**
063: * Values for testing:
064: * value is set from the previous boundary's limit to before
065: * this boundary's limit
066: */
067: private static final class CheckRange {
068: CheckRange(int limit, int value) {
069: this .limit = limit;
070: this .value = value;
071: }
072:
073: int limit;
074: int value;
075: }
076:
077: private static final class storageHolder {
078: double bogus; // needed for aligining the storage
079: byte storage[] = new byte[10000000];
080: }
081:
082: private static final class _testFoldedValue implements
083: TrieBuilder.DataManipulate {
084: public _testFoldedValue(IntTrieBuilder builder) {
085: m_builder_ = builder;
086: }
087:
088: public int getFoldedValue(int start, int offset) {
089: int foldedValue = 0;
090: int limit = start + 0x400;
091: while (start < limit) {
092: int value = m_builder_.getValue(start);
093: if (m_builder_.isInZeroBlock(start)) {
094: start += TrieBuilder.DATA_BLOCK_LENGTH;
095: } else {
096: foldedValue |= value;
097: ++start;
098: }
099: }
100:
101: if (foldedValue != 0) {
102: return (offset << 16) | foldedValue;
103: }
104: return 0;
105: }
106:
107: private IntTrieBuilder m_builder_;
108: }
109:
110: private static final class _testFoldingOffset implements
111: Trie.DataManipulate {
112: public int getFoldingOffset(int value) {
113: return value >>> 16;
114: }
115: }
116:
117: private static final class _testEnumValue extends TrieIterator {
118: public _testEnumValue(Trie data) {
119: super (data);
120: }
121:
122: protected int extract(int value) {
123: return value ^ 0x5555;
124: }
125: }
126:
127: private void _testTrieIteration(IntTrie trie,
128: CheckRange checkRanges[], int countCheckRanges) {
129: // write a string
130: int countValues = 0;
131: StringBuffer s = new StringBuffer();
132: int values[] = new int[30];
133: for (int i = 0; i < countCheckRanges; ++i) {
134: int c = checkRanges[i].limit;
135: if (c != 0) {
136: --c;
137: UTF16.append(s, c);
138: values[countValues++] = checkRanges[i].value;
139: }
140: }
141: int limit = s.length();
142: // try forward
143: int p = 0;
144: int i = 0;
145: while (p < limit) {
146: int c = UTF16.charAt(s, p);
147: p += UTF16.getCharCount(c);
148: int value = trie.getCodePointValue(c);
149: if (value != values[i]) {
150: errln("wrong value from UTRIE_NEXT(U+"
151: + Integer.toHexString(c) + "): 0x"
152: + Integer.toHexString(value) + " instead of 0x"
153: + Integer.toHexString(values[i]));
154: }
155: // unlike the c version lead is 0 if c is non-supplementary
156: char lead = UTF16.getLeadSurrogate(c);
157: char trail = UTF16.getTrailSurrogate(c);
158: if (lead == 0 ? trail != s.charAt(p - 1) : !UTF16
159: .isLeadSurrogate(lead)
160: || !UTF16.isTrailSurrogate(trail)
161: || lead != s.charAt(p - 2)
162: || trail != s.charAt(p - 1)) {
163: errln("wrong (lead, trail) from UTRIE_NEXT(U+"
164: + Integer.toHexString(c));
165: continue;
166: }
167: if (lead != 0) {
168: value = trie.getLeadValue(lead);
169: value = trie.getTrailValue(value, trail);
170: if (value != trie.getSurrogateValue(lead, trail)
171: && value != values[i]) {
172: errln("wrong value from getting supplementary "
173: + "values (U+" + Integer.toHexString(c)
174: + "): 0x" + Integer.toHexString(value)
175: + " instead of 0x"
176: + Integer.toHexString(values[i]));
177: }
178: }
179: ++i;
180: }
181: }
182:
183: private void _testTrieRanges(SetRange setRanges[],
184: int countSetRanges, CheckRange checkRanges[],
185: int countCheckRanges, boolean latin1Linear) {
186: IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
187: checkRanges[0].value, checkRanges[0].value,
188: latin1Linear);
189:
190: // set values from setRanges[]
191: boolean ok = true;
192: for (int i = 0; i < countSetRanges; ++i) {
193: int start = setRanges[i].start;
194: int limit = setRanges[i].limit;
195: int value = setRanges[i].value;
196: boolean overwrite = setRanges[i].overwrite;
197: if ((limit - start) == 1 && overwrite) {
198: ok &= newTrie.setValue(start, value);
199: } else {
200: ok &= newTrie.setRange(start, limit, value, overwrite);
201: }
202: }
203: if (!ok) {
204: errln("setting values into a trie failed");
205: return;
206: }
207:
208: // verify that all these values are in the new Trie
209: int start = 0;
210: for (int i = 0; i < countCheckRanges; ++i) {
211: int limit = checkRanges[i].limit;
212: int value = checkRanges[i].value;
213:
214: while (start < limit) {
215: if (value != newTrie.getValue(start)) {
216: errln("newTrie [U+"
217: + Integer.toHexString(start)
218: + "]==0x"
219: + Integer.toHexString(newTrie
220: .getValue(start))
221: + " instead of 0x"
222: + Integer.toHexString(value));
223: }
224: ++start;
225: }
226: }
227:
228: IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie),
229: new _testFoldingOffset());
230:
231: // test linear Latin-1 range from utrie_getData()
232: if (latin1Linear) {
233: start = 0;
234: for (int i = 0; i < countCheckRanges && start <= 0xff; ++i) {
235: int limit = checkRanges[i].limit;
236: int value = checkRanges[i].value;
237:
238: while (start < limit && start <= 0xff) {
239: if (value != trie
240: .getLatin1LinearValue((char) start)) {
241: errln("IntTrie.getLatin1LinearValue[U+"
242: + Integer.toHexString(start)
243: + "]==0x"
244: + Integer
245: .toHexString(trie
246: .getLatin1LinearValue((char) start))
247: + " instead of 0x"
248: + Integer.toHexString(value));
249: }
250: ++start;
251: }
252: }
253: }
254:
255: if (latin1Linear != trie.isLatin1Linear()) {
256: errln("trie serialization did not preserve "
257: + "Latin-1-linearity");
258: }
259:
260: // verify that all these values are in the serialized Trie
261: start = 0;
262: for (int i = 0; i < countCheckRanges; ++i) {
263: int limit = checkRanges[i].limit;
264: int value = checkRanges[i].value;
265:
266: if (start == 0xd800) {
267: // skip surrogates
268: start = limit;
269: continue;
270: }
271:
272: while (start < limit) {
273: if (start <= 0xffff) {
274: int value2 = trie.getBMPValue((char) start);
275: if (value != value2) {
276: errln("serialized trie.getBMPValue(U+"
277: + Integer.toHexString(start) + " == 0x"
278: + Integer.toHexString(value2)
279: + " instead of 0x"
280: + Integer.toHexString(value));
281: }
282: if (!UTF16.isLeadSurrogate((char) start)) {
283: value2 = trie.getLeadValue((char) start);
284: if (value != value2) {
285: errln("serialized trie.getLeadValue(U+"
286: + Integer.toHexString(start)
287: + " == 0x"
288: + Integer.toHexString(value2)
289: + " instead of 0x"
290: + Integer.toHexString(value));
291: }
292: }
293: }
294: int value2 = trie.getCodePointValue(start);
295: if (value != value2) {
296: errln("serialized trie.getCodePointValue(U+"
297: + Integer.toHexString(start) + ")==0x"
298: + Integer.toHexString(value2)
299: + " instead of 0x"
300: + Integer.toHexString(value));
301: }
302: ++start;
303: }
304: }
305:
306: // enumerate and verify all ranges
307:
308: int enumRanges = 1;
309: TrieIterator iter = new _testEnumValue(trie);
310: RangeValueIterator.Element result = new RangeValueIterator.Element();
311: while (iter.next(result)) {
312: if (result.start != checkRanges[enumRanges - 1].limit
313: || result.limit != checkRanges[enumRanges].limit
314: || (result.value ^ 0x5555) != checkRanges[enumRanges].value) {
315: errln("utrie_enum() delivers wrong range [U+"
316: + Integer.toHexString(result.start)
317: + "..U+"
318: + Integer.toHexString(result.limit)
319: + "].0x"
320: + Integer.toHexString(result.value ^ 0x5555)
321: + " instead of [U+"
322: + Integer
323: .toHexString(checkRanges[enumRanges - 1].limit)
324: + "..U+"
325: + Integer
326: .toHexString(checkRanges[enumRanges].limit)
327: + "].0x"
328: + Integer
329: .toHexString(checkRanges[enumRanges].value));
330: }
331: enumRanges++;
332: }
333:
334: // test linear Latin-1 range
335: if (trie.isLatin1Linear()) {
336: for (start = 0; start < 0x100; ++start) {
337: if (trie.getLatin1LinearValue((char) start) != trie
338: .getLeadValue((char) start)) {
339: errln("trie.getLatin1LinearValue[U+"
340: + Integer.toHexString(start)
341: + "]=0x"
342: + Integer
343: .toHexString(trie
344: .getLatin1LinearValue((char) start))
345: + " instead of 0x"
346: + Integer.toHexString(trie
347: .getLeadValue((char) start)));
348: }
349: }
350: }
351:
352: _testTrieIteration(trie, checkRanges, countCheckRanges);
353: }
354:
355: private void _testTrieRanges2(SetRange setRanges[],
356: int countSetRanges, CheckRange checkRanges[],
357: int countCheckRanges) {
358: _testTrieRanges(setRanges, countSetRanges, checkRanges,
359: countCheckRanges, false);
360:
361: _testTrieRanges(setRanges, countSetRanges, checkRanges,
362: countCheckRanges, true);
363: }
364:
365: private void _testTrieRanges4(SetRange setRanges[],
366: int countSetRanges, CheckRange checkRanges[],
367: int countCheckRanges) {
368: _testTrieRanges2(setRanges, countSetRanges, checkRanges,
369: countCheckRanges);
370: }
371:
372: // test data ------------------------------------------------------------
373:
374: /**
375: * set consecutive ranges, even with value 0
376: */
377: private static SetRange setRanges1[] = {
378: new SetRange(0, 0x20, 0, false),
379: new SetRange(0x20, 0xa7, 0x1234, false),
380: new SetRange(0xa7, 0x3400, 0, false),
381: new SetRange(0x3400, 0x9fa6, 0x6162, false),
382: new SetRange(0x9fa6, 0xda9e, 0x3132, false),
383: // try to disrupt _testFoldingOffset16()
384: new SetRange(0xdada, 0xeeee, 0x87ff, false),
385: new SetRange(0xeeee, 0x11111, 1, false),
386: new SetRange(0x11111, 0x44444, 0x6162, false),
387: new SetRange(0x44444, 0x60003, 0, false),
388: new SetRange(0xf0003, 0xf0004, 0xf, false),
389: new SetRange(0xf0004, 0xf0006, 0x10, false),
390: new SetRange(0xf0006, 0xf0007, 0x11, false),
391: new SetRange(0xf0007, 0xf0020, 0x12, false),
392: new SetRange(0xf0020, 0x110000, 0, false) };
393:
394: private static CheckRange checkRanges1[] = {
395: new CheckRange(0, 0), // dummy start range to make _testEnumRange() simpler
396: new CheckRange(0x20, 0), new CheckRange(0xa7, 0x1234),
397: new CheckRange(0x3400, 0), new CheckRange(0x9fa6, 0x6162),
398: new CheckRange(0xda9e, 0x3132), new CheckRange(0xdada, 0),
399: new CheckRange(0xeeee, 0x87ff), new CheckRange(0x11111, 1),
400: new CheckRange(0x44444, 0x6162),
401: new CheckRange(0xf0003, 0), new CheckRange(0xf0004, 0xf),
402: new CheckRange(0xf0006, 0x10),
403: new CheckRange(0xf0007, 0x11),
404: new CheckRange(0xf0020, 0x12), new CheckRange(0x110000, 0) };
405:
406: /**
407: * set some interesting overlapping ranges
408: */
409: private static SetRange setRanges2[] = {
410: new SetRange(0x21, 0x7f, 0x5555, true),
411: new SetRange(0x2f800, 0x2fedc, 0x7a, true),
412: new SetRange(0x72, 0xdd, 3, true),
413: new SetRange(0xdd, 0xde, 4, false),
414: new SetRange(0x2f987, 0x2fa98, 5, true),
415: new SetRange(0x2f777, 0x2f833, 0, true),
416: new SetRange(0x2f900, 0x2ffee, 1, false),
417: new SetRange(0x2ffee, 0x2ffef, 2, true) };
418:
419: private static CheckRange checkRanges2[] = {
420: // dummy start range to make _testEnumRange() simpler
421: new CheckRange(0, 0), new CheckRange(0x21, 0),
422: new CheckRange(0x72, 0x5555), new CheckRange(0xdd, 3),
423: new CheckRange(0xde, 4), new CheckRange(0x2f833, 0),
424: new CheckRange(0x2f987, 0x7a), new CheckRange(0x2fa98, 5),
425: new CheckRange(0x2fedc, 0x7a), new CheckRange(0x2ffee, 1),
426: new CheckRange(0x2ffef, 2), new CheckRange(0x110000, 0) };
427:
428: /**
429: * use a non-zero initial value
430: */
431: private static SetRange setRanges3[] = {
432: new SetRange(0x31, 0xa4, 1, false),
433: new SetRange(0x3400, 0x6789, 2, false),
434: new SetRange(0x30000, 0x34567, 9, true),
435: new SetRange(0x45678, 0x56789, 3, true) };
436:
437: private static CheckRange checkRanges3[] = {
438: // dummy start range, also carries the initial value
439: new CheckRange(0, 9), new CheckRange(0x31, 9),
440: new CheckRange(0xa4, 1), new CheckRange(0x3400, 9),
441: new CheckRange(0x6789, 2), new CheckRange(0x45678, 9),
442: new CheckRange(0x56789, 3), new CheckRange(0x110000, 9) };
443:
444: public void TestIntTrie() {
445: _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1,
446: checkRanges1.length);
447: _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2,
448: checkRanges2.length);
449: _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3,
450: checkRanges3.length);
451: }
452:
453: public void TestCharValues() {
454: CharTrie trie = null;
455: try {
456: trie = UCharacterProperty.getInstance().m_trie_;
457: } catch (Exception e) {
458: warnln("Error creating ucharacter trie");
459: return;
460: }
461:
462: for (int i = 0; i < 0xFFFF; i++) {
463: if (i < 0xFF
464: && trie.getBMPValue((char) i) != trie
465: .getLatin1LinearValue((char) i)) {
466: errln("For latin 1 codepoint, getBMPValue should be the same "
467: + "as getLatin1LinearValue");
468: }
469: if (trie.getBMPValue((char) i) != trie.getCodePointValue(i)) {
470: errln("For BMP codepoint, getBMPValue should be the same "
471: + "as getCodepointValue");
472: }
473: }
474: for (int i = 0x10000; i < 0x10ffff; i++) {
475: char lead = UTF16.getLeadSurrogate(i);
476: char trail = UTF16.getTrailSurrogate(i);
477: char value = trie.getCodePointValue(i);
478: if (value != trie.getSurrogateValue(lead, trail)
479: || value != trie.getTrailValue(trie
480: .getLeadValue(lead), trail)) {
481: errln("For Non-BMP codepoints, getSurrogateValue should be "
482: + "the same s getCodepointValue and getTrailValue");
483: }
484: }
485: }
486:
487: private static class DummyGetFoldingOffset implements
488: Trie.DataManipulate {
489: public int getFoldingOffset(int value) {
490: return -1; /* never get non-initialValue data for supplementary code points */
491: }
492: }
493:
494: public void TestDummyCharTrie() {
495: CharTrie trie;
496: final int initialValue = 0x313, leadUnitValue = 0xaffe;
497: int value;
498: int c;
499: trie = new CharTrie(initialValue, leadUnitValue,
500: new DummyGetFoldingOffset());
501:
502: /* test that all code points have initialValue */
503: for (c = 0; c <= 0x10ffff; ++c) {
504: value = trie.getCodePointValue(c);
505: if (value != initialValue) {
506: errln("CharTrie/dummy.getCodePointValue(c)(U+" + hex(c)
507: + ")=0x" + hex(value) + " instead of 0x"
508: + hex(initialValue));
509: }
510: }
511:
512: /* test that the lead surrogate code units have leadUnitValue */
513: for (c = 0xd800; c <= 0xdbff; ++c) {
514: value = trie.getLeadValue((char) c);
515: if (value != leadUnitValue) {
516: errln("CharTrie/dummy.getLeadValue(c)(U+" + hex(c)
517: + ")=0x" + hex(value) + " instead of 0x"
518: + hex(leadUnitValue));
519: }
520: }
521: }
522:
523: public void TestDummyIntTrie() {
524: IntTrie trie;
525: final int initialValue = 0x01234567, leadUnitValue = 0x89abcdef;
526: int value;
527: int c;
528: trie = new IntTrie(initialValue, leadUnitValue,
529: new DummyGetFoldingOffset());
530:
531: /* test that all code points have initialValue */
532: for (c = 0; c <= 0x10ffff; ++c) {
533: value = trie.getCodePointValue(c);
534: if (value != initialValue) {
535: errln("IntTrie/dummy.getCodePointValue(c)(U+" + hex(c)
536: + ")=0x" + hex(value) + " instead of 0x"
537: + hex(initialValue));
538: }
539: }
540:
541: /* test that the lead surrogate code units have leadUnitValue */
542: for (c = 0xd800; c <= 0xdbff; ++c) {
543: value = trie.getLeadValue((char) c);
544: if (value != leadUnitValue) {
545: errln("IntTrie/dummy.getLeadValue(c)(U+" + hex(c)
546: + ")=0x" + hex(value) + " instead of 0x"
547: + hex(leadUnitValue));
548: }
549: }
550: }
551: }
|