001: /*
002: *******************************************************************************
003: * Copyright (C) 2001-2006, International Business Machines
004: * Corporation and others. All Rights Reserved.
005: *******************************************************************************
006: */
007:
008: package com.ibm.icu.dev.test.shaping;
009:
010: import java.util.MissingResourceException;
011:
012: import com.ibm.icu.dev.test.TestFmwk;
013: import com.ibm.icu.text.ArabicShaping;
014: import com.ibm.icu.text.ArabicShapingException;
015:
016: /**
017: * Regression test for Arabic shaping.
018: */
019: public class ArabicShapingRegTest extends TestFmwk {
020:
021: /* constants copied from ArabicShaping for convenience */
022:
023: public static final int LENGTH_GROW_SHRINK = 0;
024: public static final int LENGTH_FIXED_SPACES_NEAR = 1;
025: public static final int LENGTH_FIXED_SPACES_AT_END = 2;
026: public static final int LENGTH_FIXED_SPACES_AT_BEGINNING = 3;
027:
028: public static final int TEXT_DIRECTION_LOGICAL = 0;
029: public static final int TEXT_DIRECTION_VISUAL_LTR = 4;
030:
031: public static final int LETTERS_NOOP = 0;
032: public static final int LETTERS_SHAPE = 8;
033: public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;
034: public static final int LETTERS_UNSHAPE = 0x10;
035:
036: public static final int DIGITS_NOOP = 0;
037: public static final int DIGITS_EN2AN = 0x20;
038: public static final int DIGITS_AN2EN = 0x40;
039: public static final int DIGITS_EN2AN_INIT_LR = 0x60;
040: public static final int DIGITS_EN2AN_INIT_AL = 0x80;
041: private static final int DIGITS_RESERVED = 0xa0;
042:
043: public static final int DIGIT_TYPE_AN = 0;
044: public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
045:
046: public static class TestData {
047: public int type;
048: public String source;
049: public int flags;
050: public String result;
051: public int length;
052: public Class error;
053:
054: public static final int STANDARD = 0;
055: public static final int PREFLIGHT = 1;
056: public static final int ERROR = 2;
057:
058: public static TestData standard(String source, int flags,
059: String result) {
060: return new TestData(STANDARD, source, flags, result, 0,
061: null);
062: }
063:
064: public static TestData preflight(String source, int flags,
065: int length) {
066: return new TestData(PREFLIGHT, source, flags, null, length,
067: null);
068: }
069:
070: public static TestData error(String source, int flags,
071: Class error) {
072: return new TestData(ERROR, source, flags, null, 0, error);
073: }
074:
075: private TestData(int type, String source, int flags,
076: String result, int length, Class error) {
077: this .type = type;
078: this .source = source;
079: this .flags = flags;
080: this .result = result;
081: this .length = length;
082: this .error = error;
083: }
084:
085: private static final String[] typenames = { "standard",
086: "preflight", "error" };
087:
088: public String toString() {
089: StringBuffer buf = new StringBuffer(super .toString());
090: buf.append("[\n");
091: buf.append(typenames[type]);
092: buf.append(",\n");
093: if (source == null) {
094: buf.append("null");
095: } else {
096: buf.append('"');
097: buf.append(escapedString(source));
098: buf.append('"');
099: }
100: buf.append(",\n");
101: buf.append(Integer.toHexString(flags));
102: buf.append(",\n");
103: if (result == null) {
104: buf.append("null");
105: } else {
106: buf.append('"');
107: buf.append(escapedString(result));
108: buf.append('"');
109: }
110: buf.append(",\n");
111: buf.append(length);
112: buf.append(",\n");
113: buf.append(error);
114: buf.append(']');
115: return buf.toString();
116: }
117: }
118:
119: private static final String lamAlefSpecialVLTR = "\u0020\u0646\u0622\u0644\u0627\u0020"
120: + "\u0646\u0623\u064E\u0644\u0627\u0020"
121: + "\u0646\u0627\u0670\u0644\u0627\u0020"
122: + "\u0646\u0622\u0653\u0644\u0627\u0020"
123: + "\u0646\u0625\u0655\u0644\u0627\u0020"
124: + "\u0646\u0622\u0654\u0644\u0627\u0020" + "\uFEFC\u0639";
125:
126: private static final String tashkeelSpecialVLTR = "\u064A\u0628\u0631\u0639\u0020"
127: + "\u064A\u0628\u0651\u0631\u064E\u0639\u0020"
128: + "\u064C\u064A\u0628\u0631\u064F\u0639\u0020"
129: + "\u0628\u0670\u0631\u0670\u0639\u0020"
130: + "\u0628\u0653\u0631\u0653\u0639\u0020"
131: + "\u0628\u0654\u0631\u0654\u0639\u0020"
132: + "\u0628\u0655\u0631\u0655\u0639\u0020";
133:
134: private static final String logicalUnshape = "\u0020\u0020\u0020\uFE8D\uFEF5\u0020\uFEE5\u0020\uFE8D\uFEF7\u0020"
135: + "\uFED7\uFEFC\u0020\uFEE1\u0020\uFE8D\uFEDF\uFECC\uFEAE\uFE91\uFEF4"
136: + "\uFE94\u0020\uFE8D\uFEDF\uFEA4\uFEAE\uFE93\u0020\u0020\u0020\u0020";
137:
138: private static final String numSource = "\u0031" + /* en:1 */
139: "\u0627" + /* arabic:alef */
140: "\u0032" + /* en:2 */
141: "\u06f3" + /* an:3 */
142: "\u0061" + /* latin:a */
143: "\u0034"; /* en:4 */
144:
145: private static final TestData[] standardTests = {
146: /* lam alef special visual ltr */
147: TestData.standard(lamAlefSpecialVLTR, LETTERS_SHAPE
148: | TEXT_DIRECTION_VISUAL_LTR
149: | LENGTH_FIXED_SPACES_NEAR,
150: "\u0020\ufee5\u0020\ufef5\ufe8d\u0020"
151: + "\ufee5\u0020\ufe76\ufef7\ufe8d\u0020"
152: + "\ufee5\u0020\u0670\ufefb\ufe8d\u0020"
153: + "\ufee5\u0020\u0653\ufef5\ufe8d\u0020"
154: + "\ufee5\u0020\u0655\ufef9\ufe8d\u0020"
155: + "\ufee5\u0020\u0654\ufef5\ufe8d\u0020"
156: + "\ufefc\ufecb"),
157: TestData.standard(lamAlefSpecialVLTR, LETTERS_SHAPE
158: | TEXT_DIRECTION_VISUAL_LTR
159: | LENGTH_FIXED_SPACES_AT_END,
160: "\u0020\ufee5\ufef5\ufe8d\u0020\ufee5"
161: + "\ufe76\ufef7\ufe8d\u0020\ufee5\u0670"
162: + "\ufefb\ufe8d\u0020\ufee5\u0653\ufef5"
163: + "\ufe8d\u0020\ufee5\u0655\ufef9\ufe8d"
164: + "\u0020\ufee5\u0654\ufef5\ufe8d\u0020"
165: + "\ufefc\ufecb\u0020\u0020\u0020\u0020"
166: + "\u0020\u0020"),
167: TestData.standard(lamAlefSpecialVLTR, LETTERS_SHAPE
168: | TEXT_DIRECTION_VISUAL_LTR
169: | LENGTH_FIXED_SPACES_AT_BEGINNING,
170: "\u0020\u0020\u0020\u0020\u0020\u0020"
171: + "\u0020\ufee5\ufef5\ufe8d\u0020\ufee5"
172: + "\ufe76\ufef7\ufe8d\u0020\ufee5\u0670"
173: + "\ufefb\ufe8d\u0020\ufee5\u0653\ufef5"
174: + "\ufe8d\u0020\ufee5\u0655\ufef9\ufe8d"
175: + "\u0020\ufee5\u0654\ufef5\ufe8d\u0020"
176: + "\ufefc\ufecb"),
177: TestData.standard(lamAlefSpecialVLTR, LETTERS_SHAPE
178: | TEXT_DIRECTION_VISUAL_LTR | LENGTH_GROW_SHRINK,
179: "\u0020\ufee5\ufef5\ufe8d\u0020\ufee5"
180: + "\ufe76\ufef7\ufe8d\u0020\ufee5\u0670"
181: + "\ufefb\ufe8d\u0020\ufee5\u0653\ufef5"
182: + "\ufe8d\u0020\ufee5\u0655\ufef9\ufe8d"
183: + "\u0020\ufee5\u0654\ufef5\ufe8d\u0020"
184: + "\ufefc\ufecb"),
185:
186: /* TASHKEEL */
187: TestData.standard(lamAlefSpecialVLTR,
188: LETTERS_SHAPE_TASHKEEL_ISOLATED
189: | TEXT_DIRECTION_VISUAL_LTR
190: | LENGTH_FIXED_SPACES_NEAR,
191: "\u0020\ufee5\u0020\ufef5\ufe8d\u0020"
192: + "\ufee5\u0020\ufe76\ufef7\ufe8d\u0020"
193: + "\ufee5\u0020\u0670\ufefb\ufe8d\u0020"
194: + "\ufee5\u0020\u0653\ufef5\ufe8d\u0020"
195: + "\ufee5\u0020\u0655\ufef9\ufe8d\u0020"
196: + "\ufee5\u0020\u0654\ufef5\ufe8d\u0020"
197: + "\ufefc\ufecb"),
198: TestData.standard(lamAlefSpecialVLTR,
199: LETTERS_SHAPE_TASHKEEL_ISOLATED
200: | TEXT_DIRECTION_VISUAL_LTR
201: | LENGTH_FIXED_SPACES_AT_END,
202: "\u0020\ufee5\ufef5\ufe8d\u0020\ufee5"
203: + "\ufe76\ufef7\ufe8d\u0020\ufee5\u0670"
204: + "\ufefb\ufe8d\u0020\ufee5\u0653\ufef5"
205: + "\ufe8d\u0020\ufee5\u0655\ufef9\ufe8d"
206: + "\u0020\ufee5\u0654\ufef5\ufe8d\u0020"
207: + "\ufefc\ufecb\u0020\u0020\u0020\u0020"
208: + "\u0020\u0020"),
209: TestData.standard(lamAlefSpecialVLTR,
210: LETTERS_SHAPE_TASHKEEL_ISOLATED
211: | TEXT_DIRECTION_VISUAL_LTR
212: | LENGTH_FIXED_SPACES_AT_BEGINNING,
213: "\u0020\u0020\u0020\u0020\u0020\u0020"
214: + "\u0020\ufee5\ufef5\ufe8d\u0020\ufee5"
215: + "\ufe76\ufef7\ufe8d\u0020\ufee5\u0670"
216: + "\ufefb\ufe8d\u0020\ufee5\u0653\ufef5"
217: + "\ufe8d\u0020\ufee5\u0655\ufef9\ufe8d"
218: + "\u0020\ufee5\u0654\ufef5\ufe8d\u0020"
219: + "\ufefc\ufecb"),
220: TestData.standard(lamAlefSpecialVLTR,
221: LETTERS_SHAPE_TASHKEEL_ISOLATED
222: | TEXT_DIRECTION_VISUAL_LTR
223: | LENGTH_GROW_SHRINK,
224: "\u0020\ufee5\ufef5\ufe8d\u0020\ufee5"
225: + "\ufe76\ufef7\ufe8d\u0020\ufee5\u0670"
226: + "\ufefb\ufe8d\u0020\ufee5\u0653\ufef5"
227: + "\ufe8d\u0020\ufee5\u0655\ufef9\ufe8d"
228: + "\u0020\ufee5\u0654\ufef5\ufe8d\u0020"
229: + "\ufefc\ufecb"),
230:
231: /* tashkeel special visual ltr */
232: TestData
233: .standard(
234: tashkeelSpecialVLTR,
235: LETTERS_SHAPE | TEXT_DIRECTION_VISUAL_LTR
236: | LENGTH_FIXED_SPACES_NEAR,
237: "\ufef2\ufe91\ufeae\ufecb\u0020"
238: + "\ufef2\ufe91\ufe7c\ufeae\ufe77\ufecb\u0020"
239: + "\ufe72\ufef2\ufe91\ufeae\ufe79\ufecb\u0020"
240: + "\ufe8f\u0670\ufeae\u0670\ufecb\u0020"
241: + "\ufe8f\u0653\ufeae\u0653\ufecb\u0020"
242: + "\ufe8f\u0654\ufeae\u0654\ufecb\u0020"
243: + "\ufe8f\u0655\ufeae\u0655\ufecb\u0020"),
244:
245: TestData
246: .standard(
247: tashkeelSpecialVLTR,
248: LETTERS_SHAPE_TASHKEEL_ISOLATED
249: | TEXT_DIRECTION_VISUAL_LTR
250: | LENGTH_FIXED_SPACES_NEAR,
251: "\ufef2\ufe91\ufeae\ufecb\u0020"
252: + "\ufef2\ufe91\ufe7c\ufeae\ufe76\ufecb\u0020"
253: + "\ufe72\ufef2\ufe91\ufeae\ufe78\ufecb\u0020"
254: + "\ufe8f\u0670\ufeae\u0670\ufecb\u0020"
255: + "\ufe8f\u0653\ufeae\u0653\ufecb\u0020"
256: + "\ufe8f\u0654\ufeae\u0654\ufecb\u0020"
257: + "\ufe8f\u0655\ufeae\u0655\ufecb\u0020"),
258:
259: /* logical unshape */
260: TestData
261: .standard(
262: logicalUnshape,
263: LETTERS_UNSHAPE | TEXT_DIRECTION_LOGICAL
264: | LENGTH_FIXED_SPACES_NEAR,
265: "\u0020\u0020\u0020\u0627\u0644\u0622\u0646\u0020\u0627\u0644\u0623\u0642\u0644\u0627"
266: + "\u0645\u0020\u0627\u0644\u0639\u0631\u0628\u064a\u0629\u0020\u0627\u0644\u062d\u0631"
267: + "\u0629\u0020\u0020\u0020\u0020"),
268: TestData
269: .standard(
270: logicalUnshape,
271: LETTERS_UNSHAPE | TEXT_DIRECTION_LOGICAL
272: | LENGTH_FIXED_SPACES_AT_END,
273: "\u0020\u0020\u0020\u0627\u0644\u0622\u0020\u0646\u0020\u0627\u0644\u0623\u0020\u0642"
274: + "\u0644\u0627\u0020\u0645\u0020\u0627\u0644\u0639\u0631\u0628\u064a\u0629\u0020\u0627"
275: + "\u0644\u062d\u0631\u0629\u0020"),
276: TestData
277: .standard(
278: logicalUnshape,
279: LETTERS_UNSHAPE | TEXT_DIRECTION_LOGICAL
280: | LENGTH_FIXED_SPACES_AT_BEGINNING,
281: "\u0627\u0644\u0622\u0020\u0646\u0020\u0627\u0644\u0623\u0020\u0642\u0644\u0627\u0020"
282: + "\u0645\u0020\u0627\u0644\u0639\u0631\u0628\u064a\u0629\u0020\u0627\u0644\u062d\u0631"
283: + "\u0629\u0020\u0020\u0020\u0020"),
284: TestData
285: .standard(
286: logicalUnshape,
287: LETTERS_UNSHAPE | TEXT_DIRECTION_LOGICAL
288: | LENGTH_GROW_SHRINK,
289: "\u0020\u0020\u0020\u0627\u0644\u0622\u0020\u0646\u0020\u0627\u0644\u0623\u0020\u0642"
290: + "\u0644\u0627\u0020\u0645\u0020\u0627\u0644\u0639\u0631\u0628\u064a\u0629\u0020\u0627"
291: + "\u0644\u062d\u0631\u0629\u0020\u0020\u0020\u0020"),
292:
293: /* numbers */
294: TestData.standard(numSource, DIGITS_EN2AN | DIGIT_TYPE_AN,
295: "\u0661\u0627\u0662\u06f3\u0061\u0664"),
296: TestData.standard(numSource, DIGITS_AN2EN
297: | DIGIT_TYPE_AN_EXTENDED,
298: "\u0031\u0627\u0032\u0033\u0061\u0034"),
299: TestData.standard(numSource, DIGITS_EN2AN_INIT_LR
300: | DIGIT_TYPE_AN,
301: "\u0031\u0627\u0662\u06f3\u0061\u0034"),
302: TestData.standard(numSource, DIGITS_EN2AN_INIT_AL
303: | DIGIT_TYPE_AN_EXTENDED,
304: "\u06f1\u0627\u06f2\u06f3\u0061\u0034"),
305: TestData.standard(numSource, DIGITS_EN2AN_INIT_LR
306: | DIGIT_TYPE_AN | TEXT_DIRECTION_VISUAL_LTR,
307: "\u0661\u0627\u0032\u06f3\u0061\u0034"),
308: TestData.standard(numSource, DIGITS_EN2AN_INIT_AL
309: | DIGIT_TYPE_AN_EXTENDED
310: | TEXT_DIRECTION_VISUAL_LTR,
311: "\u06f1\u0627\u0032\u06f3\u0061\u06f4"),
312:
313: /* no-op */
314: TestData.standard(numSource, 0, numSource), };
315:
316: private static final TestData[] preflightTests = {
317: /* preflight */
318: TestData.preflight("\u0644\u0627", LETTERS_SHAPE
319: | LENGTH_GROW_SHRINK, 1),
320:
321: TestData.preflight("\u0644\u0627\u0031", DIGITS_EN2AN
322: | DIGIT_TYPE_AN_EXTENDED | LENGTH_GROW_SHRINK, 3),
323:
324: TestData.preflight("\u0644\u0644", LETTERS_SHAPE
325: | LENGTH_GROW_SHRINK, 2),
326:
327: TestData.preflight("\ufef7", LETTERS_UNSHAPE
328: | LENGTH_GROW_SHRINK, 2), };
329:
330: private static final TestData[] errorTests = {
331: /* bad data */
332: TestData.error("\u0020\ufef7\u0644\u0020", LETTERS_UNSHAPE
333: | LENGTH_FIXED_SPACES_NEAR,
334: ArabicShapingException.class),
335:
336: TestData.error("\u0020\ufef7", LETTERS_UNSHAPE
337: | LENGTH_FIXED_SPACES_AT_END,
338: ArabicShapingException.class),
339:
340: TestData.error("\ufef7\u0020", LETTERS_UNSHAPE
341: | LENGTH_FIXED_SPACES_AT_BEGINNING,
342: ArabicShapingException.class),
343:
344: /* bad options */
345: TestData.error("\ufef7", 0xffffffff,
346: IllegalArgumentException.class),
347:
348: TestData.error("\ufef7", LETTERS_UNSHAPE
349: | LENGTH_GROW_SHRINK, ArabicShapingException.class),
350:
351: TestData.error(null, LETTERS_UNSHAPE
352: | LENGTH_FIXED_SPACES_AT_END,
353: IllegalArgumentException.class), };
354:
355: public void testStandard() {
356: for (int i = 0; i < standardTests.length; ++i) {
357: TestData test = standardTests[i];
358:
359: Exception ex = null;
360: String result = null;
361: ArabicShaping shaper = null;
362:
363: try {
364: shaper = new ArabicShaping(test.flags);
365: result = shaper.shape(test.source);
366: } catch (MissingResourceException e) {
367: throw e;
368: } catch (IllegalStateException ie) {
369: warnln("IllegalStateException: " + ie.toString());
370: return;
371: } catch (Exception e) {
372: ex = e;
373: }
374:
375: if (!test.result.equals(result)) {
376: reportTestFailure(i, test, shaper, result, ex);
377: }
378: }
379: }
380:
381: public void testPreflight() {
382: for (int i = 0; i < preflightTests.length; ++i) {
383: TestData test = preflightTests[i];
384:
385: Exception ex = null;
386: char src[] = null;
387: int len = 0;
388: ArabicShaping shaper = null;
389:
390: if (test.source != null) {
391: src = test.source.toCharArray();
392: }
393:
394: try {
395: shaper = new ArabicShaping(test.flags);
396: len = shaper.shape(src, 0, src.length, null, 0, 0);
397: } catch (Exception e) {
398: ex = e;
399: }
400:
401: if (test.length != len) {
402: reportTestFailure(i, test, shaper, test.source, ex);
403: }
404: }
405: }
406:
407: public void testError() {
408: for (int i = 0; i < errorTests.length; ++i) {
409: TestData test = errorTests[i];
410:
411: Exception ex = null;
412: char src[] = null;
413: int len = 0;
414: ArabicShaping shaper = null;
415:
416: if (test.source != null) {
417: src = test.source.toCharArray();
418: len = src.length;
419: }
420:
421: try {
422: shaper = new ArabicShaping(test.flags);
423: shaper.shape(src, 0, len);
424: } catch (Exception e) {
425: ex = e;
426: }
427:
428: if (!test.error.isInstance(ex)) {
429: reportTestFailure(i, test, shaper, test.source, ex);
430: }
431: }
432: }
433:
434: public void testEquals() {
435: ArabicShaping as1 = new ArabicShaping(LETTERS_SHAPE
436: | TEXT_DIRECTION_VISUAL_LTR | LENGTH_FIXED_SPACES_NEAR);
437: ArabicShaping as2 = new ArabicShaping(LETTERS_SHAPE
438: | TEXT_DIRECTION_VISUAL_LTR | LENGTH_FIXED_SPACES_NEAR);
439: ArabicShaping as3 = new ArabicShaping(LETTERS_UNSHAPE
440: | TEXT_DIRECTION_LOGICAL
441: | LENGTH_FIXED_SPACES_AT_BEGINNING);
442:
443: if (!as1.equals(as1)) {
444: err("as1: " + as1 + " does not equal itself!\n");
445: }
446:
447: if (!as1.equals(as2)) {
448: err("as1: " + as1 + ", as2: " + as2
449: + " are not equal, but should be.\n");
450: }
451:
452: if (as1.equals(as3)) {
453: err("as1: " + as1 + ", as3: " + as3
454: + " are equal but should not be.\n");
455: }
456: }
457:
458: public void reportTestFailure(int index, TestData test,
459: ArabicShaping shaper, String result, Exception error) {
460: if (noData() && error != null
461: && error instanceof MissingResourceException) {
462: warnln(error.getMessage());
463: }
464:
465: StringBuffer buf = new StringBuffer();
466: buf.append("*** test failure ***\n");
467: buf.append("index: " + index + "\n");
468: buf.append("test: " + test + "\n");
469: buf.append("shaper: " + shaper + "\n");
470: buf.append("result: " + escapedString(result) + "\n");
471: buf.append("error: " + error + "\n");
472:
473: if (result != null && test.result != null
474: && !test.result.equals(result)) {
475: for (int i = 0; i < Math.max(test.result.length(), result
476: .length()); ++i) {
477: String temp = Integer.toString(i);
478: if (temp.length() < 2) {
479: temp = " ".concat(temp);
480: }
481: char trg = i < test.result.length() ? test.result
482: .charAt(i) : '\uffff';
483: char res = i < result.length() ? result.charAt(i)
484: : '\uffff';
485:
486: buf.append("[" + temp + "] ");
487: buf.append(escapedString("" + trg) + " ");
488: buf.append(escapedString("" + res) + " ");
489: if (trg != res) {
490: buf.append("***");
491: }
492: buf.append("\n");
493: }
494: }
495: err(buf.toString());
496: }
497:
498: private static String escapedString(String str) {
499: if (str == null) {
500: return null;
501: }
502:
503: StringBuffer buf = new StringBuffer(str.length() * 6);
504: for (int i = 0; i < str.length(); ++i) {
505: char ch = str.charAt(i);
506: buf.append("\\u");
507: if (ch < 0x1000) {
508: buf.append('0');
509: }
510: if (ch < 0x0100) {
511: buf.append('0');
512: }
513: if (ch < 0x0010) {
514: buf.append('0');
515: }
516: buf.append(Integer.toHexString(ch));
517: }
518: return buf.toString();
519: }
520:
521: public static void main(String[] args) {
522: try {
523: new ArabicShapingRegTest().run(args);
524: } catch (Exception e) {
525: System.out.println(e);
526: }
527: }
528: }
|