001: //##header
002: /*
003: **********************************************************************
004: * Copyright (c) 2002-2006, International Business Machines
005: * Corporation and others. All Rights Reserved.
006: **********************************************************************
007: * Author: Mark Davis
008: **********************************************************************
009: */
010: //#ifndef FOUNDATION
011: package com.ibm.icu.dev.test.cldr;
012:
013: import java.io.File;
014:
015: //import org.unicode.cldr.util.LanguageTagParser;
016: //import org.unicode.cldr.util.Utility;
017: import java.io.IOException;
018: import java.io.PrintWriter;
019: import java.io.StringWriter;
020: import java.text.ParseException;
021: import java.util.ArrayList;
022: import java.util.Collection;
023: import java.util.Date;
024: import java.util.HashMap;
025: import java.util.List;
026: import java.util.Map;
027: import java.util.Set;
028: import java.util.TreeMap;
029: import java.util.Iterator;
030: import java.util.TreeSet;
031: import java.util.regex.Matcher;
032: import java.util.regex.Pattern;
033:
034: import javax.xml.parsers.SAXParser;
035: import javax.xml.parsers.SAXParserFactory;
036:
037: //import org.unicode.cldr.test.CLDRTest;
038: //import org.unicode.cldr.tool.GenerateCldrTests;
039: import org.xml.sax.Attributes;
040: import org.xml.sax.SAXException;
041: import org.xml.sax.helpers.DefaultHandler;
042:
043: import com.ibm.icu.util.Currency;
044: import com.ibm.icu.util.TimeZone;
045: import com.ibm.icu.util.ULocale;
046: import com.ibm.icu.dev.test.TestFmwk;
047: import com.ibm.icu.dev.test.util.BagFormatter;
048: import com.ibm.icu.dev.tool.UOption;
049:
050: import com.ibm.icu.text.Collator;
051: import com.ibm.icu.text.DateFormat;
052: import com.ibm.icu.text.DateFormatSymbols;
053: import com.ibm.icu.text.NumberFormat;
054: import com.ibm.icu.text.SimpleDateFormat;
055: import com.ibm.icu.text.Transliterator;
056: import com.ibm.icu.text.UTF16;
057: import com.ibm.icu.text.UnicodeSet;
058:
059: /**
060: * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
061: * correctly.
062: * WARNING:
063: * 1. for this to work right, you have to have downloaded the CLDR data, and
064: * then set the CLDR directory correctly, using
065: * -DCLDR_DIRECTORY=<top level of cldr>
066: * 2. You probably also need to increase memory, eg with -Xmx512m
067: * 3. For speed, you should also use -DCLDR_DTD_CACHE=C:\cldrcache\, where
068: * C:\cldrcache\ is a temp directory to keep the program from hitting the net for
069: * each file access.
* 4. You may use other system properties (set with -D) to narrow what you test, e.g.
071: * -DXML_MATCH=".*" -DTEST_MATCH="zone.*" -DZONE_MATCH="(?!America/Argentina).*"
072: * a. -DXML_MATCH="de.*" (or whatever regex you want) to just
073: * test certain locales.
074: * b. -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc.
075: * c. -DZONE_MATCH=".*Moscow.*" (to only test certain zones)
076: * @author medavis
077: */
078: public class TestCLDRVsICU extends TestFmwk {
/** Compile-time switch that enables the extra diagnostic logging in this class. */
static final boolean DEBUG = false;

//ULocale uLocale = ULocale.ENGLISH;
//Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
//static PrintWriter log;

/** Parser used for the CLDR test files; assigned in an instance initializer. */
SAXParser SAX;

/** Filter on locale (file) names, from the XML_MATCH system property. */
static Matcher LOCALE_MATCH;

/** Filter on test (element) names, from the TEST_MATCH system property. */
static Matcher TEST_MATCH;

/** Filter on time zone IDs, from the ZONE_MATCH system property. */
static Matcher ZONE_MATCH;

/** Top-level CLDR directory, from the CLDR_DIRECTORY system property. */
static String CLDR_DIRECTORY;

static {
    // The blank lines bracket the "-Dkey=value" echo printed by getEnvironmentString.
    System.out.println();
    LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
    TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
    ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); // example

    // WARNING: THIS IS TEMPORARY UNTIL I GET THE FILES STRAIGHTENED OUT
    final String fallbackDirectory = "C:\\Unicode-CVS2\\cldr\\";
    CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", fallbackDirectory);
    System.out.println();
}
098:
099: /**
100: *
101: */
102: private static Matcher getEnvironmentRegex(String key,
103: String defaultValue) {
104: return Pattern.compile(getEnvironmentString(key, defaultValue))
105: .matcher("");
106: }
107:
108: /**
109: *
110: */
111: private static String getEnvironmentString(String key,
112: String defaultValue) {
113: String temp = System.getProperty(key);
114: if (temp == null)
115: temp = defaultValue;
116: else
117: System.out.print("-D" + key + "=\"" + temp + "\" ");
118: return temp;
119: }
120:
121: public static void main(String[] args) throws Exception {
122:
123: new TestCLDRVsICU().run(args);
124: }
125:
126: Set allLocales = new TreeSet();
127:
128: public void TestFiles() throws SAXException, IOException {
129: // only get ICU's locales
130: Set s = new TreeSet();
131: addLocales(NumberFormat.getAvailableULocales(), s);
132: addLocales(DateFormat.getAvailableULocales(), s);
133: addLocales(Collator.getAvailableULocales(), s);
134:
135: // filter, to make tracking down bugs easier
136:
137: for (Iterator it = s.iterator(); it.hasNext();) {
138: String locale = (String) it.next();
139: if (!LOCALE_MATCH.reset(locale).matches())
140: continue;
141: _test(locale);
142: }
143: }
144:
145: public void addLocales(ULocale[] list, Collection s) {
146: for (int i = 0; i < list.length; ++i) {
147: allLocales.add(list[i].toString());
148: s.add(list[i].getLanguage());
149: }
150: }
151:
152: public String getLanguage(ULocale uLocale) {
153: String result = uLocale.getLanguage();
154: String script = uLocale.getScript();
155: if (script.length() != 0)
156: result += "_" + script;
157: return result;
158: }
159:
160: public void _test(String localeName) throws SAXException,
161: IOException {
162: //uLocale = new ULocale(localeName);
163: //oLocale = uLocale.toLocale();
164:
165: File f = new File(CLDR_DIRECTORY + "common\\test\\"
166: + localeName + ".xml");
167: logln("Testing " + f.getCanonicalPath());
168: SAX.parse(f, DEFAULT_HANDLER);
169: }
170:
171: static Transliterator toUnicode = Transliterator
172: .getInstance("any-hex");
173:
174: static public String showString(String in) {
175: return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in)
176: + ")";
177: }
178:
179: // ============ SAX Handler Infrastructure ============
180:
181: abstract public class Handler {
182: Map settings = new TreeMap();
183: String name;
184: List currentLocales = new ArrayList();
185: int failures = 0;
186:
187: void setName(String name) {
188: this .name = name;
189: }
190:
191: void set(String attributeName, String attributeValue) {
192: //if (DEBUG) logln(attributeName + " => " + attributeValue);
193: settings.put(attributeName, attributeValue);
194: }
195:
196: void checkResult(String value) {
197: if ("true".equals(settings.get("draft"))) {
198: return; // skip draft
199: } else {
200: int x = 1; // for debug stopping
201: }
202: ULocale ul = new ULocale("xx");
203: try {
204: for (int i = 0; i < currentLocales.size(); ++i) {
205: ul = (ULocale) currentLocales.get(i);
206: //loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
207: handleResult(ul, value);
208: if (failures != 0) {
209: errln("\tTotal Failures: " + failures + "\t"
210: + ul + "("
211: + ul.getDisplayName(ULocale.ENGLISH)
212: + ")");
213: failures = 0;
214: }
215: }
216: } catch (Exception e) {
217: StringWriter sw = new StringWriter();
218: PrintWriter pw = new PrintWriter(sw);
219: e.printStackTrace(pw);
220: pw.flush();
221: errln("Exception: Locale: " + ul + ",\tValue: <"
222: + value + ">\r\n" + sw.toString());
223: }
224: }
225:
226: public void loglnSAX(String message) {
227: String temp = message + "\t[" + name;
228: for (Iterator it = settings.keySet().iterator(); it
229: .hasNext();) {
230: String attributeName = (String) it.next();
231: String attributeValue = (String) settings
232: .get(attributeName);
233: temp += " " + attributeName + "=<" + attributeValue
234: + ">";
235: }
236: logln(temp + "]");
237: }
238:
239: int lookupValue(Object x, Object[] list) {
240: for (int i = 0; i < list.length; ++i) {
241: if (x.equals(list[i]))
242: return i;
243: }
244: loglnSAX("Unknown String: " + x);
245: return -1;
246: }
247:
248: abstract void handleResult(ULocale currentLocale, String value)
249: throws Exception;
250:
251: /**
252: * @param attributes
253: */
254: public void setAttributes(Attributes attributes) {
255: String localeList = attributes.getValue("locales");
256: String[] currentLocaleString = new String[50];
257: com.ibm.icu.impl.Utility.split(localeList, ' ',
258: currentLocaleString);
259: currentLocales.clear();
260: for (int i = 0; i < currentLocaleString.length; ++i) {
261: if (currentLocaleString[i].length() == 0)
262: continue;
263: if (allLocales.contains("")) {
264: logln("Skipping locale, not in ICU4J: "
265: + currentLocaleString[i]);
266: continue;
267: }
268: currentLocales.add(new ULocale(currentLocaleString[i]));
269: }
270: if (DEBUG)
271: logln("Setting locales: " + currentLocales);
272: }
273: }
274:
275: public Handler getHandler(String name, Attributes attributes) {
276: if (DEBUG)
277: logln("Creating Handler: " + name);
278: Handler result = (Handler) RegisteredHandlers.get(name);
279: if (result == null)
280: logln("Unexpected test type: " + name);
281: else {
282: result.setAttributes(attributes);
283: }
284: return result;
285: }
286:
287: public void addHandler(String name, Handler handler) {
288: if (!TEST_MATCH.reset(name).matches())
289: handler = new NullHandler();
290: handler.setName(name);
291: RegisteredHandlers.put(name, handler);
292: }
293:
294: Map RegisteredHandlers = new HashMap();
295:
296: class NullHandler extends Handler {
297: void handleResult(ULocale currentLocale, String value)
298: throws Exception {
299: }
300: }
301:
302: // ============ Statics for Date/Number Support ============
303:
304: static TimeZone utc = TimeZone.getTimeZone("GMT");
305: static DateFormat iso = new SimpleDateFormat(
306: "yyyy-MM-dd'T'HH:mm:ss'Z'");
307: {
308: iso.setTimeZone(utc);
309: }
310: static int[] DateFormatValues = { -1, DateFormat.SHORT,
311: DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL };
312: static String[] DateFormatNames = { "none", "short", "medium",
313: "long", "full" };
314:
315: static String[] NumberNames = { "standard", "integer", "decimal",
316: "percent", "scientific", "GBP" };
317:
318: // ============ Handler for Collation ============
319: static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
320:
321: static String remove(String in, UnicodeSet toRemove) {
322: int cp;
323: StringBuffer result = new StringBuffer();
324: for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
325: cp = UTF16.charAt(in, i);
326: if (!toRemove.contains(cp))
327: UTF16.append(result, cp);
328: }
329: return result.toString();
330: }
331:
332: {
// Collation: the result text is a list of lines that must already be in
// non-decreasing order according to the locale's collator.
addHandler("collation", new Handler() {
    public void handleResult(ULocale currentLocale, String value) {
        Collator col = Collator.getInstance(currentLocale);
        String previous = "";
        int lineNumber = 0;
        int start = 0;
        while (start < value.length()) {
            int end = value.indexOf('\n', start);
            if (end < 0) {
                end = value.length();
            }
            String line = remove(value.substring(start, end), controlsAndSpace); // HACK for SAX
            start = end + 1;
            lineNumber++;
            if (line.trim().length() == 0) { // HACK for SAX
                continue;
            }
            if (col.compare(previous, line) > 0) {
                failures++;
                errln("\tLine " + lineNumber
                        + "\tFailure: "
                        + showString(previous)
                        + " should be leq "
                        + showString(line));
            } else if (DEBUG) {
                logln("OK: " + line);
            }
            previous = line;
        }
    }
});
363:
364: // ============ Handler for Numbers ============
// Numbers: formats the "input" value with the format named by the number
// type attribute and compares the output with the expected result text.
addHandler("number", new Handler() {
    public void handleResult(ULocale locale, String result) {
        NumberFormat nf = null;
        double v = Double.NaN;
        for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
            String attributeName = (String) it.next();
            String attributeValue = (String) settings.get(attributeName);
            if (attributeName.equals("input")) {
                v = Double.parseDouble(attributeValue);
                continue;
            }
            if (attributeName.equals("draft")) {
                // "draft" is bookkeeping consumed by checkResult, not a
                // number type; without this it would be formatted as a
                // currency code and reported as a failure.
                continue;
            }
            // must be numberType at this point
            int index = lookupValue(attributeValue, NumberNames);
            if (DEBUG) {
                logln("Getting number format for " + locale);
            }
            switch (index) {
            case 0:
                nf = NumberFormat.getInstance(locale);
                break;
            case 1:
                nf = NumberFormat.getIntegerInstance(locale);
                break;
            case 2:
                nf = NumberFormat.getNumberInstance(locale);
                break;
            case 3:
                nf = NumberFormat.getPercentInstance(locale);
                break;
            case 4:
                nf = NumberFormat.getScientificInstance(locale);
                break;
            default:
                // Anything else (including names lookupValue did not
                // find) is taken to be a currency code.
                nf = NumberFormat.getCurrencyInstance(locale);
                nf.setCurrency(Currency.getInstance(attributeValue));
                break;
            }
            String temp = nf.format(v).trim();
            result = result.trim(); // HACK because of SAX
            if (!temp.equals(result)) {
                errln("Number: Locale: " + locale
                        + ", \tType: " + attributeValue
                        + ", \tDraft: " + settings.get("draft")
                        + ", \tCLDR: <" + result + ">, ICU: <"
                        + temp + ">");
            }
        }
    }
});
417:
418: // ============ Handler for Dates ============
419: addHandler("date", new Handler() {
420: public void handleResult(ULocale locale, String result)
421: throws ParseException {
422: int dateFormat = 0;
423: int timeFormat = 0;
424: Date date = new Date();
425: for (Iterator it = settings.keySet().iterator(); it
426: .hasNext();) {
427: String attributeName = (String) it.next();
428: String attributeValue = (String) settings
429: .get(attributeName);
430: if (attributeName.equals("input")) {
431: date = iso.parse(attributeValue);
432: continue;
433: }
434: // must be either dateType or timeType at this point
435: int index = lookupValue(attributeValue,
436: DateFormatNames);
437: if (attributeName.equals("dateType"))
438: dateFormat = index;
439: else
440: timeFormat = index;
441:
442: }
443: SimpleDateFormat dt = getDateFormat(locale, dateFormat,
444: timeFormat);
445: dt.setTimeZone(utc);
446: String temp = dt.format(date).trim();
447: result = result.trim(); // HACK because of SAX
448: if (!temp.equals(result)) {
449: errln("DateTime: Locale: " + locale + ", \tDate: "
450: + DateFormatNames[dateFormat]
451: + ", \tTime: "
452: + DateFormatNames[timeFormat]
453: + ", \tDraft: " + settings.get("draft")
454: + ", \tCLDR: <" + result + ">, ICU: <"
455: + temp + ">");
456: }
457: }
458:
459: /**
460: *
461: */
462: private SimpleDateFormat getDateFormat(ULocale locale,
463: int dateFormat, int timeFormat) {
464: if (DEBUG)
465: logln("Getting date/time format for " + locale);
466: if (DEBUG && "ar_EG".equals(locale.toString())) {
467: System.out.println("debug here");
468: }
469: DateFormat dt;
470: if (dateFormat == 0) {
471: dt = DateFormat.getTimeInstance(
472: DateFormatValues[timeFormat], locale);
473: if (DEBUG)
474: System.out.print("getTimeInstance");
475: } else if (timeFormat == 0) {
476: dt = DateFormat.getDateInstance(
477: DateFormatValues[dateFormat], locale);
478: if (DEBUG)
479: System.out.print("getDateInstance");
480: } else {
481: dt = DateFormat.getDateTimeInstance(
482: DateFormatValues[dateFormat],
483: DateFormatValues[timeFormat], locale);
484: if (DEBUG)
485: System.out.print("getDateTimeInstance");
486: }
487: if (DEBUG)
488: System.out.println("\tinput:\t" + dateFormat + ", "
489: + timeFormat + " => "
490: + ((SimpleDateFormat) dt).toPattern());
491: return (SimpleDateFormat) dt;
492: }
493: });
494:
495: // ============ Handler for Zones ============
// Zones: formats the "date" timestamp in the given zone with the pattern
// from the "field" attribute and compares with the expected result text.
addHandler("zoneFields", new Handler() {
    String date = "";
    String zone = "";
    String parse = "";
    String pattern = "";

    public void handleResult(ULocale locale, String result)
            throws ParseException {
        // Refresh the cached fields from whatever settings are present.
        Iterator entries = settings.entrySet().iterator();
        while (entries.hasNext()) {
            Map.Entry entry = (Map.Entry) entries.next();
            String attributeName = (String) entry.getKey();
            String attributeValue = (String) entry.getValue();
            if ("date".equals(attributeName)) {
                date = attributeValue;
            } else if ("field".equals(attributeName)) {
                pattern = attributeValue;
            } else if ("zone".equals(attributeName)) {
                zone = attributeValue;
            } else if ("parse".equals(attributeName)) {
                parse = attributeValue;
            }
        }

        boolean zoneSelected = ZONE_MATCH.reset(zone).matches();
        if (!zoneSelected) {
            return;
        }

        Date dateValue = iso.parse(date);
        SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
        field.setTimeZone(TimeZone.getTimeZone(zone));
        String formatted = field.format(dateValue).trim();
        // SKIP PARSE FOR NOW
        result = result.trim(); // HACK because of SAX
        if (formatted.equals(result)) {
            return;
        }
        formatted = field.format(dateValue).trim(); // call again for debugging
        errln("Zone Format: Locale: " + locale
                + ", \tZone: " + zone + ", \tDate: " + date
                + ", \tField: " + pattern + ", \tDraft: "
                + settings.get("draft") + ", \tCLDR: <"
                + result + ">, \tICU: <" + formatted + ">");
    }
});
538: }
539:
540: // ============ Gorp for SAX ============
541:
// Instance initializer: creates the validating SAX parser used by _test.
{
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(true);
        SAX = factory.newSAXParser();
    } catch (Exception e) {
        // Keep the exception type, but attach the underlying failure so
        // the reason the parser could not be created is not lost.
        IllegalArgumentException failure = new IllegalArgumentException("can't start");
        failure.initCause(e);
        throw failure;
    }
}
551:
552: DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
553: static final boolean DEBUG = false;
554: StringBuffer lastChars = new StringBuffer();
555: boolean justPopped = false;
556: Handler handler;
557:
558: public void startElement(String uri, String localName,
559: String qName, Attributes attributes)
560: throws SAXException {
561: //data.put(new ContextStack(contextStack), lastChars);
562: //lastChars = "";
563: try {
564: if (qName.equals("cldrTest")) {
565: // skip
566: } else if (qName.equals("result")) {
567: for (int i = 0; i < attributes.getLength(); ++i) {
568: handler.set(attributes.getQName(i), attributes
569: .getValue(i));
570: }
571: } else {
572: handler = getHandler(qName, attributes);
573: //handler.set("locale", uLocale.toString());
574: }
575: //if (DEBUG) logln("startElement:\t" + contextStack);
576: justPopped = false;
577: } catch (RuntimeException e) {
578: e.printStackTrace();
579: throw e;
580: }
581: }
582:
583: public void endElement(String uri, String localName,
584: String qName) throws SAXException {
585: try {
586: //if (DEBUG) logln("endElement:\t" + contextStack);
587: if (qName.equals("result"))
588: handler.checkResult(lastChars.toString());
589: else if (qName.length() != 0) {
590: //logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
591: }
592: lastChars.setLength(0);
593: justPopped = true;
594: } catch (RuntimeException e) {
595: e.printStackTrace();
596: throw e;
597: }
598: }
599:
600: // Have to hack around the fact that the character data might be in pieces
601: public void characters(char[] ch, int start, int length)
602: throws SAXException {
603: try {
604: String value = new String(ch, start, length);
605: if (DEBUG)
606: logln("characters:\t" + value);
607: lastChars.append(value);
608: justPopped = false;
609: } catch (RuntimeException e) {
610: e.printStackTrace();
611: throw e;
612: }
613: }
614:
615: // just for debugging
616:
617: public void notationDecl(String name, String publicId,
618: String systemId) throws SAXException {
619: logln("notationDecl: " + name + ", " + publicId + ", "
620: + systemId);
621: }
622:
623: public void processingInstruction(String target, String data)
624: throws SAXException {
625: logln("processingInstruction: " + target + ", " + data);
626: }
627:
628: public void skippedEntity(String name) throws SAXException {
629: logln("skippedEntity: " + name);
630: }
631:
632: public void unparsedEntityDecl(String name, String publicId,
633: String systemId, String notationName)
634: throws SAXException {
635: logln("unparsedEntityDecl: " + name + ", " + publicId
636: + ", " + systemId + ", " + notationName);
637: }
638:
639: };
640: }
641: //#endif
|