001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007:
008: package com.ibm.icu.dev.tool.localeconverter;
009:
010: import java.io.*;
011: import java.util.*;
012:
013: /**
014:
The ConvertPOSIXLocale application converts POSIX locale files to
Java and ICU Locale files. Its usage is as follows:
017:
018: ConvertPOSIXLocale
019: [-LC_CTYPE]
020: [-LC_TIME]
021: [-LC_NUMERIC]
022: [-LC_MONETARY]
023: [-LC_MESSAGES]
024: [-11]
025: [-12]
026: [-icu]
027: [-icu2]
028: localeName
029: localeDataFile
030: [charMapFile ...]
031:
032: The application is invoked with options specifying the format(s) of
033: the locale file(s) to generate as well as the POSIX locale file and
034: character mapping files.
035:
036: Usage
037:
038: -LC_CTYPE
039: If the -LC_CTYPE option is specified, the
040: following items are added to the locale if they
041: are present in the source: upper, lower, alpha, digit,
042: space, cntrl, punct, graph, print, xdigit, blank,
043: toupper, tolower.
044:
-LC_TIME
If the -LC_TIME option is specified, the following
items will be included if they are present in the POSIX source:
abday, day, abmon, mon, d_t_fmt, d_fmt, t_fmt, am_pm,
t_fmt_ampm, era, era_year, era_d_fmt, alt_digits.
050:
051: -LC_NUMERIC
052: If the -LC_NUMERIC option is specified, the following
053: items will be included if they are present in the source:
054: decimal_point, thousands_sep, grouping
055:
056: -LC_MONETARY
057: If the -LC_MONETARY option is specified, the following
058: items will be included if they are present in the source:
059: int_curr_symbol, currency_symbol, mon_decimal_point,
060: mon_thousands_sep, mon_grouping, positive_sign,
061: negative_sign, int_frac_digits, frac_digits, p_cs_precedes,
062: p_sep_by_space, n_cs_precedes, n_sep_by_space, p_sign_posn.
063:
064: -LC_MESSAGES
065: If the -LC_MESSAGES option is specified, the
066: following items are added to the locale if they
067: are present in the source: yesexpr, noexpr
068:
069: -11
070: If this option is specified, data is output in
071: Java 1.1.x locale format.
072:
073: -12
074: If this option is specified, data is output in
075: Java 1.2.x locale format. If an output format
076: is not specified, -12 is the default.
077:
078: -icu
079: If this option is specified, data is output in
080: ICU locale format.
081:
082: localeName
083: The name of the locale in the localeDataFile. Ex. en_US.
084:
085: localeDataFile
086: The localeDataFile path is required and specifies the path
087: to the locale data file. If a "copy" directive is encountered
088: while processing the localeDataFile, ConvertPOSIXLocale will look
089: in the same directory as the localeDataFile for additional
090: POSIX locale data files. Files must be in the POSIX format
091: specified in ISO/IEC 9945- with exceptions noted below. Exactly
092: one localeDataFile must be specified.
093:
094: charMapFile
Zero or more character mapping files may be specified. charMapFiles are used
to map symbols in the localeDataFile to Unicode values. They are processed
as follows. ConvertPOSIXLocale searches for a line containing only the
word "CHARMAP" and reads symbol mappings until it reaches a line
containing only the words "END CHARMAP". Symbol mappings have
the form "<SYMBOL> <Uhhhh>" where "<SYMBOL>" is any symbol valid
in a localeDataFile and "hhhh" is four hexadecimal digits representing
the Unicode value for that symbol. Surrogate pairs are not supported
in charMapFiles. An example charMapFile might contain the following:
104:
105: CHARMAP
106: <START_OF_TEXT> <U0002>
107: <E> <U0045>
108: <q> <U0071>
109: END CHARMAP
110:
111: specifying that the symbol <START_OF_TEXT> should be replaced by
112: the Unicode value of 0x0002 wherever it occurs.
113:
114: When multiple charMapFiles are specified, mappings in files listed
115: later take precedence over earlier ones.
116:
117:
118: Conversion to ICU and Java:
119:
120: collations
121: Converted from the LC_COLLATE section. The "..." directive is ignored.
122: The "replace-after" directive is ignored.
123:
124: CurrencyElements
125: element 0 is set to currency_symbol
126: element 1 is set to int_curr_symbol
127: element 2 is set to mon_decimal_point
128: All other elements default.
129:
130: NumberElements
131: element 0 is set to decimal_point
132: element 1 is set to thousands_sep
133:
134: MonthNames is set to mon
135:
136: MonthAbbreviations is set to abmon
137:
138: DayNames is set to day
139:
140: DayAbbreviations is set to abday
141:
142: AmPmMarkers is set to am_pm
143:
144: DateTimePatterns
145: elements 0 through 3 are set to t_fmt_ampm with the patterns converted
146: elements 4 through 7 are set to d_fmt with the patterns converted
147:
148:
Additional POSIX data may be included in the Locale as follows:

LC_CTYPE
152: This section is ignored unless the -LC_CTYPE option is
153: specified. If the -LC_CTYPE option is specified, the
154: following items are added to the locale if they
155: are present in the source: upper, lower, alpha, digit,
156: space, cntrl, punct, graph, print, xdigit, blank,
157: toupper, tolower.
158:
159: LC_MESSAGES
160:
161: LC_MONETARY
162:
163: LC_NUMERIC
164:
165: LC_TIME
If the -LC_TIME option is specified, the following
items will be included if they are present in the source:
abday, day, abmon, mon, d_t_fmt, d_fmt, t_fmt, am_pm,
t_fmt_ampm, era, era_year, era_d_fmt, alt_digits.
170:
171: LC_COLLATE
172: Converted to collations in the resource file.
173:
174:
175: */
176:
177: public class ConvertPOSIXLocale {
178: private static final short OPT_LC_CTYPE = 0x001;
179: private static final short OPT_LC_TIME = 0x002;
180: private static final short OPT_LC_NUMERIC = 0x004;
181: private static final short OPT_LC_MONETARY = 0x008;
182: private static final short OPT_LC_MESSAGES = 0x010;
183: private static final short OPT_11 = 0x020;
184: private static final short OPT_12 = 0x040;
185: private static final short OPT_ICU = 0x080;
186: private static final short OPT_ICU2 = 0x100;
187: private static final short OPT_RAW = 0x200;
188: private static final short OPT_UNKNOWN = 0x4000;
189: private static final String USER_OPTIONS[] = { "-LC_CTYPE",
190: "-LC_TIME", "-LC_NUMERIC", "-LC_MONETARY", "-LC_MESSAGES",
191: "-11", "-12", "-icu", "-icu2", "-RAW", "-enc", };
192: private static final short OPT_CONVERT = (short) (OPT_LC_CTYPE
193: | OPT_LC_TIME | OPT_LC_NUMERIC | OPT_LC_MONETARY | OPT_LC_MESSAGES);
194:
195: private Hashtable data;
196:
197: public static void main(String args[]) {
198: try {
199: new ConvertPOSIXLocale(args);
200: } catch (Throwable t) {
201: t.printStackTrace();
202: System.err.println("Unknown error: " + t);
203: }
204: }
205:
206: public ConvertPOSIXLocale(String args[]) {
207: process(args);
208: //{{INIT_CONTROLS
209: //}}
210: }
211:
212: public void process(String args[]) {
213: short options = identifyOptions(args);
214: String enc = null;
215: if ((args.length < 2) || ((options & OPT_UNKNOWN) != 0)) {
216: printUsage();
217: } else {
218: Vector mapFiles = new Vector();
219: Locale locale = null;
220: String fileName = null;
221: for (int i = 0; i < args.length; i++) {
222: final String this Arg = args[i];
223: if (this Arg.startsWith("-")) {
224: if (this Arg.startsWith("-enc")) {
225: enc = args[++i];
226: }
227: } else if (locale == null) {
228: locale = localeFromString(this Arg);
229: } else if (fileName == null) {
230: fileName = this Arg;
231: } else {
232: mapFiles.addElement(this Arg);
233: }
234:
235: }
236: if (enc == null) {
237: enc = "Default";
238: }
239: if ((fileName == null) || (locale == null)
240: || (options == 0)) {
241: printUsage();
242: } else {
243: PosixCharMap map = new PosixCharMap();
244: Enumeration enumer = mapFiles.elements();
245: while (enumer.hasMoreElements()) {
246: String mapFile = (String) enumer.nextElement();
247: System.err.println("Locale: " + locale);
248: System.err.println("Loading character map file: "
249: + mapFile);
250: try {
251: map.load(new File(mapFile), enc);
252: } catch (IOException e) {
253: System.err.println("Error loading map file: "
254: + mapFile + " " + e);
255: System.err.println("File skipped");
256: }
257: }
258: SymbolTransition.setCharMap(map);
259: File dataFile = new File(fileName);
260: System.err.println("Locale directory: "
261: + dataFile.getParent());
262: POSIXLocaleReader reader = new POSIXLocaleReader(
263: dataFile.getParent(), locale);
264: System.err.println("Parsing file: "
265: + dataFile.getName());
266: try {
267: data = reader.parse(dataFile.getName(),
268: (byte) (options & OPT_CONVERT));
269: System.err.println("Converting....");
270: if ((options & OPT_11) != 0) {
271: new Java1LocaleWriter(System.out, System.err)
272: .write(locale, data);
273: }
274: if ((options & OPT_12) != 0) {
275: new JavaLocaleWriter(System.out, System.err)
276: .write(locale, data);
277: }
278: if ((options & OPT_ICU) != 0) {
279: new ICULocaleWriter(System.out, System.err)
280: .write(locale, data);
281: }
282: if ((options & OPT_ICU2) != 0) {
283: new ICU2LocaleWriter(System.out, System.err)
284: .write(locale, data);
285: }
286: if ((options & OPT_RAW) != 0) {
287: new ICULocaleWriter(System.out, System.err)
288: .write(locale, data);
289: }
290: } catch (IOException e) {
291: System.err.println(e);
292: }
293: }
294: }
295: }
296:
297: private void printUsage() {
298: System.err
299: .println("Usage: ConvertPOSIXLocale [-LC_CTYPE] [-LC_TIME]"
300: + " [-LC_NUMERIC] [-LC_MONETARY] [-LC_MESSAGES] [-11] [-12] [-icu]"
301: + " localeName localeDataFile [charMapFile ...]");
302: }
303:
304: private short identifyOptions(String[] options) {
305: short result = 0;
306: for (int j = 0; j < options.length; j++) {
307: String option = options[j];
308: if (option.startsWith("-")) {
309: boolean optionRecognized = false;
310: for (short i = 0; i < USER_OPTIONS.length; i++) {
311: if (USER_OPTIONS[i].equals(option)) {
312: result |= (short) (1 << i);
313: optionRecognized = true;
314: break;
315: }
316: }
317: if (!optionRecognized) {
318: result |= OPT_UNKNOWN;
319: }
320: }
321: }
322: return result;
323: }
324:
325: private Locale localeFromString(final String localeName) {
326: String language = localeName;
327: String country = "";
328: String variant = "";
329:
330: int ndx = language.indexOf('_');
331: if (ndx >= 0) {
332: country = language.substring(ndx + 1);
333: language = language.substring(0, ndx);
334: }
335: ndx = country.indexOf('_');
336: if (ndx >= 0) {
337: variant = country.substring(ndx + 1);
338: country = country.substring(0, ndx);
339: }
340: ndx = country.indexOf('@');
341: if (ndx > 0) {
342: variant = country.substring(ndx + 1);
343: country = country.substring(0, ndx);
344: }
345: return new Locale(language, country, variant);
346: }
347: //{{DECLARE_CONTROLS
348: //}}
349: }
|