001: package org.apache.turbine.services.mimetype.util;
002:
003: /*
004: * Licensed to the Apache Software Foundation (ASF) under one
005: * or more contributor license agreements. See the NOTICE file
006: * distributed with this work for additional information
007: * regarding copyright ownership. The ASF licenses this file
008: * to you under the Apache License, Version 2.0 (the
009: * "License"); you may not use this file except in compliance
010: * with the License. You may obtain a copy of the License at
011: *
012: * http://www.apache.org/licenses/LICENSE-2.0
013: *
014: * Unless required by applicable law or agreed to in writing,
015: * software distributed under the License is distributed on an
016: * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017: * KIND, either express or implied. See the License for the
018: * specific language governing permissions and limitations
019: * under the License.
020: */
021:
022: import java.io.File;
023: import java.io.FileInputStream;
024: import java.io.IOException;
025: import java.io.InputStream;
026:
027: import java.util.HashMap;
028: import java.util.Hashtable;
029: import java.util.Locale;
030: import java.util.Map;
031: import java.util.Properties;
032:
033: /**
034: * This class maintains a set of mappers defining mappings
035: * between locales and the corresponding charsets. The mappings
036: * are defined as properties between locale and charset names.
037: * The definitions can be listed in property files located in user's
038: * home directory, Java home directory or the current class jar.
039: * In addition, this class maintains static default mappings
040: * and constructors support application specific mappings.
041: *
042: * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha</a>
043: * @version $Id: CharSetMap.java 534527 2007-05-02 16:10:59Z tv $
044: */
public class CharSetMap {
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /*
     * Indexes into the mappers array. Lookups walk the array from index 0
     * upwards and stop at the first hit, so a lower index means a higher
     * priority.
     */

    /** Results of earlier lookups (including "not found" as ""). */
    private static final int MAP_CACHE = 0;

    /** Mappings supplied through a constructor or setCharSet. */
    private static final int MAP_PROG = 1;

    /** Mappings read from the "user.home" directory. */
    private static final int MAP_HOME = 2;

    /** Mappings read from the "lib" directory below "java.home". */
    private static final int MAP_SYS = 3;

    /** Mappings read from the /META-INF/ class path resource. */
    private static final int MAP_JAR = 4;

    /** The built-in common mappings. */
    private static final int MAP_COM = 5;

    /**
     * A common charset mapper for languages. This instance is shared by
     * every charset map and is never modified after class initialization;
     * setCommonCharSet works on a private copy instead.
     */
    private static final HashMap commonMapper = new HashMap();

    static {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * An array of available charset mappers, indexed by the MAP_*
     * constants. A slot is null when that source provided no mappings.
     */
    private final Map[] mappers = new Map[MAP_COM + 1];

    /**
     * Loads mappings from a stream. The stream is read but not closed;
     * closing it remains the responsibility of the caller.
     *
     * @param input an input stream.
     * @return the mappings.
     * @throws IOException for an incorrect stream.
     */
    protected static Map loadStream(InputStream input)
            throws IOException {
        Properties props = new Properties();
        props.load(input);
        return new HashMap(props);
    }

    /**
     * Loads mappings from a file. The file is closed again before this
     * method returns, whether or not loading succeeded.
     *
     * @param file a file.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadFile(File file) throws IOException {
        InputStream input = new FileInputStream(file);
        try {
            return loadStream(input);
        } finally {
            // Properties.load() leaves the stream open, so release the
            // file handle here.
            closeQuietly(input);
        }
    }

    /**
     * Loads mappings from a file path.
     *
     * @param path a file path.
     * @return the mappings.
     * @throws IOException for an incorrect file.
     */
    protected static Map loadPath(String path) throws IOException {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a resource resolved relative to this class.
     *
     * @param name a resource name.
     * @return the mappings, or null if the resource does not exist or
     *         cannot be parsed.
     */
    protected static Map loadResource(String name) {
        InputStream input = CharSetMap.class.getResourceAsStream(name);
        if (input == null) {
            return null;
        }
        try {
            return loadStream(input);
        } catch (IOException x) {
            return null;
        } catch (IllegalArgumentException x) {
            // Thrown by Properties.load() for a malformed Unicode escape.
            // An unreadable resource is treated like a missing one, the
            // same way unreadable home/system files are.
            return null;
        } finally {
            closeQuietly(input);
        }
    }

    /**
     * Closes a stream that has only been read from, ignoring any failure.
     *
     * @param input the stream to close.
     */
    private static void closeQuietly(InputStream input) {
        try {
            input.close();
        } catch (IOException ignored) {
            // Nothing useful can be done; the data has already been read
            // (or the original failure is already propagating).
        }
    }

    /**
     * Loads the optional charset property file from the directory named
     * by a system property.
     *
     * @param property the system property holding the base directory.
     * @param subdirectory a subdirectory below the base directory, or
     *        null to look in the base directory itself.
     * @return the mappings, or null if they are not available.
     */
    private static Map loadOptional(String property, String subdirectory) {
        try {
            String directory = System.getProperty(property);
            if (directory == null) {
                return null;
            }
            String path = directory + File.separator;
            if (subdirectory != null) {
                path = path + subdirectory + File.separator;
            }
            return loadPath(path + CHARSET_RESOURCE);
        } catch (Exception ignored) {
            // Deliberately best effort: these files are optional, so a
            // missing or unreadable file, a malformed one, or a denied
            // property access all simply mean "no mappings here".
            return null;
        }
    }

    /**
     * Constructs a new charset map with default mappers.
     */
    public CharSetMap() {
        // Check whether the user directory contains mappings.
        mappers[MAP_HOME] = loadOptional("user.home", null);

        // Check whether the system directory contains mappings.
        mappers[MAP_SYS] = loadOptional("java.home", "lib");

        // Check whether the current class jar contains mappings.
        mappers[MAP_JAR] = loadResource("/META-INF/" + CHARSET_RESOURCE);

        // Set the common mapper to have the lowest priority.
        mappers[MAP_COM] = commonMapper;

        // Set the cache mapper to have the highest priority.
        mappers[MAP_CACHE] = new Hashtable();
    }

    /**
     * Constructs a charset map from properties.
     *
     * @param props charset mapping properties.
     */
    public CharSetMap(Properties props) {
        this();
        mappers[MAP_PROG] = new HashMap(props);
    }

    /**
     * Constructs a charset map read from a stream. The stream is not
     * closed by this constructor.
     *
     * @param input an input stream.
     * @throws IOException for an incorrect stream.
     */
    public CharSetMap(InputStream input) throws IOException {
        this();
        mappers[MAP_PROG] = loadStream(input);
    }

    /**
     * Constructs a charset map read from a property file.
     *
     * @param file a property file.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(File file) throws IOException {
        this();
        mappers[MAP_PROG] = loadFile(file);
    }

    /**
     * Constructs a charset map read from a property file path.
     *
     * @param path a property file path.
     * @throws IOException for an incorrect property file.
     */
    public CharSetMap(String path) throws IOException {
        this();
        mappers[MAP_PROG] = loadPath(path);
    }

    /**
     * Sets a locale-charset mapping. The programmatic mapper is replaced
     * by a modified copy rather than changed in place, and the lookup
     * cache is emptied so that the new mapping takes effect.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    public synchronized void setCharSet(String key, String charset) {
        Map current = mappers[MAP_PROG];
        Map updated = current != null ? new HashMap(current) : new HashMap();
        updated.put(key, charset);
        mappers[MAP_PROG] = updated;
        mappers[MAP_CACHE].clear();
    }

    /**
     * Gets the charset for a locale. First a locale specific charset
     * is searched for, then a country specific one and lastly a language
     * specific one. If none is found, the default charset is returned.
     *
     * @param locale the locale; null yields the default charset.
     * @return the charset.
     */
    public String getCharSet(Locale locale) {
        if (locale == null) {
            return DEFAULT_CHARSET;
        }

        // The locale string doubles as the cache key.
        String key = locale.toString();
        if (key.length() == 0) {
            // toString() is empty when neither language nor country is
            // set, even if a variant is; fall back to a variant-only key.
            key = "__" + locale.getVariant();
            if (key.length() == 2) {
                return DEFAULT_CHARSET;
            }
        }
        return resolve(key, new String[] {
            locale.getLanguage(), locale.getCountry(), locale.getVariant()
        });
    }

    /**
     * Gets the charset for a locale with a variant. The search
     * is performed in the following order:
     * "lang"_"country"_"variant"="charset",
     * _"country"_"variant"="charset",
     * "lang"__"variant"="charset",
     * __"variant"="charset",
     * "lang"_"country"="charset",
     * _"country"="charset",
     * "lang"="charset".
     * If nothing of the above is found, the default charset is returned.
     *
     * @param locale the locale; null yields the default charset.
     * @param variant a variant field; when null or empty this call is
     *        equivalent to {@link #getCharSet(Locale)}.
     * @return the charset.
     */
    public String getCharSet(Locale locale, String variant) {
        if ((variant == null) || (variant.length() == 0)) {
            return getCharSet(locale);
        }
        if (locale == null) {
            return DEFAULT_CHARSET;
        }

        // Build the cache key from the locale string plus the variant.
        String key = locale.toString();
        if (key.length() == 0) {
            key = "__" + locale.getVariant();
            if (key.length() > 2) {
                key += '_' + variant;
            } else {
                key += variant;
            }
        } else if (locale.getCountry().length() == 0) {
            key += "__" + variant;
        } else {
            key += '_' + variant;
        }
        return resolve(key, new String[] {
            locale.getLanguage(), locale.getCountry(), locale.getVariant(),
            variant
        });
    }

    /**
     * Gets the charset for a specified key. Because the lookup consults
     * the cache first, a value cached for the same key by an earlier
     * locale lookup is returned here as well.
     *
     * @param key the key for the charset.
     * @return the found charset or the default one.
     */
    public String getCharSet(String key) {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : DEFAULT_CHARSET;
    }

    /**
     * Gets the charset for a specified key. Because the lookup consults
     * the cache first, a value cached for the same key by an earlier
     * locale lookup is returned here as well.
     *
     * @param key the key for the charset.
     * @param def the default charset if none is found.
     * @return the found charset or the given default.
     */
    public String getCharSet(String key, String def) {
        String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Resolves a charset for a cache key, falling back to a full search
     * over the locale items and finally to the default charset. The
     * outcome of a full search is stored in the cache under the key.
     *
     * @param key the non-empty cache key.
     * @param items the locale items, most general (language) first.
     * @return the charset, never empty.
     */
    private String resolve(String key, String[] items) {
        String charset = searchCharSet(key);
        if (charset.length() == 0) {
            // Not found, perform a full search and update the cache.
            charset = searchCharSet(items);
            if (charset.length() == 0) {
                charset = DEFAULT_CHARSET;
            }
            mappers[MAP_CACHE].put(key, charset);
        }
        return charset;
    }

    /**
     * Searches for a charset for a specified locale, starting with keys
     * that end in the last item and then dropping trailing items one at
     * a time.
     *
     * @param items an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items) {
        String charset;
        StringBuffer sb = new StringBuffer();
        for (int i = items.length; i > 0; i--) {
            charset = searchCharSet(items, sb, i);
            if (charset.length() > 0) {
                return charset;
            }
            sb.setLength(0);
        }
        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale. The
     * item at index count - 1 is prepended to the base buffer; then each
     * earlier item is tried in front of it in turn, and finally the
     * buffer is looked up on its own.
     *
     * @param items an array of locale items.
     * @param base a buffer of base items.
     * @param count the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String[] items, StringBuffer base,
            int count) {
        if ((--count >= 0) && (items[count] != null)
                && (items[count].length() > 0)) {
            String charset;
            base.insert(0, items[count]);

            // The part of the buffer (counted from its end) that must
            // survive each attempt below, separators included.
            int length = base.length();
            for (int i = count; i > 0; i--) {
                // A separator is added before the first attempt and
                // again before the attempt that reaches the first item.
                if ((i == count) || (i <= 1)) {
                    base.insert(0, '_');
                    length++;
                }
                charset = searchCharSet(items, base, i);
                if (charset.length() > 0) {
                    return charset;
                }
                // Strip whatever the failed attempt prepended.
                base.delete(0, base.length() - length);
            }
            return searchCharSet(base.toString());
        } else {
            return "";
        }
    }

    /**
     * Searches for a charset for a specified key, consulting the mappers
     * in priority order. Both hits from a non-cache mapper and misses
     * are recorded in the cache.
     *
     * @param key the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(String key) {
        if ((key != null) && (key.length() > 0)) {
            // Go through mappers.
            Map mapper;
            String charset;
            for (int i = 0; i < mappers.length; i++) {
                mapper = mappers[i];
                if (mapper != null) {
                    charset = (String) mapper.get(key);
                    if (charset != null) {
                        // Update the cache.
                        if (i > MAP_CACHE) {
                            mappers[MAP_CACHE].put(key, charset);
                        }
                        return charset;
                    }
                }
            }

            // Not found, add an empty string to the cache.
            mappers[MAP_CACHE].put(key, "");
        }
        return "";
    }

    /**
     * Sets a common locale-charset mapping. The change is made on a copy
     * of the common mapper held by this instance, so the shared built-in
     * mappings stay untouched; the lookup cache is emptied afterwards.
     *
     * @param key the key for the charset.
     * @param charset the corresponding charset.
     */
    protected synchronized void setCommonCharSet(String key,
            String charset) {
        Map updated = new HashMap(mappers[MAP_COM]);
        updated.put(key, charset);
        mappers[MAP_COM] = updated;
        mappers[MAP_CACHE].clear();
    }
}
|