001: /**
002: *******************************************************************************
003: * Copyright (C) 2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: *
007: *******************************************************************************
008: */package com.ibm.icu.charset;
009:
010: import java.io.IOException;
011: import java.nio.charset.Charset;
012: import java.nio.charset.UnsupportedCharsetException;
013: import java.nio.charset.spi.CharsetProvider;
014: import java.util.HashMap;
015: import java.util.Iterator;
016: import java.util.Map;
017:
018: /**
019: * A concrete subclass of CharsetProvider for loading and providing charset converters
020: * in ICU.
021: * @draft ICU 3.6
022: * @provisional This API might change or be removed in a future release.
023: */
024: public final class CharsetProviderICU extends CharsetProvider {
025:
026: /**
027: * Default constructor
028: * @draft ICU 3.6
029: * @provisional This API might change or be removed in a future release.
030: */
031: public CharsetProviderICU() {
032: }
033:
034: /**
035: * Constructs a charset for the given charset name.
036: * Implements the abstract method of super class.
037: * @param charsetName charset name
038: * @return charset objet for the given charset name, null if unsupported
039: * @stable ICU 3.6
040: */
041: public final Charset charsetForName(String charsetName) {
042: try {
043: // get the canonical name
044: String icuCanonicalName = getICUCanonicalName(charsetName);
045:
046: // create the converter object and return it
047: if (icuCanonicalName == null
048: || icuCanonicalName.length() == 0) {
049: // this would make the Charset API to throw
050: // unsupported encoding exception
051: return null;
052: }
053: return getCharset(icuCanonicalName);
054: } catch (UnsupportedCharsetException ex) {
055: } catch (IOException ex) {
056: }
057: return null;
058: }
059:
060: /**
061: * Gets the canonical name of the converter as defined by Java
062: * @param enc converter name
063: * @return canonical name of the converter
064: * @internal ICU 3.6
065: * @deprecated This API is for internal ICU use only
066: */
067: public static final String getICUCanonicalName(String enc)
068: throws UnsupportedCharsetException {
069: String canonicalName = null;
070: String ret = null;
071: try {
072: if (enc != null) {
073: if ((canonicalName = UConverterAlias.getCanonicalName(
074: enc, "MIME")) != null) {
075: ret = canonicalName;
076: } else if ((canonicalName = UConverterAlias
077: .getCanonicalName(enc, "IANA")) != null) {
078: ret = canonicalName;
079: } else if ((canonicalName = UConverterAlias
080: .getCanonicalName(enc, "")) != null) {
081: ret = canonicalName;
082: } else if ((canonicalName = UConverterAlias.getAlias(
083: enc, 0)) != null) {
084: /* we have some aliases in the form x-blah .. match those first */
085: ret = canonicalName;
086: } else if (enc.indexOf("x-") == 0) {
087: /* TODO: Match with getJavaCanonicalName method */
088: /*
089: char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
090: strcpy(temp, encName+2);
091: */
092: ret = enc.substring(2);
093: } else {
094: /* unsupported encoding */
095: ret = "";
096: }
097: }
098: return ret;
099: } catch (IOException ex) {
100: throw new UnsupportedCharsetException(enc);
101: }
102: }
103:
104: private static final Charset getCharset(String icuCanonicalName)
105: throws IOException {
106: String[] aliases = (String[]) getAliases(icuCanonicalName);
107: String canonicalName = getJavaCanonicalName(icuCanonicalName);
108: return (CharsetICU.getCharset(icuCanonicalName, canonicalName,
109: aliases));
110: }
111:
112: /**
113: * Gets the canonical name of the converter as defined by Java
114: * @param icuCanonicalName converter name
115: * @return canonical name of the converter
116: * @internal ICU 3.6
117: */
118: private static String getJavaCanonicalName(String icuCanonicalName) {
119: /*
120: If a charset listed in the IANA Charset Registry is supported by an implementation
121: of the Java platform then its canonical name must be the name listed in the registry.
122: Many charsets are given more than one name in the registry, in which case the registry
123: identifies one of the names as MIME-preferred. If a charset has more than one registry
124: name then its canonical name must be the MIME-preferred name and the other names in
125: the registry must be valid aliases. If a supported charset is not listed in the IANA
126: registry then its canonical name must begin with one of the strings "X-" or "x-".
127: */
128: if (icuCanonicalName == null) {
129: return null;
130: }
131: try {
132: String cName = null;
133: /* find out the alias with MIME tag */
134: if ((cName = UConverterAlias.getStandardName(
135: icuCanonicalName, "MIME")) != null) {
136: /* find out the alias with IANA tag */
137: } else if ((cName = UConverterAlias.getStandardName(
138: icuCanonicalName, "IANA")) != null) {
139: } else {
140: /*
141: check to see if an alias already exists with x- prefix, if yes then
142: make that the canonical name
143: */
144: int aliasNum = UConverterAlias
145: .countAliases(icuCanonicalName);
146: String name;
147: for (int i = 0; i < aliasNum; i++) {
148: name = UConverterAlias
149: .getAlias(icuCanonicalName, i);
150: if (name != null && name.indexOf("x-") == 0) {
151: cName = name;
152: break;
153: }
154: }
155: /* last resort just append x- to any of the alias and
156: make it the canonical name */
157: if ((cName == null || cName.length() == 0)) {
158: name = UConverterAlias.getStandardName(
159: icuCanonicalName, "UTR22");
160: if (name == null
161: && icuCanonicalName.indexOf(",") != -1) {
162: name = UConverterAlias.getAlias(
163: icuCanonicalName, 1);
164: }
165: /* if there is no UTR22 canonical name .. then just return itself*/
166: if (name == null) {
167: name = icuCanonicalName;
168: }
169: cName = "x-" + name;
170: }
171: }
172: return cName;
173: } catch (IOException ex) {
174:
175: }
176: return null;
177: }
178:
179: /**
180: * Gets the aliases associated with the converter name
181: * @param encName converter name
182: * @return converter names as elements in an object array
183: * @internal ICU 3.6
184: */
185: private static final String[] getAliases(String encName)
186: throws IOException {
187: String[] ret = null;
188: int aliasNum = 0;
189: int i = 0;
190: int j = 0;
191: String aliasArray[/*50*/] = new String[50];
192:
193: if (encName != null) {
194: aliasNum = UConverterAlias.countAliases(encName);
195: for (i = 0, j = 0; i < aliasNum; i++) {
196: String name = UConverterAlias.getAlias(encName, i);
197: if (name.indexOf('+') == -1 && name.indexOf(',') == -1) {
198: aliasArray[j++] = name;
199: }
200: }
201: ret = new String[j];
202: for (; --j >= 0;) {
203: ret[j] = aliasArray[j];
204: }
205:
206: }
207: return (ret);
208:
209: }
210:
211: private static final void putCharsets(Map map) {
212: int num = UConverterAlias.countAvailable();
213: for (int i = 0; i < num; i++) {
214: String name = UConverterAlias.getAvailableName(i);
215: try {
216: Charset cs = getCharset(name);
217: map.put(cs, getJavaCanonicalName(name));
218: } catch (UnsupportedCharsetException ex) {
219: } catch (IOException e) {
220: }
221: // add only charsets that can be created!
222: }
223: }
224:
225: /**
226: * Returns an iterator for the available charsets.
227: * Implements the abstract method of super class.
228: * @return Iterator the charset name iterator
229: * @stable ICU 3.6
230: */
231: public final Iterator charsets() {
232: HashMap map = new HashMap();
233: putCharsets(map);
234: return map.keySet().iterator();
235: }
236:
237: /**
238: * Gets the canonical names of available converters
239: * @return Object[] names as an object array
240: * @internal ICU 3.6
241: * @deprecated This API is for internal ICU use only
242: */
243: public static final Object[] getAvailableNames() {
244: HashMap map = new HashMap();
245: putCharsets(map);
246: return map.values().toArray();
247: }
248:
249: /**
250: * Return all names available
251: * @return String[] an arrya of all available names
252: * @internal ICU 3.6
253: * @deprecated This API is for internal ICU use only
254: */
255: public static final String[] getAllNames() {
256: int num = UConverterAlias.countAvailable();
257: String[] names = new String[num];
258: for (int i = 0; i < num; i++) {
259: names[i] = UConverterAlias.getAvailableName(i);
260: }
261: return names;
262: }
263: }
|