001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.harmony.niochar;
019:
import java.lang.reflect.Constructor;
import java.nio.charset.Charset;
import java.nio.charset.spi.CharsetProvider;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
030:
/**
 * A concrete implementation of the abstract
 * {@link java.nio.charset.spi.CharsetProvider} class that also serves as the
 * base class of other character set provider implementations.
 * <p>
 * A derived class should override the {@link #getPackageName()} and
 * {@link #getCharsetsInfo()} methods. The first of them has to return the
 * name of the package that is prepended to every charset class name. The
 * second one has to construct the description table, the structure of which
 * is described in {@link #getCharsetsInfo()}.
 * <p>
 * Both methods are called from the constructor of this class, i.e. before
 * the constructor of a derived class has run.
 * <p>
 * See CharsetProviderImplStd or CharsetProviderImplExt for example.
 */
public class CharsetProviderImpl extends CharsetProvider {

    static {
        // Load the native "hycharset" library when this class is initialized
        // (presumably required by the charset implementations - not visible
        // from this file).
        System.loadLibrary("hycharset");
    }

    /**
     * The named index of the 0th element of a row of
     * the <code>charsets[]</code> array.
     * It means a charset class name.
     */
    protected static final int CHARSET_CLASS = 0;

    /**
     * The named index of the 1st element of a row of
     * the <code>charsets[]</code> array.
     * It means a charset instance (<code>null</code> until the charset has
     * been instantiated successfully).
     */
    protected static final int CHARSET_INSTANCE = 1;

    /**
     * The named index of the 2nd element of a row of
     * the <code>charsets[]</code> array.
     * It means a charset aliases array; its first element is the canonical
     * charset name.
     */
    protected static final int CHARSET_ALIASES = 2;

    /**
     * Maps every upper-cased alias to the row of <code>charsets[]</code>
     * describing the charset. The lazily created charset instance is stored
     * in that row.
     */
    protected Map cache;

    /**
     * An array returned by <code>getCharsetsInfo()</code>.
     */
    protected Object charsets[][];

    /**
     * A package name returned by <code>getPackageName()</code>.
     */
    protected String packageName;

    /**
     * Builds the alias lookup table from <code>getCharsetsInfo()</code> and
     * remembers the package name returned by <code>getPackageName()</code>.
     * No charset is instantiated here.
     */
    public CharsetProviderImpl() {
        Map<String, Object[]> index = Collections
                .synchronizedMap(new HashMap<String, Object[]>());
        // The field is assigned before the overridable methods are called,
        // exactly as before.
        cache = index;
        charsets = getCharsetsInfo();
        packageName = getPackageName();
        for (Object[] entry : charsets) {
            for (String alias : aliasesOf(entry)) {
                index.put(toLookupKey(alias), entry);
            }
        }
    }

    /**
     * Returns an iterator over the charsets of this provider that can be
     * instantiated. Charsets whose implementation class cannot be loaded or
     * constructed are skipped, so the iterator never yields
     * <code>null</code>.
     *
     * @return an iterator over the available charsets
     */
    @Override
    public Iterator<Charset> charsets() {
        ArrayList<Charset> available = new ArrayList<Charset>(charsets.length);
        for (Object[] entry : charsets) {
            Charset cs = charsetForName(canonicalNameOf(entry));
            if (cs != null) {
                available.add(cs);
            }
        }
        return available.iterator();
    }

    /**
     * Looks a charset up by its canonical name or by one of its aliases,
     * ignoring case. The charset is instantiated on first use and the
     * instance is kept for later calls; a failed instantiation is not
     * remembered and is retried on the next call.
     *
     * @param charsetName
     *            a canonical charset name or an alias
     * @return the charset, or <code>null</code> if the name is unknown to
     *         this provider or the charset could not be instantiated
     */
    @Override
    public Charset charsetForName(String charsetName) {
        Object[] entry = (Object[]) cache.get(toLookupKey(charsetName));
        if (entry == null) {
            return null;
        }
        Charset cs = (Charset) entry[CHARSET_INSTANCE];
        if (cs == null) {
            // Make an instance of the found charset.
            cs = newCharsetInstance(entry);
            entry[CHARSET_INSTANCE] = cs;
        }
        return cs;
    }

    /**
     * Puts every charset of this provider that can be instantiated into the
     * given map, keyed by its canonical name.
     *
     * @param map
     *            the map that receives the canonical name to charset
     *            mappings
     */
    public final void putCharsets(Map map) {
        // The parameter stays a raw Map for compatibility with existing
        // callers; only String keys and Charset values are stored.
        @SuppressWarnings("unchecked")
        Map<String, Charset> target = map;
        // The table built by the constructor is reused instead of
        // constructing a new one with getCharsetsInfo() on every call.
        for (Object[] entry : charsets) {
            final String canonicalName = canonicalNameOf(entry);
            Charset cs = charsetForName(canonicalName);
            if (cs != null) {
                target.put(canonicalName, cs);
            }
        }
    }

    /**
     * Returns the name of the package that is prepended to the charset class
     * names found in the table returned by <code>getCharsetsInfo()</code>.
     *
     * @return the package name, without a trailing dot
     */
    protected String getPackageName() {
        return "org.apache.harmony.niochar.charset";
    }

    /**
     * Constructs the table that describes the charsets of this provider.
     * A new array is created on every call.
     *
     * @return the charset description table; see the comment in the method
     *         body for the structure of a row
     */
    protected Object[][] getCharsetsInfo() {
        // The charset aliases below correspond to the IANA registry
        // http://www.iana.org/assignments/character-sets.
        //
        // Array structure:
        //
        // charsetsInfo[][0] - String: A charset class name.
        //                     The named index is CHARSET_CLASS.
        // charsetsInfo[][1] - Charset: A charset instance.
        //                     The named index is CHARSET_INSTANCE.
        // charsetsInfo[][2] - String[]: A charset aliases array.
        //                     The named index is CHARSET_ALIASES.
        //                     THE FIRST ELEMENT OF THE ALIASES ARRAY MUST BE
        //                     A CANONICAL CHARSET NAME.
        Object charsetsInfo[][] = {
                { "US_ASCII", null,
                        new String[] { "US-ASCII", "ANSI_X3.4-1968",
                                "ANSI_X3.4-1986", "iso-ir-6",
                                "iso_646.irv:1983", "ISO_646.irv:1991",
                                "ASCII", "ISO646-US", "us", "cp367",
                                "ascii7", "646", "csASCII" } },

                { "KOI8_R", null, new String[] { "KOI8-R", "csKOI8R" } },

                { "CP_1250", null,
                        new String[] { "windows-1250", "cp1250" } },

                { "CP_1251", null,
                        new String[] { "windows-1251", "cp1251" } },

                { "CP_1252", null,
                        new String[] { "windows-1252", "cp1252" } },

                { "CP_1253", null,
                        new String[] { "windows-1253", "cp1253" } },

                { "CP_1254", null,
                        new String[] { "windows-1254", "cp1254" } },

                { "CP_1257", null,
                        new String[] { "windows-1257", "cp1257" } },

                { "ISO_8859_1", null,
                        new String[] { "ISO-8859-1",
                                "8859_1", /*not in IANA Registry*/
                                "ISO8859-1", /*not in IANA Registry*/
                                "ISO8859_1", /*not in IANA Registry*/
                                "ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1",
                                "latin1", "l1", "IBM819", "ISO_8859_1",
                                "IBM-819", "CP819", "819",
                                "csISOLatin1" } },

                { "ISO_8859_2", null,
                        new String[] { "ISO-8859-2",
                                "8859_2", /*not in IANA Registry*/
                                "ISO_8859-2:1987", "iso-ir-101", "ISO_8859-2",
                                "latin2", "l2", "csISOLatin2" } },

                { "ISO_8859_4", null,
                        new String[] { "ISO-8859-4",
                                "8859_4", /*not in IANA Registry*/
                                "ISO_8859-4:1988", "iso-ir-110", "ISO_8859-4",
                                "latin4", "l4", "csISOLatin4" } },

                { "ISO_8859_5", null,
                        new String[] { "ISO-8859-5",
                                "8859_5", /*not in IANA Registry*/
                                "ISO_8859-5:1988", "iso-ir-144", "ISO_8859-5",
                                "cyrillic", "csISOLatinCyrillic" } },

                { "ISO_8859_7", null,
                        new String[] { "ISO-8859-7", "ISO_8859-7:1987",
                                "ISO_8859-7", "iso-ir-126", "ELOT_928",
                                "ECMA-118", "greek", "greek8",
                                "csISOLatinGreek" } },

                { "ISO_8859_9", null,
                        new String[] { "ISO-8859-9", "ISO_8859-9:1989",
                                "iso-ir-148", "ISO_8859-9", "latin5",
                                "l5", "csISOLatin5" } },

                { "ISO_8859_13", null, new String[] { "ISO-8859-13" } },

                { "ISO_8859_15", null,
                        new String[] { "ISO-8859-15", "ISO_8859-15",
                                "Latin-9" } },

                { "UTF_8", null,
                        new String[] { "UTF-8",
                                "UTF8" /*not in IANA Registry*/ } },

                { "UTF_16", null,
                        new String[] { "UTF-16", "UTF16", "UTF_16" } },

                { "UTF_16LE", null,
                        new String[] { "UTF-16LE", "X-UTF-16LE",
                                "UTF_16LE" } },

                { "UTF_16BE", null,
                        new String[] { "UTF-16BE", "X-UTF-16BE",
                                "UTF_16BE" } },

                { "IBM866", null,
                        new String[] { "cp866", "IBM866", "866",
                                "csIBM866" } },

                // additional charsets

                { "additional.windows_1255", null,
                        new String[] { "windows-1255", "cp1255" } },

                { "additional.windows_1256", null,
                        new String[] { "windows-1256", "cp1256" } },

                { "additional.IBM1026", null,
                        new String[] { "IBM1026", "CP1026", "csIBM1026" } },

                { "additional.IBM1047", null,
                        new String[] { "IBM1047", "1047", "cp1047",
                                "ibm-1047" } },

                { "additional.IBM037", null,
                        new String[] { "IBM037", "cp037",
                                "ebcdic-cp-us", "ebcdic-cp-ca",
                                "ebcdic-cp-wt", "ebcdic-cp-nl",
                                "csIBM037" } },

                { "additional.IBM424", null,
                        new String[] { "IBM424", "cp424",
                                "ebcdic-cp-he", "csIBM424" } },

                { "additional.IBM437", null,
                        new String[] { "IBM437", "cp437", "437",
                                "csPC8CodePage437" } },

                { "additional.IBM500", null,
                        new String[] { "IBM500", "CP500",
                                "ebcdic-cp-be", "ebcdic-cp-ch",
                                "csIBM500" } },

                { "additional.IBM775", null,
                        new String[] { "IBM775", "cp775",
                                "csPC775Baltic" } },

                { "additional.IBM850", null,
                        new String[] { "cp850", "IBM850", "850",
                                "csPC850Multilingual" } },

                { "additional.IBM852", null,
                        new String[] { "IBM852", "cp852", "852",
                                "csPCp852" } },

                { "additional.IBM855", null,
                        new String[] { "IBM855", "cp855", "855",
                                "csIBM855" } },

                { "additional.IBM857", null,
                        new String[] { "cp857", "IBM857", "857",
                                "csIBM857" } },

                { "additional.IBM860", null,
                        new String[] { "cp860", "IBM860", "860",
                                "csIBM860" } },

                { "additional.IBM861", null,
                        new String[] { "cp861", "IBM861", "861",
                                "cp-is", "csIBM861" } },

                { "additional.IBM862", null,
                        new String[] { "cp862", "IBM862", "862",
                                "csPC862LatinHebrew" } },

                { "additional.IBM863", null,
                        new String[] { "cp863", "IBM863", "863",
                                "csIBM863" } },

                { "additional.IBM865", null,
                        new String[] { "cp865", "IBM865", "865",
                                "csIBM865" } },

                { "additional.IBM869", null,
                        new String[] { "cp869", "IBM869", "869",
                                "cp-gr", "csIBM869" } },

                { "additional.IBM00858", null,
                        new String[] { "cp858", "IBM00858",
                                "CCSID00858", "CP00858" } },

                { "additional.IBM01140", null,
                        new String[] { "IBM01140", "cp1140",
                                "CCSID01140", "CP01140" } },

                { "additional.IBM01141", null,
                        new String[] { "IBM01141", "cp1141",
                                "CCSID01141", "CP01141" } },

                { "additional.IBM01142", null,
                        new String[] { "IBM01142", "cp1142",
                                "CCSID01142", "CP01142" } },

                { "additional.IBM01143", null,
                        new String[] { "IBM01143", "cp1143",
                                "CCSID01143", "CP01143" } },

                { "additional.IBM01144", null,
                        new String[] { "IBM01144", "cp1144",
                                "CCSID01144", "CP01144" } },

                { "additional.IBM01145", null,
                        new String[] { "IBM01145", "cp1145",
                                "CCSID01145", "CP01145" } },

                { "additional.IBM01146", null,
                        new String[] { "IBM01146", "cp1146",
                                "CCSID01146", "CP01146" } },

                { "additional.IBM01147", null,
                        new String[] { "IBM01147", "cp1147",
                                "CCSID01147", "CP01147" } },

                { "additional.IBM01148", null,
                        new String[] { "IBM01148", "cp1148",
                                "CCSID01148", "CP01148" } },

                { "additional.IBM01149", null,
                        new String[] { "IBM01149", "cp1149",
                                "CCSID01149", "CP01149" } },

                { "additional.IBM273", null,
                        new String[] { "IBM273", "cp273", "csIBM273" } },

                { "additional.IBM277", null,
                        new String[] { "IBM277", "cp277",
                                "EBCDIC-CP-DK", "EBCDIC-CP-NO",
                                "csIBM277" } },

                { "additional.IBM278", null,
                        new String[] { "IBM278", "cp278",
                                "ebcdic-cp-fi", "ebcdic-cp-se",
                                "csIBM278" } },

                { "additional.IBM280", null,
                        new String[] { "IBM280", "cp280",
                                "ebcdic-cp-it", "csIBM280" } },

                { "additional.IBM284", null,
                        new String[] { "IBM284", "cp284",
                                "ebcdic-cp-es", "csIBM284" } },

                { "additional.IBM285", null,
                        new String[] { "IBM285", "cp285",
                                "ebcdic-cp-gb", "csIBM285" } },

                { "additional.IBM297", null,
                        new String[] { "IBM297", "cp297",
                                "ebcdic-cp-fr", "csIBM297" } },

                { "additional.IBM870", null,
                        new String[] { "IBM870", "cp870",
                                "ebcdic-cp-roece", "ebcdic-cp-yu",
                                "csIBM870" } },

                { "additional.IBM871", null,
                        new String[] { "IBM871", "cp871",
                                "ebcdic-cp-is", "csIBM871" } },

                { "additional.IBM918", null,
                        new String[] { "IBM918", "cp918",
                                "ebcdic-cp-ar2", "csIBM918" } },

                { "additional.IBM420", null,
                        new String[] { "IBM420", "cp420",
                                "ebcdic-cp-ar1", "csIBM420" } },

                { "additional.IBM864", null,
                        new String[] { "cp864", "IBM864", "csIBM864" } },

                { "additional.IBM868", null,
                        new String[] { "cp868", "IBM868", "cp-ar",
                                "csIBM868" } },

                // "ISO_8859-3:1988" is the IANA registry name of this
                // charset; it replaces the former misspelt alias
                // "ISO_8859_2:1998".
                { "additional.ISO_8859_3", null,
                        new String[] { "ISO-8859-3",
                                "8859_3", /*not in IANA Registry*/
                                "ISO_8859_3", "ISO_8859-3:1988", "iso-ir-109",
                                "ISO_8859-3", "latin3", "l3",
                                "csISOLatin3" } },

                { "additional.ISO_8859_6", null,
                        new String[] { "ISO-8859-6", "ISO_8859_6",
                                "ISO_8859-6:1987", "iso-ir-127",
                                "ISO_8859-6", "ECMA-114", "ASMO-708",
                                "arabic", "csISOLatinArabic" } },

                { "additional.ISO_8859_8", null,
                        new String[] { "ISO-8859-8", "ISO_8859_8",
                                "ISO_8859-8:1988", "iso-ir-138",
                                "ISO_8859-8", "hebrew",
                                "csISOLatinHebrew" } },

                { "additional.IBM_Thai", null,
                        new String[] { "IBM-Thai", "cp838" } },

                { "additional.x_IBM737", null,
                        new String[] { "x-ibm-737_P100-1997",
                                "x-IBM737", "cp737" } },

                { "additional.x_IBM856", null,
                        new String[] { "cp856", "x-IBM856" } },

                { "additional.x_IBM874", null,
                        new String[] { "TIS-620", "x-IBM874", "cp874" } },

                { "additional.x_IBM875", null,
                        new String[] { "x-ibm-875_P100-1995",
                                "x-IBM875", "cp875" } },

                { "additional.x_IBM922", null,
                        new String[] { "cp922", "x-IBM922" } },

                { "additional.x_IBM1006", null,
                        new String[] { "x-ibm-1006_P100-1995",
                                "x-IBM1006", "cp1006" } },

                { "additional.x_IBM1025", null,
                        new String[] { "x-ibm-1025_P100-1995",
                                "x-IBM1025", "cp1025" } },

                { "additional.x_IBM1112", null,
                        new String[] { "x-ibm-1112_P100-1995",
                                "x-IBM1112", "cp1112" } },

                { "additional.x_IBM1122", null,
                        new String[] { "x-ibm-1122_P100-1999",
                                "x-IBM1122", "cp1122" } },

                { "additional.x_IBM1123", null,
                        new String[] { "x-ibm-1123_P100-1995",
                                "x-IBM1123", "cp1123" } },

                { "additional.x_IBM1124", null,
                        new String[] { "x-ibm-1124_P100-1996",
                                "x-IBM1124", "cp1124" } },

                { "additional.x_IBM1097", null,
                        new String[] { "x-ibm-1097_P100-1995",
                                "x-IBM1097", "cp1097" } },

                { "additional.x_IBM1098", null,
                        new String[] { "x-ibm-1098_P100-1995",
                                "x-IBM1098", "cp1098" } },

                { "additional.x_MacCyrillic", null,
                        new String[] { "x-mac-cyrillic",
                                "x-MacCyrillic", "MacCyrillic" } },

                { "additional.x_MacGreek", null,
                        new String[] { "x-mac-greek", "x-MacGreek",
                                "MacGreek" } },

                { "additional.x_MacTurkish", null,
                        new String[] { "x-mac-turkish", "x-MacTurkish",
                                "MacTurkish" } },

                { "additional.windows_31j", null,
                        new String[] { "Shift_JIS", "windows-31j",
                                "MS932", "windows-932", "cp932",
                                "csWindows31J", "cp943c", "x-ms-cp932",
                                "ibm-943" } },

                { "additional.Big5", null,
                        new String[] { "Big5", "csBig5", "windows-950" } },

                { "additional.Big5_HKSCS", null,
                        new String[] { "Big5-HKSCS", "ibm-1375" } },

                { "additional.EUC_KR", null,
                        new String[] { "EUC-KR", "windows-51949",
                                "ibm-970" } },

                { "additional.GBK", null, new String[] { "GBK" } },

                { "additional.x_MS950_HKSCS", null,
                        new String[] { "x-ibm-1375_P100-2003",
                                "x-MS950-HKSCS", "MS950_HKSCS" } },

                { "additional.x_windows_949", null,
                        new String[] { "x-windows-949", "MS949" } },

                { "additional.GB18030", null,
                        new String[] { "GB18030", "windows-54936",
                                "ibm-1392" } },

                { "additional.GB2312", null,
                        new String[] { "GB2312", "cp1383", "EUC_CN" } }

        };
        return charsetsInfo;
    }

    /**
     * Instantiates the charset described by the given table row by calling
     * the public <code>(String, String[])</code> constructor of its class
     * with the canonical name and the aliases array, inside a privileged
     * action.
     *
     * @return the new charset, or <code>null</code> if the class could not
     *         be loaded or constructed
     */
    private Charset newCharsetInstance(Object[] entry) {
        final String className = packageName + "."
                + (String) entry[CHARSET_CLASS];
        final String aliases[] = aliasesOf(entry);
        final String canonicalName = aliases[0];
        return AccessController
                .doPrivileged(new PrivilegedAction<Charset>() {
                    public Charset run() {
                        try {
                            Class<?> cls = Class.forName(className);
                            Constructor<?> ctor = cls
                                    .getConstructor(new Class<?>[] {
                                            String.class,
                                            String[].class });
                            ctor.setAccessible(true);
                            return (Charset) ctor
                                    .newInstance(new Object[] {
                                            canonicalName, aliases });
                        } catch (Exception ignored) {
                            // Deliberate best effort: a charset that
                            // cannot be loaded or constructed is reported
                            // to the caller as not available (null).
                            return null;
                        }
                    }
                });
    }

    /** Returns the aliases array stored in the given table row. */
    private static String[] aliasesOf(Object[] entry) {
        return (String[]) entry[CHARSET_ALIASES];
    }

    /** Returns the canonical name, i.e. the first alias of the row. */
    private static String canonicalNameOf(Object[] entry) {
        return aliasesOf(entry)[0];
    }

    /**
     * Converts a charset name to the key used in <code>cache</code>.
     * A fixed locale is used so that the result does not depend on the
     * default locale (with a Turkish default locale "i" would otherwise be
     * upper-cased to a dotted capital I and lookups would miss).
     */
    private static String toLookupKey(String name) {
        return name.toUpperCase(Locale.ENGLISH);
    }

}
|