001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.readers;
059:
060: import java.util.*;
061:
062: /**
063: * MIME2Java is a convenience class which handles conversions between MIME charset names
064: * and Java encoding names.
065: * <p>The supported XML encodings are the intersection of XML-supported code sets and those
066: * supported in JDK 1.1.
067: * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
068: * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
069: * <p>Java encoding names are used on <var>encoding</var> parameters to
070: * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
071: * <P>
072: * <TABLE BORDER="0" WIDTH="100%">
073: * <TR>
074: * <TD WIDTH="33%">
075: * <P ALIGN="CENTER"><B>Common Name</B>
076: * </TD>
077: * <TD WIDTH="15%">
078: * <P ALIGN="CENTER"><B>Use this name in XML files</B>
079: * </TD>
080: * <TD WIDTH="12%">
081: * <P ALIGN="CENTER"><B>Name Type</B>
082: * </TD>
083: * <TD WIDTH="31%">
084: * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
085: * </TD>
086: * </TR>
087: * <TR>
088: * <TD WIDTH="33%">8 bit Unicode</TD>
089: * <TD WIDTH="15%">
090: * <P ALIGN="CENTER">UTF-8
091: * </TD>
092: * <TD WIDTH="12%">
093: * <P ALIGN="CENTER">IANA
094: * </TD>
095: * <TD WIDTH="31%">
096: * <P ALIGN="CENTER">UTF8
097: * </TD>
098: * </TR>
099: * <TR>
100: * <TD WIDTH="33%">ISO Latin 1</TD>
101: * <TD WIDTH="15%">
102: * <P ALIGN="CENTER">ISO-8859-1
103: * </TD>
104: * <TD WIDTH="12%">
105: * <P ALIGN="CENTER">MIME
106: * </TD>
107: * <TD WIDTH="31%">
108: * <P ALIGN="CENTER">ISO-8859-1
109: * </TD>
110: * </TR>
111: * <TR>
112: * <TD WIDTH="33%">ISO Latin 2</TD>
113: * <TD WIDTH="15%">
114: * <P ALIGN="CENTER">ISO-8859-2
115: * </TD>
116: * <TD WIDTH="12%">
117: * <P ALIGN="CENTER">MIME
118: * </TD>
119: * <TD WIDTH="31%">
120: * <P ALIGN="CENTER">ISO-8859-2
121: * </TD>
122: * </TR>
123: * <TR>
124: * <TD WIDTH="33%">ISO Latin 3</TD>
125: * <TD WIDTH="15%">
126: * <P ALIGN="CENTER">ISO-8859-3
127: * </TD>
128: * <TD WIDTH="12%">
129: * <P ALIGN="CENTER">MIME
130: * </TD>
131: * <TD WIDTH="31%">
132: * <P ALIGN="CENTER">ISO-8859-3
133: * </TD>
134: * </TR>
135: * <TR>
136: * <TD WIDTH="33%">ISO Latin 4</TD>
137: * <TD WIDTH="15%">
138: * <P ALIGN="CENTER">ISO-8859-4
139: * </TD>
140: * <TD WIDTH="12%">
141: * <P ALIGN="CENTER">MIME
142: * </TD>
143: * <TD WIDTH="31%">
144: * <P ALIGN="CENTER">ISO-8859-4
145: * </TD>
146: * </TR>
147: * <TR>
148: * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
149: * <TD WIDTH="15%">
150: * <P ALIGN="CENTER">ISO-8859-5
151: * </TD>
152: * <TD WIDTH="12%">
153: * <P ALIGN="CENTER">MIME
154: * </TD>
155: * <TD WIDTH="31%">
156: * <P ALIGN="CENTER">ISO-8859-5
157: * </TD>
158: * </TR>
159: * <TR>
160: * <TD WIDTH="33%">ISO Latin Arabic</TD>
161: * <TD WIDTH="15%">
162: * <P ALIGN="CENTER">ISO-8859-6
163: * </TD>
164: * <TD WIDTH="12%">
165: * <P ALIGN="CENTER">MIME
166: * </TD>
167: * <TD WIDTH="31%">
168: * <P ALIGN="CENTER">ISO-8859-6
169: * </TD>
170: * </TR>
171: * <TR>
172: * <TD WIDTH="33%">ISO Latin Greek</TD>
173: * <TD WIDTH="15%">
174: * <P ALIGN="CENTER">ISO-8859-7
175: * </TD>
176: * <TD WIDTH="12%">
177: * <P ALIGN="CENTER">MIME
178: * </TD>
179: * <TD WIDTH="31%">
180: * <P ALIGN="CENTER">ISO-8859-7
181: * </TD>
182: * </TR>
183: * <TR>
184: * <TD WIDTH="33%">ISO Latin Hebrew</TD>
185: * <TD WIDTH="15%">
186: * <P ALIGN="CENTER">ISO-8859-8
187: * </TD>
188: * <TD WIDTH="12%">
189: * <P ALIGN="CENTER">MIME
190: * </TD>
191: * <TD WIDTH="31%">
192: * <P ALIGN="CENTER">ISO-8859-8
193: * </TD>
194: * </TR>
195: * <TR>
196: * <TD WIDTH="33%">ISO Latin 5</TD>
197: * <TD WIDTH="15%">
198: * <P ALIGN="CENTER">ISO-8859-9
199: * </TD>
200: * <TD WIDTH="12%">
201: * <P ALIGN="CENTER">MIME
202: * </TD>
203: * <TD WIDTH="31%">
204: * <P ALIGN="CENTER">ISO-8859-9
205: * </TD>
206: * </TR>
207: * <TR>
208: * <TD WIDTH="33%">EBCDIC: US</TD>
209: * <TD WIDTH="15%">
210: * <P ALIGN="CENTER">ebcdic-cp-us
211: * </TD>
212: * <TD WIDTH="12%">
213: * <P ALIGN="CENTER">IANA
214: * </TD>
215: * <TD WIDTH="31%">
216: * <P ALIGN="CENTER">cp037
217: * </TD>
218: * </TR>
219: * <TR>
220: * <TD WIDTH="33%">EBCDIC: Canada</TD>
221: * <TD WIDTH="15%">
222: * <P ALIGN="CENTER">ebcdic-cp-ca
223: * </TD>
224: * <TD WIDTH="12%">
225: * <P ALIGN="CENTER">IANA
226: * </TD>
227: * <TD WIDTH="31%">
228: * <P ALIGN="CENTER">cp037
229: * </TD>
230: * </TR>
231: * <TR>
232: * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
233: * <TD WIDTH="15%">
234: * <P ALIGN="CENTER">ebcdic-cp-nl
235: * </TD>
236: * <TD WIDTH="12%">
237: * <P ALIGN="CENTER">IANA
238: * </TD>
239: * <TD WIDTH="31%">
240: * <P ALIGN="CENTER">cp037
241: * </TD>
242: * </TR>
243: * <TR>
244: * <TD WIDTH="33%">EBCDIC: Denmark</TD>
245: * <TD WIDTH="15%">
246: * <P ALIGN="CENTER">ebcdic-cp-dk
247: * </TD>
248: * <TD WIDTH="12%">
249: * <P ALIGN="CENTER">IANA
250: * </TD>
251: * <TD WIDTH="31%">
252: * <P ALIGN="CENTER">cp277
253: * </TD>
254: * </TR>
255: * <TR>
256: * <TD WIDTH="33%">EBCDIC: Norway</TD>
257: * <TD WIDTH="15%">
258: * <P ALIGN="CENTER">ebcdic-cp-no
259: * </TD>
260: * <TD WIDTH="12%">
261: * <P ALIGN="CENTER">IANA
262: * </TD>
263: * <TD WIDTH="31%">
264: * <P ALIGN="CENTER">cp277
265: * </TD>
266: * </TR>
267: * <TR>
268: * <TD WIDTH="33%">EBCDIC: Finland</TD>
269: * <TD WIDTH="15%">
270: * <P ALIGN="CENTER">ebcdic-cp-fi
271: * </TD>
272: * <TD WIDTH="12%">
273: * <P ALIGN="CENTER">IANA
274: * </TD>
275: * <TD WIDTH="31%">
276: * <P ALIGN="CENTER">cp278
277: * </TD>
278: * </TR>
279: * <TR>
280: * <TD WIDTH="33%">EBCDIC: Sweden</TD>
281: * <TD WIDTH="15%">
282: * <P ALIGN="CENTER">ebcdic-cp-se
283: * </TD>
284: * <TD WIDTH="12%">
285: * <P ALIGN="CENTER">IANA
286: * </TD>
287: * <TD WIDTH="31%">
288: * <P ALIGN="CENTER">cp278
289: * </TD>
290: * </TR>
291: * <TR>
292: * <TD WIDTH="33%">EBCDIC: Italy</TD>
293: * <TD WIDTH="15%">
294: * <P ALIGN="CENTER">ebcdic-cp-it
295: * </TD>
296: * <TD WIDTH="12%">
297: * <P ALIGN="CENTER">IANA
298: * </TD>
299: * <TD WIDTH="31%">
300: * <P ALIGN="CENTER">cp280
301: * </TD>
302: * </TR>
303: * <TR>
304: * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
305: * <TD WIDTH="15%">
306: * <P ALIGN="CENTER">ebcdic-cp-es
307: * </TD>
308: * <TD WIDTH="12%">
309: * <P ALIGN="CENTER">IANA
310: * </TD>
311: * <TD WIDTH="31%">
312: * <P ALIGN="CENTER">cp284
313: * </TD>
314: * </TR>
315: * <TR>
316: * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
317: * <TD WIDTH="15%">
318: * <P ALIGN="CENTER">ebcdic-cp-gb
319: * </TD>
320: * <TD WIDTH="12%">
321: * <P ALIGN="CENTER">IANA
322: * </TD>
323: * <TD WIDTH="31%">
324: * <P ALIGN="CENTER">cp285
325: * </TD>
326: * </TR>
327: * <TR>
328: * <TD WIDTH="33%">EBCDIC: France</TD>
329: * <TD WIDTH="15%">
330: * <P ALIGN="CENTER">ebcdic-cp-fr
331: * </TD>
332: * <TD WIDTH="12%">
333: * <P ALIGN="CENTER">IANA
334: * </TD>
335: * <TD WIDTH="31%">
336: * <P ALIGN="CENTER">cp297
337: * </TD>
338: * </TR>
339: * <TR>
340: * <TD WIDTH="33%">EBCDIC: Arabic</TD>
341: * <TD WIDTH="15%">
342: * <P ALIGN="CENTER">ebcdic-cp-ar1
343: * </TD>
344: * <TD WIDTH="12%">
345: * <P ALIGN="CENTER">IANA
346: * </TD>
347: * <TD WIDTH="31%">
348: * <P ALIGN="CENTER">cp420
349: * </TD>
350: * </TR>
351: * <TR>
352: * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
353: * <TD WIDTH="15%">
354: * <P ALIGN="CENTER">ebcdic-cp-he
355: * </TD>
356: * <TD WIDTH="12%">
357: * <P ALIGN="CENTER">IANA
358: * </TD>
359: * <TD WIDTH="31%">
360: * <P ALIGN="CENTER">cp424
361: * </TD>
362: * </TR>
363: * <TR>
364: * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
365: * <TD WIDTH="15%">
366: * <P ALIGN="CENTER">ebcdic-cp-ch
367: * </TD>
368: * <TD WIDTH="12%">
369: * <P ALIGN="CENTER">IANA
370: * </TD>
371: * <TD WIDTH="31%">
372: * <P ALIGN="CENTER">cp500
373: * </TD>
374: * </TR>
375: * <TR>
376: * <TD WIDTH="33%">EBCDIC: Roece</TD>
377: * <TD WIDTH="15%">
378: * <P ALIGN="CENTER">ebcdic-cp-roece
379: * </TD>
380: * <TD WIDTH="12%">
381: * <P ALIGN="CENTER">IANA
382: * </TD>
383: * <TD WIDTH="31%">
384: * <P ALIGN="CENTER">cp870
385: * </TD>
386: * </TR>
387: * <TR>
388: * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
389: * <TD WIDTH="15%">
390: * <P ALIGN="CENTER">ebcdic-cp-yu
391: * </TD>
392: * <TD WIDTH="12%">
393: * <P ALIGN="CENTER">IANA
394: * </TD>
395: * <TD WIDTH="31%">
396: * <P ALIGN="CENTER">cp870
397: * </TD>
398: * </TR>
399: * <TR>
400: * <TD WIDTH="33%">EBCDIC: Iceland</TD>
401: * <TD WIDTH="15%">
402: * <P ALIGN="CENTER">ebcdic-cp-is
403: * </TD>
404: * <TD WIDTH="12%">
405: * <P ALIGN="CENTER">IANA
406: * </TD>
407: * <TD WIDTH="31%">
408: * <P ALIGN="CENTER">cp871
409: * </TD>
410: * </TR>
411: * <TR>
412: * <TD WIDTH="33%">EBCDIC: Urdu</TD>
413: * <TD WIDTH="15%">
414: * <P ALIGN="CENTER">ebcdic-cp-ar2
415: * </TD>
416: * <TD WIDTH="12%">
417: * <P ALIGN="CENTER">IANA
418: * </TD>
419: * <TD WIDTH="31%">
420: * <P ALIGN="CENTER">cp918
421: * </TD>
422: * </TR>
423: * <TR>
424: * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
425: * <TD WIDTH="15%">
426: * <P ALIGN="CENTER">gb2312
427: * </TD>
428: * <TD WIDTH="12%">
429: * <P ALIGN="CENTER">MIME
430: * </TD>
431: * <TD WIDTH="31%">
432: * <P ALIGN="CENTER">GB2312
433: * </TD>
434: * </TR>
435: * <TR>
436: * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
437: * <TD WIDTH="15%">
438: * <P ALIGN="CENTER">euc-jp
439: * </TD>
440: * <TD WIDTH="12%">
441: * <P ALIGN="CENTER">MIME
442: * </TD>
443: * <TD WIDTH="31%">
444: * <P ALIGN="CENTER">EUC_JP
445: * </TD>
446: * </TR>
447: * <TR>
448: * <TD WIDTH="33%">Japanese: ISO-2022-jp</TD>
449: * <TD WIDTH="15%">
450: * <P ALIGN="CENTER">ISO-2022-jp
451: * </TD>
452: * <TD WIDTH="12%">
453: * <P ALIGN="CENTER">MIME
454: * </TD>
455: * <TD WIDTH="31%">
456: * <P ALIGN="CENTER">ISO2022JP
457: * </TD>
458: * </TR>
459: * <TR>
460: * <TD WIDTH="33%">Japanese: Shift JIS</TD>
461: * <TD WIDTH="15%">
462: * <P ALIGN="CENTER">Shift_JIS
463: * </TD>
464: * <TD WIDTH="12%">
465: * <P ALIGN="CENTER">MIME
466: * </TD>
467: * <TD WIDTH="31%">
468: * <P ALIGN="CENTER">SJIS
469: * </TD>
470: * </TR>
471: * <TR>
472: * <TD WIDTH="33%">Japanese Windows: An extension of Shift JIS</TD>
473: * <TD WIDTH="15%">
474: * <P ALIGN="CENTER">Windows-31J
475: * </TD>
476: * <TD WIDTH="12%">
477: * <P ALIGN="CENTER">MIME
478: * </TD>
479: * <TD WIDTH="31%">
480: * <P ALIGN="CENTER">MS932 (since JDK 1.2)
481: * </TD>
482: * </TR>
483: * <TR>
484: * <TD WIDTH="33%">Chinese: Big5</TD>
485: * <TD WIDTH="15%">
486: * <P ALIGN="CENTER">Big5
487: * </TD>
488: * <TD WIDTH="12%">
489: * <P ALIGN="CENTER">MIME
490: * </TD>
491: * <TD WIDTH="31%">
492: * <P ALIGN="CENTER">Big5
493: * </TD>
494: * </TR>
495: * <TR>
496: * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
497: * <TD WIDTH="15%">
498: * <P ALIGN="CENTER">euc-kr
499: * </TD>
500: * <TD WIDTH="12%">
501: * <P ALIGN="CENTER">MIME
502: * </TD>
503: * <TD WIDTH="31%">
504: * <P ALIGN="CENTER">EUC_KR
505: * </TD>
506: * </TR>
507: * <TR>
508: * <TD WIDTH="33%">Cyrillic</TD>
509: * <TD WIDTH="15%">
510: * <P ALIGN="CENTER">koi8-r
511: * </TD>
512: * <TD WIDTH="12%">
513: * <P ALIGN="CENTER">MIME
514: * </TD>
515: * <TD WIDTH="31%">
516: * <P ALIGN="CENTER">koi8-r
517: * </TD>
518: * </TR>
519: * </TABLE>
520: *
521: * @version $Id: MIME2Java.java,v 1.8 2001/07/12 20:05:36 neilg Exp $
522: * @author TAMURA Kent <kent@trl.ibm.co.jp>
523: */
public class MIME2Java {

    /**
     * Forward table: upper-cased MIME charset name (or alias) to Java encoding name.
     * Every key MUST be upper case because {@link #convert} upper-cases its argument
     * before the lookup.
     */
    private static final Hashtable s_enchash = new Hashtable();

    /**
     * Reverse table: upper-cased Java encoding name to the preferred MIME charset name.
     * Each Java encoding is registered exactly once; a Hashtable keeps only the last
     * value stored under a key, so listing aliases here would silently replace the
     * preferred name.
     */
    private static final Hashtable s_revhash = new Hashtable();

    static {
        fillEncodingTable();
        fillReverseTable();
    }

    /** Populates the MIME-name to Java-name table, including known aliases. */
    private static void fillEncodingTable() {
        // <MIME name or alias (uppercase)>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "ASCII");
        s_enchash.put("ISO-IR-6", "ASCII");
        s_enchash.put("ANSI_X3.4-1986", "ASCII");
        s_enchash.put("ISO_646.IRV:1991", "ASCII");
        s_enchash.put("ASCII", "ASCII");
        s_enchash.put("ISO646-US", "ASCII");
        s_enchash.put("US", "ASCII");
        s_enchash.put("IBM367", "ASCII");
        s_enchash.put("CP367", "ASCII");
        s_enchash.put("ISO-8859-1", "ISO8859_1");
        s_enchash.put("ISO-IR-100", "ISO8859_1");
        s_enchash.put("ISO_8859-1", "ISO8859_1");
        s_enchash.put("LATIN1", "ISO8859_1");
        s_enchash.put("L1", "ISO8859_1");
        s_enchash.put("IBM819", "ISO8859_1");
        s_enchash.put("CP819", "ISO8859_1");
        s_enchash.put("ISO-8859-2", "ISO8859_2");
        s_enchash.put("ISO-IR-101", "ISO8859_2");
        s_enchash.put("ISO_8859-2", "ISO8859_2");
        s_enchash.put("LATIN2", "ISO8859_2");
        s_enchash.put("L2", "ISO8859_2");
        s_enchash.put("ISO-8859-3", "ISO8859_3");
        s_enchash.put("ISO-IR-109", "ISO8859_3");
        s_enchash.put("ISO_8859-3", "ISO8859_3");
        s_enchash.put("LATIN3", "ISO8859_3");
        s_enchash.put("L3", "ISO8859_3");
        s_enchash.put("ISO-8859-4", "ISO8859_4");
        s_enchash.put("ISO-IR-110", "ISO8859_4");
        s_enchash.put("ISO_8859-4", "ISO8859_4");
        s_enchash.put("LATIN4", "ISO8859_4");
        s_enchash.put("L4", "ISO8859_4");
        s_enchash.put("ISO-8859-5", "ISO8859_5");
        s_enchash.put("ISO-IR-144", "ISO8859_5");
        s_enchash.put("ISO_8859-5", "ISO8859_5");
        s_enchash.put("CYRILLIC", "ISO8859_5");
        s_enchash.put("ISO-8859-6", "ISO8859_6");
        s_enchash.put("ISO-IR-127", "ISO8859_6");
        s_enchash.put("ISO_8859-6", "ISO8859_6");
        s_enchash.put("ECMA-114", "ISO8859_6");
        s_enchash.put("ASMO-708", "ISO8859_6");
        s_enchash.put("ARABIC", "ISO8859_6");
        s_enchash.put("ISO-8859-7", "ISO8859_7");
        s_enchash.put("ISO-IR-126", "ISO8859_7");
        s_enchash.put("ISO_8859-7", "ISO8859_7");
        s_enchash.put("ELOT_928", "ISO8859_7");
        s_enchash.put("ECMA-118", "ISO8859_7");
        s_enchash.put("GREEK", "ISO8859_7");
        s_enchash.put("GREEK8", "ISO8859_7");
        s_enchash.put("ISO-8859-8", "ISO8859_8");
        s_enchash.put("ISO-IR-138", "ISO8859_8");
        s_enchash.put("ISO_8859-8", "ISO8859_8");
        s_enchash.put("HEBREW", "ISO8859_8");
        s_enchash.put("ISO-8859-9", "ISO8859_9");
        s_enchash.put("ISO-IR-148", "ISO8859_9");
        s_enchash.put("ISO_8859-9", "ISO8859_9");
        s_enchash.put("LATIN5", "ISO8859_9");
        s_enchash.put("L5", "ISO8859_9");
        s_enchash.put("ISO-2022-JP", "ISO2022JP");
        s_enchash.put("SHIFT_JIS", "SJIS");
        // Stored in upper case: a mixed-case key could never match the
        // upper-cased lookup performed by convert().
        s_enchash.put("MS_KANJI", "SJIS");

        // MS932 is suitable for Windows-31J,
        // but JDK 1.1.x does not support MS932.
        String version = System.getProperty("java.version");
        boolean isJdk11 = version != null
                && (version.equals("1.1") || version.startsWith("1.1."));
        s_enchash.put("WINDOWS-31J", isJdk11 ? "SJIS" : "MS932");

        s_enchash.put("EUC-JP", "EUC_JP");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "EUC_KR");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-WT", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-BE", "CP500");
        s_enchash.put("CP-AR", "CP868");
        s_enchash.put("CP-GR", "CP869");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // Add support for Cp1252 and its friends
        s_enchash.put("WINDOWS-1250", "Cp1250");
        s_enchash.put("WINDOWS-1251", "Cp1251");
        s_enchash.put("WINDOWS-1252", "Cp1252");
        s_enchash.put("WINDOWS-1253", "Cp1253");
        s_enchash.put("WINDOWS-1254", "Cp1254");
        s_enchash.put("WINDOWS-1255", "Cp1255");
        s_enchash.put("WINDOWS-1256", "Cp1256");
        s_enchash.put("WINDOWS-1257", "Cp1257");
        s_enchash.put("WINDOWS-1258", "Cp1258");
        s_enchash.put("TIS-620", "TIS620");
        // j:CNS11643 -> EUC-TW?
        s_enchash.put("ISO-2022-CN", "ISO2022CN");
        s_enchash.put("X0201", "JIS0201");
        s_enchash.put("X0208", "JIS0208");
        s_enchash.put("X0212", "JIS0212");
        s_enchash.put("ISO-IR-159", "JIS0212");
    }

    /**
     * Populates the Java-name to preferred-MIME-name table. Where the forward
     * table knows several MIME aliases for one Java encoding, the name listed
     * first there is used as the preferred one.
     */
    private static void fillReverseTable() {
        // <Java encoding name (uppercase)>, <preferred MIME name>
        s_revhash.put("UTF8", "UTF-8");
        s_revhash.put("ASCII", "US-ASCII");
        s_revhash.put("ISO8859_1", "ISO-8859-1");
        s_revhash.put("ISO8859_2", "ISO-8859-2");
        s_revhash.put("ISO8859_3", "ISO-8859-3");
        s_revhash.put("ISO8859_4", "ISO-8859-4");
        s_revhash.put("ISO8859_5", "ISO-8859-5");
        s_revhash.put("ISO8859_6", "ISO-8859-6");
        s_revhash.put("ISO8859_7", "ISO-8859-7");
        s_revhash.put("ISO8859_8", "ISO-8859-8");
        s_revhash.put("ISO8859_9", "ISO-8859-9");
        s_revhash.put("ISO2022JP", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("MS932", "WINDOWS-31J");
        s_revhash.put("EUC_JP", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("EUC_KR", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        s_revhash.put("CP037", "EBCDIC-CP-US");
        s_revhash.put("CP277", "EBCDIC-CP-DK");
        s_revhash.put("CP278", "EBCDIC-CP-FI");
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP868", "CP-AR");
        s_revhash.put("CP869", "CP-GR");
        s_revhash.put("CP870", "EBCDIC-CP-ROECE");
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");

        // Cp1252 and friends: the forward table stores these Java names in
        // mixed case ("Cp1252"); the keys here are upper case because
        // reverse() upper-cases its argument before the lookup.
        s_revhash.put("CP1250", "WINDOWS-1250");
        s_revhash.put("CP1251", "WINDOWS-1251");
        s_revhash.put("CP1252", "WINDOWS-1252");
        s_revhash.put("CP1253", "WINDOWS-1253");
        s_revhash.put("CP1254", "WINDOWS-1254");
        s_revhash.put("CP1255", "WINDOWS-1255");
        s_revhash.put("CP1256", "WINDOWS-1256");
        s_revhash.put("CP1257", "WINDOWS-1257");
        s_revhash.put("CP1258", "WINDOWS-1258");
        s_revhash.put("TIS620", "TIS-620");
        s_revhash.put("ISO2022CN", "ISO-2022-CN");
        s_revhash.put("JIS0201", "X0201");
        s_revhash.put("JIS0208", "X0208");
        s_revhash.put("JIS0212", "X0212");
    }

    /** Not instantiable: this class only offers static lookups. */
    private MIME2Java() {
    }

    /**
     * Upper-cases a charset name for table lookup. A fixed locale is used so
     * that the result does not depend on the JVM default locale (under a
     * Turkish default locale, plain <code>toUpperCase()</code> turns "i" into
     * a dotted capital I and the lookup would miss).
     */
    private static String normalize(String name) {
        return name.toUpperCase(Locale.ENGLISH);
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * @param mimeCharsetName Case insensitive MIME charset name or alias, for example
     *                        <code>UTF-8, US-ASCII, ISO-8859-1 ... ISO-8859-9, ISO-2022-JP,
     *                        Shift_JIS, MS_Kanji, Windows-31J, EUC-JP, GB2312, Big5, EUC-KR,
     *                        ISO-2022-KR, KOI8-R, EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL,
     *                        EBCDIC-CP-DK, EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE,
     *                        EBCDIC-CP-IT, EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR,
     *                        EBCDIC-CP-AR1, EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE,
     *                        EBCDIC-CP-YU, EBCDIC-CP-IS, EBCDIC-CP-AR2,
     *                        WINDOWS-1250 ... WINDOWS-1258, TIS-620 and ISO-2022-CN</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is <var>null</var> or unknown.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        if (mimeCharsetName == null) {
            return null;
        }
        return (String) s_enchash.get(normalize(mimeCharsetName));
    }

    /**
     * Convert a Java encoding name to the preferred MIME charset name.
     * @param encoding Case insensitive Java encoding name: <code>UTF8, ASCII, ISO8859_1,
     *                 ISO8859_2, ISO8859_3, ISO8859_4, ISO8859_5, ISO8859_6, ISO8859_7,
     *                 ISO8859_8, ISO8859_9, ISO2022JP, SJIS, MS932, EUC_JP, GB2312, BIG5,
     *                 EUC_KR, ISO2022KR, KOI8_R, CP037, CP277, CP278, CP280, CP284, CP285,
     *                 CP297, CP420, CP424, CP500, CP868, CP869, CP870, CP871, CP918,
     *                 CP1250 ... CP1258, TIS620, ISO2022CN, JIS0201, JIS0208
     *                 and JIS0212</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
     *         <var>null</var> or unknown.
     * @see #convert
     */
    public static String reverse(String encoding) {
        if (encoding == null) {
            return null;
        }
        return (String) s_revhash.get(normalize(encoding));
    }
}
|