001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.conform.util;
059:
060: import java.util.*;
061:
062: /**
063: * MIME2Java is a convenience class which handles conversions between MIME charset names
064: * and Java encoding names.
065: * <p>The supported XML encodings are the intersection of XML-supported code sets and those
066: * supported in JDK 1.1.
067: * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
068: * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
069: * <p>Java encoding names are used on <var>encoding</var> parameters to
070: * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
071: * <P>
072: * <TABLE BORDER="0" WIDTH="100%">
073: * <TR>
074: * <TD WIDTH="33%">
075: * <P ALIGN="CENTER"><B>Common Name</B>
076: * </TD>
077: * <TD WIDTH="15%">
078: * <P ALIGN="CENTER"><B>Use this name in XML files</B>
079: * </TD>
080: * <TD WIDTH="12%">
081: * <P ALIGN="CENTER"><B>Name Type</B>
082: * </TD>
083: * <TD WIDTH="31%">
084: * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
085: * </TD>
086: * </TR>
087: * <TR>
088: * <TD WIDTH="33%">8 bit Unicode</TD>
089: * <TD WIDTH="15%">
090: * <P ALIGN="CENTER">UTF-8
091: * </TD>
092: * <TD WIDTH="12%">
093: * <P ALIGN="CENTER">IANA
094: * </TD>
095: * <TD WIDTH="31%">
096: * <P ALIGN="CENTER">UTF8
097: * </TD>
098: * </TR>
099: * <TR>
100: * <TD WIDTH="33%">ISO Latin 1</TD>
101: * <TD WIDTH="15%">
102: * <P ALIGN="CENTER">ISO-8859-1
103: * </TD>
104: * <TD WIDTH="12%">
105: * <P ALIGN="CENTER">MIME
106: * </TD>
107: * <TD WIDTH="31%">
108: * <P ALIGN="CENTER">ISO-8859-1
109: * </TD>
110: * </TR>
111: * <TR>
112: * <TD WIDTH="33%">ISO Latin 2</TD>
113: * <TD WIDTH="15%">
114: * <P ALIGN="CENTER">ISO-8859-2
115: * </TD>
116: * <TD WIDTH="12%">
117: * <P ALIGN="CENTER">MIME
118: * </TD>
119: * <TD WIDTH="31%">
120: * <P ALIGN="CENTER">ISO-8859-2
121: * </TD>
122: * </TR>
123: * <TR>
124: * <TD WIDTH="33%">ISO Latin 3</TD>
125: * <TD WIDTH="15%">
126: * <P ALIGN="CENTER">ISO-8859-3
127: * </TD>
128: * <TD WIDTH="12%">
129: * <P ALIGN="CENTER">MIME
130: * </TD>
131: * <TD WIDTH="31%">
132: * <P ALIGN="CENTER">ISO-8859-3
133: * </TD>
134: * </TR>
135: * <TR>
136: * <TD WIDTH="33%">ISO Latin 4</TD>
137: * <TD WIDTH="15%">
138: * <P ALIGN="CENTER">ISO-8859-4
139: * </TD>
140: * <TD WIDTH="12%">
141: * <P ALIGN="CENTER">MIME
142: * </TD>
143: * <TD WIDTH="31%">
144: * <P ALIGN="CENTER">ISO-8859-4
145: * </TD>
146: * </TR>
147: * <TR>
148: * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
149: * <TD WIDTH="15%">
150: * <P ALIGN="CENTER">ISO-8859-5
151: * </TD>
152: * <TD WIDTH="12%">
153: * <P ALIGN="CENTER">MIME
154: * </TD>
155: * <TD WIDTH="31%">
156: * <P ALIGN="CENTER">ISO-8859-5
157: * </TD>
158: * </TR>
159: * <TR>
160: * <TD WIDTH="33%">ISO Latin Arabic</TD>
161: * <TD WIDTH="15%">
162: * <P ALIGN="CENTER">ISO-8859-6
163: * </TD>
164: * <TD WIDTH="12%">
165: * <P ALIGN="CENTER">MIME
166: * </TD>
167: * <TD WIDTH="31%">
168: * <P ALIGN="CENTER">ISO-8859-6
169: * </TD>
170: * </TR>
171: * <TR>
172: * <TD WIDTH="33%">ISO Latin Greek</TD>
173: * <TD WIDTH="15%">
174: * <P ALIGN="CENTER">ISO-8859-7
175: * </TD>
176: * <TD WIDTH="12%">
177: * <P ALIGN="CENTER">MIME
178: * </TD>
179: * <TD WIDTH="31%">
180: * <P ALIGN="CENTER">ISO-8859-7
181: * </TD>
182: * </TR>
183: * <TR>
184: * <TD WIDTH="33%">ISO Latin Hebrew</TD>
185: * <TD WIDTH="15%">
186: * <P ALIGN="CENTER">ISO-8859-8
187: * </TD>
188: * <TD WIDTH="12%">
189: * <P ALIGN="CENTER">MIME
190: * </TD>
191: * <TD WIDTH="31%">
192: * <P ALIGN="CENTER">ISO-8859-8
193: * </TD>
194: * </TR>
195: * <TR>
196: * <TD WIDTH="33%">ISO Latin 5</TD>
197: * <TD WIDTH="15%">
198: * <P ALIGN="CENTER">ISO-8859-9
199: * </TD>
200: * <TD WIDTH="12%">
201: * <P ALIGN="CENTER">MIME
202: * </TD>
203: * <TD WIDTH="31%">
204: * <P ALIGN="CENTER">ISO-8859-9
205: * </TD>
206: * </TR>
207: * <TR>
208: * <TD WIDTH="33%">EBCDIC: US</TD>
209: * <TD WIDTH="15%">
210: * <P ALIGN="CENTER">ebcdic-cp-us
211: * </TD>
212: * <TD WIDTH="12%">
213: * <P ALIGN="CENTER">IANA
214: * </TD>
215: * <TD WIDTH="31%">
216: * <P ALIGN="CENTER">cp037
217: * </TD>
218: * </TR>
219: * <TR>
220: * <TD WIDTH="33%">EBCDIC: Canada</TD>
221: * <TD WIDTH="15%">
222: * <P ALIGN="CENTER">ebcdic-cp-ca
223: * </TD>
224: * <TD WIDTH="12%">
225: * <P ALIGN="CENTER">IANA
226: * </TD>
227: * <TD WIDTH="31%">
228: * <P ALIGN="CENTER">cp037
229: * </TD>
230: * </TR>
231: * <TR>
232: * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
233: * <TD WIDTH="15%">
234: * <P ALIGN="CENTER">ebcdic-cp-nl
235: * </TD>
236: * <TD WIDTH="12%">
237: * <P ALIGN="CENTER">IANA
238: * </TD>
239: * <TD WIDTH="31%">
240: * <P ALIGN="CENTER">cp037
241: * </TD>
242: * </TR>
243: * <TR>
244: * <TD WIDTH="33%">EBCDIC: Denmark</TD>
245: * <TD WIDTH="15%">
246: * <P ALIGN="CENTER">ebcdic-cp-dk
247: * </TD>
248: * <TD WIDTH="12%">
249: * <P ALIGN="CENTER">IANA
250: * </TD>
251: * <TD WIDTH="31%">
252: * <P ALIGN="CENTER">cp277
253: * </TD>
254: * </TR>
255: * <TR>
256: * <TD WIDTH="33%">EBCDIC: Norway</TD>
257: * <TD WIDTH="15%">
258: * <P ALIGN="CENTER">ebcdic-cp-no
259: * </TD>
260: * <TD WIDTH="12%">
261: * <P ALIGN="CENTER">IANA
262: * </TD>
263: * <TD WIDTH="31%">
264: * <P ALIGN="CENTER">cp277
265: * </TD>
266: * </TR>
267: * <TR>
268: * <TD WIDTH="33%">EBCDIC: Finland</TD>
269: * <TD WIDTH="15%">
270: * <P ALIGN="CENTER">ebcdic-cp-fi
271: * </TD>
272: * <TD WIDTH="12%">
273: * <P ALIGN="CENTER">IANA
274: * </TD>
275: * <TD WIDTH="31%">
276: * <P ALIGN="CENTER">cp278
277: * </TD>
278: * </TR>
279: * <TR>
280: * <TD WIDTH="33%">EBCDIC: Sweden</TD>
281: * <TD WIDTH="15%">
282: * <P ALIGN="CENTER">ebcdic-cp-se
283: * </TD>
284: * <TD WIDTH="12%">
285: * <P ALIGN="CENTER">IANA
286: * </TD>
287: * <TD WIDTH="31%">
288: * <P ALIGN="CENTER">cp278
289: * </TD>
290: * </TR>
291: * <TR>
292: * <TD WIDTH="33%">EBCDIC: Italy</TD>
293: * <TD WIDTH="15%">
294: * <P ALIGN="CENTER">ebcdic-cp-it
295: * </TD>
296: * <TD WIDTH="12%">
297: * <P ALIGN="CENTER">IANA
298: * </TD>
299: * <TD WIDTH="31%">
300: * <P ALIGN="CENTER">cp280
301: * </TD>
302: * </TR>
303: * <TR>
304: * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
305: * <TD WIDTH="15%">
306: * <P ALIGN="CENTER">ebcdic-cp-es
307: * </TD>
308: * <TD WIDTH="12%">
309: * <P ALIGN="CENTER">IANA
310: * </TD>
311: * <TD WIDTH="31%">
312: * <P ALIGN="CENTER">cp284
313: * </TD>
314: * </TR>
315: * <TR>
316: * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
317: * <TD WIDTH="15%">
318: * <P ALIGN="CENTER">ebcdic-cp-gb
319: * </TD>
320: * <TD WIDTH="12%">
321: * <P ALIGN="CENTER">IANA
322: * </TD>
323: * <TD WIDTH="31%">
324: * <P ALIGN="CENTER">cp285
325: * </TD>
326: * </TR>
327: * <TR>
328: * <TD WIDTH="33%">EBCDIC: France</TD>
329: * <TD WIDTH="15%">
330: * <P ALIGN="CENTER">ebcdic-cp-fr
331: * </TD>
332: * <TD WIDTH="12%">
333: * <P ALIGN="CENTER">IANA
334: * </TD>
335: * <TD WIDTH="31%">
336: * <P ALIGN="CENTER">cp297
337: * </TD>
338: * </TR>
339: * <TR>
340: * <TD WIDTH="33%">EBCDIC: Arabic</TD>
341: * <TD WIDTH="15%">
342: * <P ALIGN="CENTER">ebcdic-cp-ar1
343: * </TD>
344: * <TD WIDTH="12%">
345: * <P ALIGN="CENTER">IANA
346: * </TD>
347: * <TD WIDTH="31%">
348: * <P ALIGN="CENTER">cp420
349: * </TD>
350: * </TR>
351: * <TR>
352: * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
353: * <TD WIDTH="15%">
354: * <P ALIGN="CENTER">ebcdic-cp-he
355: * </TD>
356: * <TD WIDTH="12%">
357: * <P ALIGN="CENTER">IANA
358: * </TD>
359: * <TD WIDTH="31%">
360: * <P ALIGN="CENTER">cp424
361: * </TD>
362: * </TR>
363: * <TR>
364: * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
365: * <TD WIDTH="15%">
366: * <P ALIGN="CENTER">ebcdic-cp-ch
367: * </TD>
368: * <TD WIDTH="12%">
369: * <P ALIGN="CENTER">IANA
370: * </TD>
371: * <TD WIDTH="31%">
372: * <P ALIGN="CENTER">cp500
373: * </TD>
374: * </TR>
375: * <TR>
376: * <TD WIDTH="33%">EBCDIC: Roece</TD>
377: * <TD WIDTH="15%">
378: * <P ALIGN="CENTER">ebcdic-cp-roece
379: * </TD>
380: * <TD WIDTH="12%">
381: * <P ALIGN="CENTER">IANA
382: * </TD>
383: * <TD WIDTH="31%">
384: * <P ALIGN="CENTER">cp870
385: * </TD>
386: * </TR>
387: * <TR>
388: * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
389: * <TD WIDTH="15%">
390: * <P ALIGN="CENTER">ebcdic-cp-yu
391: * </TD>
392: * <TD WIDTH="12%">
393: * <P ALIGN="CENTER">IANA
394: * </TD>
395: * <TD WIDTH="31%">
396: * <P ALIGN="CENTER">cp870
397: * </TD>
398: * </TR>
399: * <TR>
400: * <TD WIDTH="33%">EBCDIC: Iceland</TD>
401: * <TD WIDTH="15%">
402: * <P ALIGN="CENTER">ebcdic-cp-is
403: * </TD>
404: * <TD WIDTH="12%">
405: * <P ALIGN="CENTER">IANA
406: * </TD>
407: * <TD WIDTH="31%">
408: * <P ALIGN="CENTER">cp871
409: * </TD>
410: * </TR>
411: * <TR>
412: * <TD WIDTH="33%">EBCDIC: Urdu</TD>
413: * <TD WIDTH="15%">
414: * <P ALIGN="CENTER">ebcdic-cp-ar2
415: * </TD>
416: * <TD WIDTH="12%">
417: * <P ALIGN="CENTER">IANA
418: * </TD>
419: * <TD WIDTH="31%">
420: * <P ALIGN="CENTER">cp918
421: * </TD>
422: * </TR>
423: * <TR>
424: * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
425: * <TD WIDTH="15%">
426: * <P ALIGN="CENTER">gb2312
427: * </TD>
428: * <TD WIDTH="12%">
429: * <P ALIGN="CENTER">MIME
430: * </TD>
431: * <TD WIDTH="31%">
432: * <P ALIGN="CENTER">GB2312
433: * </TD>
434: * </TR>
435: * <TR>
436: * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
437: * <TD WIDTH="15%">
438: * <P ALIGN="CENTER">euc-jp
439: * </TD>
440: * <TD WIDTH="12%">
441: * <P ALIGN="CENTER">MIME
442: * </TD>
443: * <TD WIDTH="31%">
444: * <P ALIGN="CENTER">eucjis
445: * </TD>
446: * </TR>
447: * <TR>
448: * <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
449: * <TD WIDTH="15%">
450: * <P ALIGN="CENTER">iso-2022-jp
451: * </TD>
452: * <TD WIDTH="12%">
453: * <P ALIGN="CENTER">MIME
454: * </TD>
455: * <TD WIDTH="31%">
456: * <P ALIGN="CENTER">JIS
457: * </TD>
458: * </TR>
459: * <TR>
460: * <TD WIDTH="33%">Japanese: Shift JIS</TD>
461: * <TD WIDTH="15%">
462: * <P ALIGN="CENTER">Shift_JIS
463: * </TD>
464: * <TD WIDTH="12%">
465: * <P ALIGN="CENTER">MIME
466: * </TD>
467: * <TD WIDTH="31%">
468: * <P ALIGN="CENTER">SJIS
469: * </TD>
470: * </TR>
471: * <TR>
472: * <TD WIDTH="33%">Japanese Windows: An extension of Shift JIS</TD>
473: * <TD WIDTH="15%">
474: * <P ALIGN="CENTER">Windows-31J
475: * </TD>
476: * <TD WIDTH="12%">
477: * <P ALIGN="CENTER">MIME
478: * </TD>
479: * <TD WIDTH="31%">
480: * <P ALIGN="CENTER">MS932 (since JDK 1.2)
481: * </TD>
482: * </TR>
483: * <TR>
484: * <TD WIDTH="33%">Chinese: Big5</TD>
485: * <TD WIDTH="15%">
486: * <P ALIGN="CENTER">Big5
487: * </TD>
488: * <TD WIDTH="12%">
489: * <P ALIGN="CENTER">MIME
490: * </TD>
491: * <TD WIDTH="31%">
492: * <P ALIGN="CENTER">Big5
493: * </TD>
494: * </TR>
495: * <TR>
496: * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
497: * <TD WIDTH="15%">
498: * <P ALIGN="CENTER">euc-kr
499: * </TD>
500: * <TD WIDTH="12%">
501: * <P ALIGN="CENTER">MIME
502: * </TD>
503: * <TD WIDTH="31%">
504: * <P ALIGN="CENTER">KSC5601
505: * </TD>
506: * </TR>
507: * <TR>
508: * <TD WIDTH="33%">Cyrillic</TD>
509: * <TD WIDTH="15%">
510: * <P ALIGN="CENTER">koi8-r
511: * </TD>
512: * <TD WIDTH="12%">
513: * <P ALIGN="CENTER">MIME
514: * </TD>
515: * <TD WIDTH="31%">
516: * <P ALIGN="CENTER">koi8-r
517: * </TD>
518: * </TR>
519: * </TABLE>
520: *
521: * @version $Id: MIME2Java.java 435 2005-04-29 13:36:50Z hengels $
522: * @version ORIGINAL - MIME2Java.java,v 1.2 2000/12/14 19:21:46 lehors
523: * @author TAMURA Kent <kent@trl.ibm.co.jp>
524: */
public class MIME2Java {

    /** Upper-cased MIME charset name -> Java encoding name. Read-only after class initialization. */
    private static final Hashtable s_enchash = new Hashtable();

    /** Upper-cased Java encoding name -> MIME charset name. Read-only after class initialization. */
    private static final Hashtable s_revhash = new Hashtable();

    static {
        // <preferred MIME name (uppercase)>, <Java encoding name>
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "ASCII");
        s_enchash.put("ISO-8859-1", "8859_1");
        s_enchash.put("ISO-8859-2", "8859_2");
        s_enchash.put("ISO-8859-3", "8859_3");
        s_enchash.put("ISO-8859-4", "8859_4");
        s_enchash.put("ISO-8859-5", "8859_5");
        s_enchash.put("ISO-8859-6", "8859_6");
        s_enchash.put("ISO-8859-7", "8859_7");
        s_enchash.put("ISO-8859-8", "8859_8");
        s_enchash.put("ISO-8859-9", "8859_9");
        s_enchash.put("ISO-2022-JP", "JIS");
        s_enchash.put("SHIFT_JIS", "SJIS");

        // MS932 is suitable for Windows-31J, but JDK 1.1.x does not support
        // MS932, so fall back to SJIS there. A missing "java.version"
        // property is treated like a newer JVM.
        String version = System.getProperty("java.version");
        boolean isJdk11 = version != null
                && (version.equals("1.1") || version.startsWith("1.1."));
        s_enchash.put("WINDOWS-31J", isJdk11 ? "SJIS" : "MS932");

        s_enchash.put("EUC-JP", "EUCJIS");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "KSC5601");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        // <Java encoding name (uppercase)>, <preferred MIME name>
        s_revhash.put("UTF8", "UTF-8");
        s_revhash.put("ASCII", "US-ASCII");
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("MS932", "WINDOWS-31J");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // Several EBCDIC MIME names share one Java code page, but a code page
        // can only map back to a single MIME name. This table used to be
        // filled with repeated put() calls on the same key, so the last name
        // registered silently won; those winners are listed here explicitly
        // to keep reverse() returning exactly what it always has.
        s_revhash.put("CP037", "EBCDIC-CP-NL");   // also EBCDIC-CP-US, EBCDIC-CP-CA
        s_revhash.put("CP277", "EBCDIC-CP-NO");   // also EBCDIC-CP-DK
        s_revhash.put("CP278", "EBCDIC-CP-SE");   // also EBCDIC-CP-FI
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-YU");   // also EBCDIC-CP-ROECE
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    }

    /** Not instantiable: this class only exposes static lookups. */
    private MIME2Java() {
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * The lookup is case insensitive and does not depend on the JVM default locale.
     *
     * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                        ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                        ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS, Windows-31J,
     *                        EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                        EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                        EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                        EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                        EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                        EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is <var>null</var> or unknown.
     * @see #reverse
     */
    public static String convert(String mimeCharsetName) {
        if (mimeCharsetName == null) {
            return null;
        }
        // Upper-case with a fixed locale: with the default locale, a Turkish
        // JVM would turn 'i' into a dotted capital I and miss every key
        // containing an 'I' (e.g. "iso-8859-1").
        return (String) s_enchash.get(mimeCharsetName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * The lookup is case insensitive and does not depend on the JVM default locale.
     * When several MIME names share one Java encoding, a single one of them is returned:
     * "CP037" yields EBCDIC-CP-NL, "CP277" yields EBCDIC-CP-NO, "CP278" yields EBCDIC-CP-SE
     * and "CP870" yields EBCDIC-CP-YU.
     *
     * @param encoding Case insensitive Java encoding name: <code>UTF8, ASCII, 8859_1, 8859_2, 8859_3,
     *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, MS932, EUCJIS,
     *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                 and CP918</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
     *         <var>null</var> or unknown.
     * @see #convert
     */
    public static String reverse(String encoding) {
        if (encoding == null) {
            return null;
        }
        // Fixed locale for the same reason as in convert(): "jis" or "ascii"
        // must upper-case to plain ASCII letters regardless of the default locale.
        return (String) s_revhash.get(encoding.toUpperCase(Locale.ENGLISH));
    }
}
|