001: package com.bostechcorp.cbesb.common.util;
002:
003: import java.io.BufferedReader;
004: import java.io.ByteArrayInputStream;
005: import java.io.File;
006: import java.io.FileInputStream;
007: import java.io.FileNotFoundException;
008: import java.io.InputStream;
009: import java.io.InputStreamReader;
010: import java.io.UnsupportedEncodingException;
011: import java.util.HashMap;
012:
/**
 * Static helpers for re-coding strings between character encodings, wrapping
 * strings and files in streams/readers, and translating Java encoding names
 * into official charset names.
 *
 * <p>The name table in {@link #encodingMap} is filled lazily on first lookup;
 * no synchronization is performed around that initialisation.
 */
public class EncodingUtil {

    /** Name returned when an encoding name cannot be resolved at all. */
    private static final String FALLBACK_ENCODING = "UTF-8";

    /**
     * Maps Java encoding names to official charset names. Starts out empty and
     * is populated by {@link #initialEncodingMap()}.
     */
    public static HashMap<String, String> encodingMap = new HashMap<String, String>();

    /**
     * Encodes {@code str} as UTF-8 bytes and decodes those bytes again using
     * {@code srcEncoding}.
     *
     * @param str         the text to re-code; may be {@code null}
     * @param srcEncoding the charset name used to decode the UTF-8 bytes
     * @return the re-coded string, {@code null} if {@code str} is {@code null},
     *         or {@code str} unchanged if {@code srcEncoding} is not supported
     *         (the stack trace is printed in that case)
     */
    public static String toUTF(String str, String srcEncoding) {
        if (str == null) {
            return null;
        }
        try {
            str = new String(str.getBytes("UTF-8"), srcEncoding);
        } catch (UnsupportedEncodingException e) {
            // Best effort: report the problem and hand back the input untouched.
            e.printStackTrace();
        }
        return str;
    }

    /**
     * Encodes {@code str} with {@code tarEncoding} and decodes the resulting
     * bytes with the platform default charset.
     *
     * @param str         the text to re-code; may be {@code null}
     * @param tarEncoding the charset name used to encode {@code str}
     * @return the re-coded string, {@code null} if {@code str} is {@code null},
     *         or {@code str} unchanged if {@code tarEncoding} is not supported
     *         (the stack trace is printed in that case)
     */
    public static String convertUTFToOtherEncoding(String str,
            String tarEncoding) {
        if (str == null) {
            return null;
        }
        try {
            str = new String(str.getBytes(tarEncoding));
        } catch (UnsupportedEncodingException e) {
            // Best effort: report the problem and hand back the input untouched.
            e.printStackTrace();
        }
        return str;
    }

    /**
     * Same as {@link #toUTF(String, String)} with the platform default
     * encoding as {@code srcEncoding}.
     *
     * @param str the text to re-code; may be {@code null}
     * @return the re-coded string, or {@code null} if {@code str} is {@code null}
     */
    public static String toUTF(String str) {
        return toUTF(str, getDefaultEncoding());
    }

    /**
     * Same as {@link #convertUTFToOtherEncoding(String, String)} with the
     * platform default encoding as {@code tarEncoding}.
     *
     * @param str the text to re-code; may be {@code null}
     * @return the re-coded string, or {@code null} if {@code str} is {@code null}
     */
    public static String convertUTFToDefaultEncoding(String str) {
        return convertUTFToOtherEncoding(str, getDefaultEncoding());
    }

    /**
     * Returns the encoding name reported by an {@link InputStreamReader} that
     * was created without an explicit charset.
     *
     * @return the name from {@link InputStreamReader#getEncoding()}
     */
    public static String getDefaultEncoding() {
        InputStreamReader probe = new InputStreamReader(
                new ByteArrayInputStream(new byte[0]));
        return probe.getEncoding();
    }

    /**
     * Wraps the bytes of {@code str}, encoded with {@code encoding}, in an
     * in-memory stream. If {@code encoding} is not supported the stack trace is
     * printed and the platform default charset is used instead.
     *
     * @param str      the text to expose as a stream; must not be {@code null}
     * @param encoding the charset name used to encode {@code str}
     * @return a stream over the encoded bytes
     */
    public static InputStream String2InputStreamByEncoding(String str,
            String encoding) {
        ByteArrayInputStream stream;
        try {
            stream = new ByteArrayInputStream(str.getBytes(encoding));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            // Fall back to the platform default charset.
            stream = new ByteArrayInputStream(str.getBytes());
        }
        return stream;
    }

    /**
     * Encodes {@code str} with the platform default charset and returns a
     * buffered reader that decodes those bytes with the default encoding.
     *
     * @param str the text to read from; must not be {@code null}
     * @return a reader over {@code str}, or {@code null} if the default
     *         encoding name is rejected (the stack trace is printed)
     */
    public static BufferedReader String2ReaderByDefaultEncoding(
            String str) {
        ByteArrayInputStream stream = new ByteArrayInputStream(str.getBytes());
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(stream,
                    getDefaultEncoding()));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return reader;
    }

    /**
     * Opens {@code file} for reading with the platform default encoding.
     * The caller is responsible for closing the returned reader.
     *
     * @param file the file to open; must not be {@code null}
     * @return a buffered reader over the file, or {@code null} if the file
     *         does not exist or cannot be opened (the stack trace is printed
     *         in the latter case)
     */
    public static BufferedReader File2ReaderByDefaultEncoding(File file) {
        if (!file.exists()) {
            return null;
        }
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(file), getDefaultEncoding()));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return reader;
    }

    /**
     * Translates a Java encoding name into its official charset name.
     *
     * <p>The name is trimmed and looked up in {@link #encodingMap}, which is
     * populated first if it is still empty. Names missing from the table are
     * resolved through {@link java.nio.charset.Charset}; names unknown there
     * as well yield {@code "UTF-8"}.
     *
     * @param javaEncodingName the Java encoding name; may be {@code null}
     * @return the official name, {@code "UTF-8"} if the name is {@code null},
     *         unknown or mapped to a blank value, or {@code ""} if
     *         {@link #encodingMap} has been set to {@code null}
     */
    public static String getOfficalEncodingName(String javaEncodingName) {
        if (encodingMap == null) {
            return "";
        }
        if (encodingMap.size() == 0) {
            initialEncodingMap();
        }
        if (javaEncodingName == null) {
            return FALLBACK_ENCODING;
        }
        String key = javaEncodingName.trim();
        String mapped = encodingMap.get(key);
        if (mapped == null) {
            // Not in the table (e.g. "UTF8" or "ISO8859_1" as reported by
            // InputStreamReader.getEncoding()): ask the JVM before giving up.
            return canonicalCharsetName(key);
        }
        mapped = mapped.trim();
        return mapped.length() == 0 ? FALLBACK_ENCODING : mapped;
    }

    /**
     * Resolves {@code name} to the canonical name of a charset supported by
     * this JVM, or to {@code "UTF-8"} when no such charset exists.
     */
    private static String canonicalCharsetName(String name) {
        try {
            if (java.nio.charset.Charset.isSupported(name)) {
                return java.nio.charset.Charset.forName(name).name();
            }
        } catch (IllegalArgumentException e) {
            // Empty name or characters that are illegal in a charset name:
            // treat it like any other unknown encoding.
        }
        return FALLBACK_ENCODING;
    }

    /**
     * Returns the official charset name for the platform default encoding.
     *
     * @return the result of {@link #getOfficalEncodingName(String)} applied to
     *         {@link #getDefaultEncoding()}
     */
    public static String getOfficalEncodingNameByDefaultEncoding() {
        return getOfficalEncodingName(getDefaultEncoding());
    }

    /**
     * Adds the built-in Java-name to official-name entries to
     * {@link #encodingMap}. Does nothing if the map is {@code null}; existing
     * entries with the same keys are overwritten.
     */
    public static void initialEncodingMap() {
        if (encodingMap == null) {
            return;
        }
        encodingMap.put("8859_1", "ISO-8859-1");
        encodingMap.put("ASCII", "US-ASCII");
        encodingMap.put("Big5", "Big5");
        encodingMap.put("Big5-HKSCS", "Big5-HKSCS");
        encodingMap.put("Cp037", "IBM037");
        encodingMap.put("Cp273", "IBM273");
        encodingMap.put("Cp277", "IBM277");
        encodingMap.put("Cp278", "IBM278");
        encodingMap.put("Cp280", "IBM280");
        encodingMap.put("Cp284", "IBM284");
        encodingMap.put("Cp285", "IBM285");
        encodingMap.put("Cp297", "IBM297");
        encodingMap.put("Cp420", "IBM420");
        encodingMap.put("Cp424", "IBM424");
        encodingMap.put("Cp437", "IBM437");
        encodingMap.put("Cp500", "IBM500");
        encodingMap.put("Cp737", "x-IBM737");
        encodingMap.put("Cp775", "IBM775");
        encodingMap.put("Cp838", "IBM-Thai");
        encodingMap.put("Cp850", "IBM850");
        encodingMap.put("Cp852", "IBM852");
        encodingMap.put("Cp855", "IBM855");
        encodingMap.put("Cp857", "IBM857");
        encodingMap.put("Cp858", "IBM00858");
        encodingMap.put("Cp860", "IBM860");
        encodingMap.put("Cp861", "IBM861");
        encodingMap.put("Cp862", "IBM862");
        encodingMap.put("Cp863", "IBM863");
        encodingMap.put("Cp864", "IBM864");
        encodingMap.put("Cp865", "IBM865");
        encodingMap.put("Cp866", "IBM866");
        encodingMap.put("Cp868", "IBM868");
        encodingMap.put("Cp869", "IBM869");
        encodingMap.put("Cp870", "IBM870");
        encodingMap.put("Cp871", "IBM871");
        encodingMap.put("Cp874", "x-IBM874");
        encodingMap.put("Cp875", "x-IBM875");
        encodingMap.put("Cp918", "IBM918");
        encodingMap.put("Cp921", "x-IBM921");
        encodingMap.put("Cp922", "x-IBM922");
        encodingMap.put("Cp930", "x-IBM930");
        encodingMap.put("Cp933", "x-IBM933");
        encodingMap.put("Cp935", "x-IBM935");
        encodingMap.put("Cp937", "x-IBM937");
        encodingMap.put("Cp939", "x-IBM939");
        encodingMap.put("Cp942", "x-IBM942");
        encodingMap.put("Cp942C", "x-IBM942C");
        encodingMap.put("Cp943", "x-IBM943");
        encodingMap.put("Cp943C", "x-IBM943C");
        encodingMap.put("Cp948", "x-IBM948");
        encodingMap.put("Cp949", "x-IBM949");
        encodingMap.put("Cp949C", "x-IBM949C");
        encodingMap.put("Cp950", "x-IBM950");
        encodingMap.put("Cp964", "x-IBM964");
        encodingMap.put("Cp970", "x-IBM970");
        encodingMap.put("Cp1006", "x-IBM1006");
        encodingMap.put("Cp1025", "x-IBM1025");
        encodingMap.put("Cp1026", "IBM1026");
        encodingMap.put("Cp1046", "x-IBM1046");
        encodingMap.put("Cp1047", "IBM1047");
        encodingMap.put("Cp1097", "x-IBM1097");
        encodingMap.put("Cp1098", "x-IBM1098");
        encodingMap.put("Cp1112", "x-IBM1112");
        encodingMap.put("Cp1122", "x-IBM1122");
        encodingMap.put("Cp1123", "x-IBM1123");
        encodingMap.put("Cp1124", "x-IBM1124");
        encodingMap.put("Cp1140", "IBM01140");
        encodingMap.put("Cp1141", "IBM01141");
        encodingMap.put("Cp1142", "IBM01142");
        encodingMap.put("Cp1143", "IBM01143");
        encodingMap.put("Cp1144", "IBM01144");
        encodingMap.put("Cp1145", "IBM01145");
        encodingMap.put("Cp1146", "IBM01146");
        encodingMap.put("Cp1147", "IBM01147");
        encodingMap.put("Cp1148", "IBM01148");
        encodingMap.put("Cp1149", "IBM01149");
        encodingMap.put("Cp1250", "windows-1250");
        encodingMap.put("Cp1251", "windows-1251");
        encodingMap.put("Cp1252", "windows-1252");
        encodingMap.put("Cp1253", "windows-1253");
        encodingMap.put("Cp1254", "windows-1254");
        encodingMap.put("Cp1255", "windows-1255");
        encodingMap.put("Cp1256", "windows-1256");
        encodingMap.put("Cp1257", "windows-1257");
        encodingMap.put("Cp1258", "windows-1258");
        encodingMap.put("Cp1381", "x-IBM1381");
        encodingMap.put("Cp1383", "x-IBM1383");
        encodingMap.put("Cp33722", "x-IBM33722");
        encodingMap.put("GB18030", "GB18030");
        encodingMap.put("GB2312", "GB2312");
        encodingMap.put("GBK", "GBK");
        encodingMap.put("ISO-2022-CN", "ISO-2022-CN");
        encodingMap.put("ISO-2022-CN-CNS", "x-ISO-2022-CN");
        encodingMap.put("ISO-2022-CN-GB", "x-ISO-2022-CN-GB");
        encodingMap.put("ISO-2022-JP", "ISO-2022-JP");
        encodingMap.put("ISO-2022-KR", "ISO-2022-KR");
        encodingMap.put("ISO-8859-1", "ISO-8859-1");
        encodingMap.put("ISO-8859-2", "ISO-8859-2");
        encodingMap.put("ISO-8859-3", "ISO-8859-3");
        encodingMap.put("ISO-8859-4", "ISO-8859-4");
        encodingMap.put("ISO-8859-5", "ISO-8859-5");
        encodingMap.put("ISO-8859-6", "ISO-8859-6");
        encodingMap.put("ISO-8859-7", "ISO-8859-7");
        encodingMap.put("ISO-8859-8", "ISO-8859-8");
        encodingMap.put("ISO-8859-9", "ISO-8859-9");
        encodingMap.put("ISO-8859-11", "x-iso-8859-11");
        encodingMap.put("ISO-8859-13", "ISO-8859-13");
        encodingMap.put("ISO-8859-15", "ISO-8859-15");
        encodingMap.put("JIS", "ISO-2022-JP");
        encodingMap.put("JIS0201", "JIS_X0201");
        encodingMap.put("JIS0212", "JIS_X0212-1990");
        encodingMap.put("JISAutoDetect", "x-JISAutoDetect");
        encodingMap.put("JIS_X0201", "JIS_X0201");
        encodingMap.put("JIS_X0212-1990", "JIS_X0212-1990");
        encodingMap.put("KOI8-R", "KOI8-R");
        encodingMap.put("ks_c_5601-1987", "EUC-KR");
        encodingMap.put("KSC5601", "EUC-KR");
        encodingMap.put("MacArabic", "x-MacArabic");
        encodingMap.put("MacCentralEurope", "x-MacCentralEurope");
        encodingMap.put("MacCroatian", "x-MacCroatian");
        encodingMap.put("MacCyrillic", "x-MacCyrillic");
        encodingMap.put("MacDingbat", "x-MacDingbat");
        encodingMap.put("MacGreek", "x-MacGreek");
        encodingMap.put("MacHebrew", "x-MacHebrew");
        encodingMap.put("MacIceland", "x-MacIceland");
        encodingMap.put("MacRoman", "x-MacRoman");
        encodingMap.put("MacRomania", "x-MacRomania");
        encodingMap.put("MacSymbol", "x-MacSymbol");
        encodingMap.put("MacThai", "x-MacThai");
        encodingMap.put("MacTurkish", "x-MacTurkish");
        encodingMap.put("MacUkraine", "x-MacUkraine");
        encodingMap.put("MS874", "x-windows-874");
        encodingMap.put("MS932", "windows-31j");
        encodingMap.put("Shift_JIS", "Shift_JIS");
        encodingMap.put("TIS-620", "TIS-620");
        encodingMap.put("Unicode", "UTF-16");
        encodingMap.put("Unicode-8", "UTF-8");
        encodingMap.put("UnicodeBig", "UTF-16");
        encodingMap.put("UnicodeBigUnmarked", "UTF-16BE");
        encodingMap.put("UnicodeLittle", "x-UTF-16LE-BOM");
        encodingMap.put("UnicodeLittleUnmarked", "UTF-16LE");
        encodingMap.put("UTF-8", "UTF-8");
        encodingMap.put("UTF-16", "UTF-16");
        encodingMap.put("UTF-16BE", "UTF-16BE");
        encodingMap.put("UTF-16LE", "UTF-16LE");
        encodingMap.put("UTF-32", "UTF-32");
        encodingMap.put("UTF-32BE", "UTF-32BE");
        encodingMap.put("UTF-32LE", "UTF-32LE");
        encodingMap.put("windows-1250", "windows-1250");
        encodingMap.put("windows-1251", "windows-1251");
        encodingMap.put("windows-1252", "windows-1252");
        encodingMap.put("windows-1253", "windows-1253");
        encodingMap.put("windows-1254", "windows-1254");
        encodingMap.put("windows-1255", "windows-1255");
        encodingMap.put("windows-1256", "windows-1256");
        encodingMap.put("windows-1257", "windows-1257");
        encodingMap.put("windows-1258", "windows-1258");
        encodingMap.put("windows-31j", "windows-31j");
        encodingMap.put("x-EUC-CN", "GB2312");
        encodingMap.put("x-EUC-JP", "EUC-JP");
        encodingMap.put("x-EUC-JP-LINUX", "x-euc-jp-linux");
        encodingMap.put("x-EUC-TW", "x-EUC-TW");
        encodingMap.put("x-ISCII91", "x-ISCII91");
        encodingMap.put("x-JIS0208", "x-JIS0208");
        encodingMap.put("x-Johab", "x-Johab");
        encodingMap.put("x-MS950-HKSCS", "x-MS950-HKSCS");
        encodingMap.put("x-mswin-936", "x-mswin-936");
        encodingMap.put("x-UTF-16LE-BOM", "x-UTF-16LE-BOM");
        encodingMap.put("X-UTF-32BE-BOM", "X-UTF-32BE-BOM");
        encodingMap.put("X-UTF-32LE-BOM", "X-UTF-32LE-BOM");
        encodingMap.put("x-windows-949", "x-windows-949");
        encodingMap.put("x-windows-950", "x-windows-950");
    }
}
|