001: /*
002: * EncodingCmd.java --
003: *
004: * Copyright (c) 2001 Bruce A. Johnson
005: *
006: * See the file "license.terms" for information on usage and
007: * redistribution of this file, and for a DISCLAIMER OF ALL
008: * WARRANTIES.
009: *
010: * RCS: @(#) $Id: EncodingCmd.java,v 1.3 2006/07/07 23:36:00 mdejong Exp $
011: *
012: */
013:
014: package tcl.lang;
015:
016: import java.util.Hashtable;
017:
018: import java.io.UnsupportedEncodingException;
019:
020: import java.nio.charset.Charset;
021: import java.nio.charset.IllegalCharsetNameException;
022: import java.nio.charset.UnsupportedCharsetException;
023:
024: /**
025: * This class implements the built-in "encoding" command in Tcl.
026: */
027:
028: class EncodingCmd implements Command {
029: // Defaults to "file.encoding" property
030:
031: static String systemTclEncoding = null;
032: static String systemJavaEncoding = null;
033:
034: static class EncodingMap {
035: String tclName;
036: String javaName;
037: int bytesPerChar;
038:
039: public EncodingMap(String tclName, String javaName,
040: int bytesPerChar) {
041: this .tclName = tclName;
042: this .javaName = javaName;
043: this .bytesPerChar = bytesPerChar;
044: }
045: }
046:
047: // Supported encodings
048:
049: static Hashtable encodeHash;
050:
051: static EncodingMap[] encodings = {
052: new EncodingMap("identity", "UTF8", 1),
053: new EncodingMap("utf-8", "UTF8", 1),
054: new EncodingMap("utf-16", "UTF16", 2),
055: new EncodingMap("unicode", "ISO-10646-UCS-2", 2),
056: new EncodingMap("ascii", "ASCII", 1),
057: new EncodingMap("big5", "Big5", 0),
058: new EncodingMap("cp1250", "Cp1250", 1),
059: new EncodingMap("cp1251", "Cp1251", 1),
060: new EncodingMap("ansi-1251", "Cp1251", 1),
061: new EncodingMap("cp1252", "Cp1252", 1),
062: new EncodingMap("cp1253", "Cp1253", 1),
063: new EncodingMap("cp1254", "Cp1254", 1),
064: new EncodingMap("cp1255", "Cp1255", 1),
065: new EncodingMap("cp1256", "Cp1256", 1),
066: new EncodingMap("cp1257", "Cp1257", 1),
067: new EncodingMap("cp1258", "Cp1258", 1),
068: new EncodingMap("cp437", "Cp437", 1),
069: new EncodingMap("cp737", "Cp737", 1),
070: new EncodingMap("cp775", "Cp775", 1),
071: new EncodingMap("cp850", "Cp850", 1),
072: new EncodingMap("cp852", "Cp852", 1),
073: new EncodingMap("cp855", "Cp855", 1),
074: new EncodingMap("cp857", "Cp857", 1),
075: new EncodingMap("cp860", "Cp860", 1),
076: new EncodingMap("cp861", "Cp861", 1),
077: new EncodingMap("cp862", "Cp862", 1),
078: new EncodingMap("cp863", "Cp863", 1),
079: new EncodingMap("cp864", "Cp864", 1),
080: new EncodingMap("cp865", "Cp865", 1),
081: new EncodingMap("cp866", "Cp866", 1),
082: new EncodingMap("cp869", "Cp869", 1),
083: new EncodingMap("cp874", "Cp874", 1),
084: new EncodingMap("cp932", "Cp942", 0),
085: new EncodingMap("cp936", "Cp936", 0),
086: new EncodingMap("cp949", "Cp949", 0),
087: new EncodingMap("cp950", "Cp950", 0),
088: new EncodingMap("euc-cn", "EUC_cn", 0),
089: new EncodingMap("euc-jp", "EUC_jp", 0),
090: new EncodingMap("euc-kr", "EUC_kr", 0),
091: new EncodingMap("iso2022", "ISO2022JP", -1),
092: new EncodingMap("iso2022-jp", "ISO2022JP", -1),
093: new EncodingMap("iso2022-kr", "ISO2022KR", -1),
094: new EncodingMap("iso8859-1", "ISO8859_1", 1),
095: new EncodingMap("ansi_x3.4-1968", "ISO8859_1", 1),
096: new EncodingMap("iso8859-2", "ISO8859_2", 1),
097: new EncodingMap("iso8859-3", "ISO8859_3", 1),
098: new EncodingMap("iso8859-4", "ISO8859_4", 1),
099: new EncodingMap("iso8859-5", "ISO8859_5", 1),
100: new EncodingMap("iso8859-6", "ISO8859_6", 1),
101: new EncodingMap("iso8859-7", "ISO8859_7", 1),
102: new EncodingMap("iso8859-8", "ISO8859_8", 1),
103: new EncodingMap("iso8859-9", "ISO8859_9", 1),
104: new EncodingMap("jis0201", "JIS0201", 1),
105: new EncodingMap("jis0208", "JIS0208", 2),
106: new EncodingMap("jis0212", "JIS0212", 2),
107: new EncodingMap("koi8-r", "KOI8_r", 1),
108: new EncodingMap("macCentEuro", "MacCentEuro", 1),
109: new EncodingMap("macCroatian", "MacCroatian", 1),
110: new EncodingMap("macCyrillic", "MacCyrillic", 1),
111: new EncodingMap("macDingbats", "MacDingbats", 1),
112: new EncodingMap("macGreek", "MacGreek", 1),
113: new EncodingMap("macIceland", "MacIceland", 1),
114: new EncodingMap("macJapan", "MacJapan", 0),
115: new EncodingMap("macRoman", "MacRoman", 1),
116: new EncodingMap("macRomania", "MacRomania", 1),
117: new EncodingMap("macThai", "MacThai", 1),
118: new EncodingMap("macTurkish", "MacTurkish", 1),
119: new EncodingMap("macUkraine", "MacUkraine", 1),
120: new EncodingMap("shiftjis", "SJIS", 0) };
121:
122: static {
123: // Store entries in a Hashtable, so that access from
124: // multiple threads will be synchronized.
125:
126: encodeHash = new Hashtable();
127:
128: for (int i = 0; i < encodings.length; i++) {
129: EncodingMap map = encodings[i];
130:
131: String tclKey = "tcl," + map.tclName;
132: String javaKey = "java," + map.javaName;
133:
134: encodeHash.put(tclKey, map);
135: encodeHash.put(javaKey, map);
136: }
137:
138: // Determine default system encoding, use
139: // "iso8859-1" if default is not known.
140:
141: String enc = null;
142:
143: try {
144: enc = System.getProperty("file.encoding");
145: } catch (SecurityException ex) {
146: }
147:
148: if (enc != null) {
149: // Lookup EncodingMap for this Java encoding name
150: String key = "java," + enc;
151: EncodingMap map = (EncodingMap) encodeHash.get(key);
152: if (map == null) {
153: enc = null;
154: } else {
155: systemTclEncoding = map.tclName;
156: systemJavaEncoding = map.javaName;
157: }
158: }
159:
160: // Default to "iso8859-1" if the encoding
161: // indicated by "file.encoding" is not
162: // in the supported encoding table.
163:
164: if (enc == null || enc.length() == 0) {
165: systemTclEncoding = "iso8859-1";
166: systemJavaEncoding = "ISO8859_1";
167: }
168: }
169:
170: static final private String validCmds[] = { "convertfrom",
171: "convertto", "names", "system", };
172:
173: static final int OPT_CONVERTFROM = 0;
174: static final int OPT_CONVERTTO = 1;
175: static final int OPT_NAMES = 2;
176: static final int OPT_SYSTEM = 3;
177:
178: /**
179: * This procedure is invoked to process the "encoding" Tcl command.
180: * See the user documentation for details on what it does.
181: *
182: * @param interp the current interpreter.
183: * @param objv command arguments.
184: */
185:
186: public void cmdProc(Interp interp, TclObject[] objv)
187: throws TclException {
188: if (objv.length < 2) {
189: throw new TclNumArgsException(interp, 1, objv,
190: "option ?arg ...?");
191: }
192:
193: int index = TclIndex.get(interp, objv[1], validCmds, "option",
194: 0);
195:
196: switch (index) {
197: case OPT_CONVERTTO:
198: case OPT_CONVERTFROM: {
199: String tclEncoding, javaEncoding;
200: TclObject data;
201:
202: if (objv.length == 3) {
203: tclEncoding = systemTclEncoding;
204: data = objv[2];
205: } else if (objv.length == 4) {
206: tclEncoding = objv[2].toString();
207: data = objv[3];
208: } else {
209: throw new TclNumArgsException(interp, 2, objv,
210: "?encoding? data");
211: }
212:
213: javaEncoding = getJavaName(tclEncoding);
214:
215: if (javaEncoding == null) {
216: throw new TclException(interp, "unknown encoding \""
217: + tclEncoding + "\"");
218: }
219:
220: try {
221: if (index == OPT_CONVERTFROM) {
222: // Treat the string as binary data
223: byte[] bytes = TclByteArray.getBytes(interp, data);
224: interp.setResult(new String(bytes, javaEncoding));
225: } else {
226: // Store the result as binary data
227: byte[] bytes = data.toString().getBytes(
228: javaEncoding);
229: interp.setResult(TclByteArray.newInstance(bytes));
230: }
231:
232: } catch (UnsupportedEncodingException ex) {
233: throw new TclRuntimeError("Encoding.cmdProc() error: "
234: + "unsupported java encoding \"" + javaEncoding
235: + "\"");
236: }
237:
238: break;
239: }
240: case OPT_NAMES: {
241: if (objv.length > 2) {
242: throw new TclNumArgsException(interp, 2, objv, null);
243: }
244:
245: TclObject list = TclList.newInstance();
246: for (int i = 0; i < encodings.length; i++) {
247: EncodingMap map = encodings[i];
248:
249: // Encodings that exists in the table but
250: // is not supported by the runtime should
251: // not be returned.
252:
253: if (isSupported(map.javaName)) {
254: TclList.append(interp, list, TclString
255: .newInstance(map.tclName));
256: }
257: }
258: interp.setResult(list);
259: break;
260: }
261: case OPT_SYSTEM: {
262: if (objv.length > 3)
263: throw new TclNumArgsException(interp, 2, objv,
264: "?encoding?");
265:
266: if (objv.length == 2) {
267: interp.setResult(systemTclEncoding);
268: } else {
269: String tclEncoding = objv[2].toString();
270: String javaEncoding = EncodingCmd
271: .getJavaName(tclEncoding);
272:
273: if (javaEncoding == null) {
274: throw new TclException(interp,
275: "unknown encoding \"" + tclEncoding + "\"");
276: }
277:
278: systemTclEncoding = tclEncoding;
279: systemJavaEncoding = javaEncoding;
280: }
281:
282: break;
283: }
284: default: {
285: throw new TclRuntimeError("Encoding.cmdProc() error: "
286: + "incorrect index returned from TclIndex.get()");
287: }
288: }
289: }
290:
291: // FIXME: It is not clear that this field is even used in Jacl.
292: // Can it be removed here and in the IO layer?
293:
294: // Given a Java encoding name return the average bytes per char
295:
296: static int getBytesPerChar(String name) {
297: String key = "java," + name;
298: EncodingMap map = (EncodingMap) encodeHash.get(key);
299: if (map == null) {
300: throw new RuntimeException("Invalid encoding \"" + name
301: + "\"");
302: }
303: return map.bytesPerChar;
304: }
305:
306: // Given a Tcl encoding name, return the Java encoding name
307:
308: static String getJavaName(String name) {
309: String key = "tcl," + name;
310: EncodingMap map = (EncodingMap) encodeHash.get(key);
311: if (map == null) {
312: return null;
313: }
314: return map.javaName;
315: }
316:
317: // Given a Java encoding name, return the Tcl encoding name
318:
319: static String getTclName(String name) {
320: String key = "java," + name;
321: EncodingMap map = (EncodingMap) encodeHash.get(key);
322: if (map == null) {
323: return null;
324: }
325: return map.tclName;
326: }
327:
328: // Return true if the Java encoding name is actually
329: // supported in this Java install. Both "western"
330: // and "international" options exist at Java install
331: // time and the "western" install does not support
332: // all the encodings that Tcl expects.
333:
334: static boolean isSupported(String name) {
335: String key = "java," + name;
336: EncodingMap map = (EncodingMap) encodeHash.get(key);
337: if (map == null) {
338: return false;
339: }
340:
341: // FIXME: Could load the supported charset map once, then
342: // use it over and over. If lots of calls to [encoding names]
343: // is made, this could make a big diff.
344:
345: // Load the encoding at runtime
346: Charset cs;
347: try {
348: cs = Charset.forName(name);
349: } catch (IllegalCharsetNameException ex) {
350: // This should never happen
351: throw new TclRuntimeError("illegal charset name \"" + name
352: + "\"");
353: } catch (UnsupportedCharsetException ex) {
354: // This can happen when a western install does
355: // not support international encodings.
356:
357: return false;
358: }
359:
360: // Return true when charset can encode and decode.
361: // All charsets can decode, but some special case
362: // charsets may not support the encode operation.
363:
364: return cs.canEncode();
365: }
366:
367: }
|