001: package com.internetcds.jdbc.tds;
002:
003: import java.io.UnsupportedEncodingException;
004: import java.util.Hashtable;
005:
006: /**
007: * Helper class to handle server character set conversion.
008: *
009: * @author Stefan Bodewig <a href="mailto:stefan.bodewig@megabit.net">stefan.bodewig@megabit.net</a>
010: *
011: * @version $Id: EncodingHelper.java,v 1.2 2007-10-19 13:21:40 sinisa Exp $
012: */
public class EncodingHelper {
    public static final String cvsVersion = "$Id: EncodingHelper.java,v 1.2 2007-10-19 13:21:40 sinisa Exp $";

    /**
     * The Java name of the encoding, as passed to
     * {@link String#getBytes(String)} and the {@code String} constructors.
     */
    private final String name;

    /**
     * Is this a DBCS charset (does it need more than one byte per character)?
     */
    private final boolean wideChars;

    /**
     * A String containing the decoded form of every byte 0x00 - 0xFF in
     * this charset. Only populated for non-DBCS charsets; {@code null}
     * otherwise.
     */
    private final String converted;

    /**
     * Private so only the static accessor {@link #getHelper(String)} can
     * create instances.
     *
     * <p>Must only be called after {@code convArray} has been filled, because
     * non-DBCS helpers decode it to build their character table.
     *
     * @param name      Java encoding name
     * @param wideChars {@code true} if the charset is treated as DBCS
     */
    private EncodingHelper(String name, boolean wideChars) {
        this.name = name;
        this.wideChars = wideChars;
        // The lookup table is only meaningful when one byte maps to one char.
        this.converted = wideChars ? null : getString(convArray);
    }

    /**
     * Translate the String into a byte[] in the server's encoding.
     *
     * <p>If the JVM does not support the encoding, the platform default
     * encoding is used instead (best effort).
     *
     * @param value the text to encode; must not be {@code null}
     * @return the encoded bytes
     */
    public byte[] getBytes(String value) {
        try {
            return value.getBytes(name);
        } catch (UnsupportedEncodingException uee) {
            // Deliberate best-effort fallback to the platform default.
            return value.getBytes();
        }
    }

    /**
     * Translate the byte[] from the server's encoding to a Unicode String.
     *
     * @param value the bytes to decode; must not be {@code null}
     * @return the decoded text
     */
    public String getString(byte[] value) {
        return getString(value, 0, value.length);
    }

    /**
     * Translate part of the byte[] from the server's encoding to a
     * Unicode String.
     *
     * <p>The subarray starting at index off and extending to off+len-1
     * is translated. If the JVM does not support the encoding, the platform
     * default encoding is used instead (best effort).
     *
     * @param value the bytes to decode; must not be {@code null}
     * @param off   index of the first byte to decode
     * @param len   number of bytes to decode
     * @return the decoded text
     */
    public String getString(byte[] value, int off, int len) {
        try {
            return new String(value, off, len, name);
        } catch (UnsupportedEncodingException uee) {
            // Deliberate best-effort fallback to the platform default.
            return new String(value, off, len);
        }
    }

    /**
     * Is this a DBCS charset (does it need more than one byte per character)?
     *
     * @return {@code true} if this helper was registered as DBCS
     */
    public boolean isDBCS() {
        return wideChars;
    }

    /**
     * Can the given String be converted to the server's charset?
     *
     * <p>Does not work for DBCS charsets.
     *
     * @param value the text to check; must not be {@code null}
     * @return {@code true} if every character of {@code value} appears in the
     *         decoded form of the bytes 0x00 - 0xFF for this charset
     * @throws IllegalStateException if this is a DBCS charset
     */
    public boolean canBeConverted(String value) {
        if (isDBCS()) {
            throw new IllegalStateException(name + " is a DBCS charset");
        }

        int len = value.length();
        for (int i = 0; i < len; i++) {
            if (converted.indexOf(value.charAt(i)) == -1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the helper object for the given encoding.
     *
     * @param encodingName the server-side charset name (for example
     *                     {@code "iso_1"} or {@code "cp1252"}); lookup is
     *                     case-sensitive
     * @return the matching helper, or {@code null} if the name is
     *         {@code null} or not a known encoding
     */
    public static EncodingHelper getHelper(String encodingName) {
        if (!initialized) {
            synchronized (EncodingHelper.class) {
                if (!initialized) {
                    initialize();
                }
            }
        }
        if (encodingName == null) {
            // Hashtable rejects null keys; treat it like any unknown name.
            return null;
        }
        return (EncodingHelper) knownEncodings.get(encodingName);
    }

    /**
     * Array containing the bytes 0x00 - 0xFF.
     */
    private static byte[] convArray;

    /**
     * Hashtable holding instances for all known encodings, keyed by the
     * server-side charset name.
     */
    private static Hashtable knownEncodings;

    /**
     * Simple boolean to ensure we initialize once and only once.
     * Volatile because {@link #getHelper(String)} reads it without holding
     * the class lock.
     */
    private static volatile boolean initialized;

    /**
     * Initialize the static variables.
     *
     * <p>Will be called from the static block below, but some VMs
     * (notably Microsoft's) reportedly won't run this, which is why
     * {@link #getHelper(String)} also triggers it lazily.
     */
    private static synchronized void initialize() {
        // convArray must be ready before any helper is constructed.
        byte[] allBytes = new byte[256];
        for (int i = 0; i < allBytes.length; i++) {
            allBytes[i] = (byte) i;
        }
        convArray = allBytes;

        // Build into a local table and publish it only when complete.
        Hashtable encodings = new Hashtable();

        // NOTE(review): cp1252 shares the ISO8859_1 helper, presumably so it
        // is available even without the extended charsets; the two differ in
        // the 0x80 - 0x9F range - confirm this approximation is intended.
        EncodingHelper latin1 = new EncodingHelper("ISO8859_1", false);
        encodings.put("iso_1", latin1);
        encodings.put("cp1252", latin1);

        try {
            // simple test for the presence of i18n.jar
            "a".getBytes("Cp437");

            register(encodings, "cp437", "Cp437", false);
            register(encodings, "cp850", "Cp850", false);
            register(encodings, "cp1250", "Cp1250", false);
            register(encodings, "cp1251", "Cp1251", false);
            register(encodings, "cp1253", "Cp1253", false);
            register(encodings, "cp1254", "Cp1254", false);
            register(encodings, "cp1255", "Cp1255", false);
            register(encodings, "cp1256", "Cp1256", false);
            register(encodings, "cp1257", "Cp1257", false);

            /*
             * XXX are the CpXXX different from MSXXX? Used MS to be safe.
             */
            // thai
            // NOTE(review): registered as DBCS although code page 874 is
            // generally a single-byte charset - kept as in the original.
            register(encodings, "cp874", "MS874", true);
            // japanese
            register(encodings, "cp932", "MS932", true);
            // simplified chinese (was mistakenly registered as cp932/MS932)
            register(encodings, "cp936", "MS936", true);
            // korean
            register(encodings, "cp949", "MS949", true);
            // traditional chinese
            register(encodings, "cp950", "MS950", true);
        } catch (UnsupportedEncodingException uee) {
            // i18n.jar not present, only ISO-8859-1 is available
        }

        knownEncodings = encodings;
        initialized = true;
    }

    /**
     * Create a helper for {@code javaEncoding} and store it in
     * {@code table} under {@code serverCharset}.
     */
    private static void register(Hashtable table, String serverCharset,
                                 String javaEncoding, boolean wideChars) {
        table.put(serverCharset, new EncodingHelper(javaEncoding, wideChars));
    }

    static {
        initialize();
    }
}
|