001: package net.sf.saxon.charcode;
002:
003: import net.sf.saxon.Controller;
004: import net.sf.saxon.trans.DynamicError;
005: import net.sf.saxon.trans.XPathException;
006:
007: import javax.xml.transform.OutputKeys;
008: import java.nio.charset.Charset;
009: import java.nio.charset.IllegalCharsetNameException;
010: import java.nio.charset.UnsupportedCharsetException;
011: import java.util.Iterator;
012: import java.util.Properties;
013:
014: /**
015: * This class creates a CharacterSet object for a given named encoding.
016: */
017:
018: public class CharacterSetFactory {
019:
020: /**
021: * Class is never instantiated
022: */
023: private CharacterSetFactory() {
024: }
025:
026: /**
027: * Make a CharacterSet appropriate to the encoding
028: */
029:
030: public static CharacterSet getCharacterSet(Properties details,
031: Controller controller) throws XPathException {
032:
033: String encoding = details.getProperty(OutputKeys.ENCODING);
034: if (encoding == null)
035: encoding = "UTF8";
036: if (encoding.equalsIgnoreCase("UTF-8"))
037: encoding = "UTF8"; // needed for Microsoft Java VM
038:
039: CharacterSet charSet = makeCharacterSet(encoding, controller);
040: if (charSet == null) {
041: charSet = ASCIICharacterSet.getInstance();
042: }
043: return charSet;
044: }
045:
046: private static CharacterSet makeCharacterSet(String encoding,
047: Controller controller) throws XPathException {
048: if (encoding.equalsIgnoreCase("UTF8")) {
049: return UnicodeCharacterSet.getInstance();
050: } else if (encoding.equalsIgnoreCase("ASCII")) {
051: return ASCIICharacterSet.getInstance();
052: } else if (encoding.equalsIgnoreCase("US-ASCII")) {
053: return ASCIICharacterSet.getInstance();
054: } else if (encoding.equalsIgnoreCase("iso-646")) {
055: return ASCIICharacterSet.getInstance();
056: } else if (encoding.equalsIgnoreCase("iso646")) {
057: return ASCIICharacterSet.getInstance();
058: } else if (encoding.equalsIgnoreCase("iso-8859-1")) {
059: return Latin1CharacterSet.getInstance();
060: } else if (encoding.equalsIgnoreCase("ISO8859_1")) {
061: return Latin1CharacterSet.getInstance();
062: } else if (encoding.equalsIgnoreCase("iso-8859-2")) {
063: return Latin2CharacterSet.getInstance();
064: } else if (encoding.equalsIgnoreCase("ISO8859_2")) {
065: return Latin2CharacterSet.getInstance();
066: } else if (encoding.equalsIgnoreCase("UTF-8")) {
067: return UnicodeCharacterSet.getInstance();
068: } else if (encoding.equalsIgnoreCase("UTF-16")) {
069: return UnicodeCharacterSet.getInstance();
070: } else if (encoding.equalsIgnoreCase("UTF16")) {
071: return UnicodeCharacterSet.getInstance();
072: } else if (encoding.equalsIgnoreCase("KOI8-R")) {
073: return KOI8RCharacterSet.getInstance();
074: } else if (encoding.equalsIgnoreCase("cp1251")) {
075: return CP1251CharacterSet.getInstance();
076: } else if (encoding.equalsIgnoreCase("windows-1251")) {
077: return CP1251CharacterSet.getInstance();
078: } else if (encoding.equalsIgnoreCase("cp1250")) {
079: return CP1250CharacterSet.getInstance();
080: } else if (encoding.equalsIgnoreCase("windows-1250")) {
081: return CP1250CharacterSet.getInstance();
082: } else if (encoding.equalsIgnoreCase("cp1252")) {
083: return CP1252CharacterSet.getInstance();
084: } else if (encoding.equalsIgnoreCase("windows-1252")) {
085: return CP1252CharacterSet.getInstance();
086: } else if (encoding.equalsIgnoreCase("cp852")) {
087: return CP852CharacterSet.getInstance();
088: } else if (encoding.equalsIgnoreCase("windows-852")) {
089: return CP852CharacterSet.getInstance();
090:
091: } else {
092: // Allow an alias for the character set to be specified as a system property
093: String csname = System.getProperty(OutputKeys.ENCODING
094: + '.' + encoding);
095: if (csname == null) {
096: Charset charset;
097: try {
098: charset = Charset.forName(encoding);
099: CharacterSet res = UnknownCharacterSet
100: .makeCharSet(charset);
101:
102: // Some JDK1.4 charsets are known to be buggy, for example SJIS.
103: // We'll see whether the charset claims to be able to encode some
104: // tricky characters; if it says it can, the chances are it's lying.
105:
106: if (res.inCharset(0x1ff) && res.inCharset(0x300)
107: && res.inCharset(0xa90)
108: && res.inCharset(0x2200)
109: && res.inCharset(0x3400)) {
110: res = BuggyCharacterSet.makeCharSet(charset);
111: }
112: return res;
113: } catch (IllegalCharsetNameException err) {
114: throw new DynamicError("Invalid encoding name: "
115: + encoding);
116: } catch (UnsupportedCharsetException err) {
117: //System.err.println("Unknown encoding " + encoding + ": reverting to ASCII");
118: return ASCIICharacterSet.getInstance();
119: }
120: } else {
121: try {
122: Object obj = controller.getConfiguration()
123: .getInstance(csname,
124: controller.getClassLoader());
125: if (obj instanceof PluggableCharacterSet) {
126: return (PluggableCharacterSet) obj;
127: }
128: } catch (Exception err) {
129: throw new DynamicError("Failed to load " + csname);
130: }
131: }
132: }
133: return null;
134: }
135:
136: /**
137: * Main program is a utility to give a list of the character sets supported
138: * by the Java VM
139: */
140:
141: public static void main(String[] args) throws Exception {
142: System.err
143: .println("Available Character Sets in the java.nio package for this Java VM:");
144: Iterator iter = Charset.availableCharsets().keySet().iterator();
145: while (iter.hasNext()) {
146: String s = (String) iter.next();
147: System.err.println(s);
148: }
149: }
150: }
151:
152: //
153: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
154: // you may not use this file except in compliance with the License. You may obtain a copy of the
155: // License at http://www.mozilla.org/MPL/
156: //
157: // Software distributed under the License is distributed on an "AS IS" basis,
158: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
159: // See the License for the specific language governing rights and limitations under the License.
160: //
161: // The Original Code is: all this file.
162: //
163: // The Initial Developer of the Original Code is Michael H. Kay.
164: //
165: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
166: //
167: // Contributor(s): none.
168: //
|