001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: Encodings.java,v 1.14 2005/08/03 19:20:31 minchau Exp $
018: */
019: package org.apache.xml.serializer;
020:
021: import java.io.InputStream;
022: import java.io.OutputStream;
023: import java.io.OutputStreamWriter;
024: import java.io.UnsupportedEncodingException;
025: import java.io.Writer;
026: import java.lang.reflect.Method;
027: import java.net.URL;
028: import java.security.AccessController;
029: import java.security.PrivilegedAction;
030: import java.util.Enumeration;
031: import java.util.Hashtable;
032: import java.util.Properties;
033: import java.util.StringTokenizer;
034:
035: /**
036: * Provides information about encodings. Depends on the Java runtime
037: * to provides writers for the different encodings.
038: *
039: * This class is only for internal use within Xalan. However, it is used directly
040: * by org.apache.xalan.xsltc.compiler.Output.
041: *
042: * @xsl.usage internal
043: */
044:
045: public final class Encodings extends Object {
046: /**
047: * Standard filename for properties file with encodings data.
048: */
049: private static final String ENCODINGS_FILE = "org/apache/xml/serializer/Encodings.properties";
050:
051: /**
052: * Standard filename for properties file with encodings data.
053: */
054: private static final String ENCODINGS_PROP = "org.apache.xalan.serialize.encodings";
055:
056: /**
057: * Returns a writer for the specified encoding based on
058: * an output stream.
059: * <p>
060: * This is not a public API.
061: * @param output The output stream
062: * @param encoding The encoding
063: * @return A suitable writer
064: * @throws UnsupportedEncodingException There is no convertor
065: * to support this encoding
066: * @xsl.usage internal
067: */
068: static Writer getWriter(OutputStream output, String encoding)
069: throws UnsupportedEncodingException {
070:
071: for (int i = 0; i < _encodings.length; ++i) {
072: if (_encodings[i].name.equalsIgnoreCase(encoding)) {
073: try {
074: return new OutputStreamWriter(output,
075: _encodings[i].javaName);
076: } catch (java.lang.IllegalArgumentException iae) // java 1.1.8
077: {
078: // keep trying
079: } catch (UnsupportedEncodingException usee) {
080:
081: // keep trying
082: }
083: }
084: }
085:
086: try {
087: return new OutputStreamWriter(output, encoding);
088: } catch (java.lang.IllegalArgumentException iae) // java 1.1.8
089: {
090: throw new UnsupportedEncodingException(encoding);
091: }
092: }
093:
094: /**
095: * Returns the EncodingInfo object for the specified
096: * encoding.
097: * <p>
098: * This is not a public API.
099: *
100: * @param encoding The encoding
101: * @return The object that is used to determine if
102: * characters are in the given encoding.
103: * @xsl.usage internal
104: */
105: static EncodingInfo getEncodingInfo(String encoding) {
106: EncodingInfo ei;
107:
108: String normalizedEncoding = toUpperCaseFast(encoding);
109: ei = (EncodingInfo) _encodingTableKeyJava
110: .get(normalizedEncoding);
111: if (ei == null)
112: ei = (EncodingInfo) _encodingTableKeyMime
113: .get(normalizedEncoding);
114: if (ei == null) {
115: // We shouldn't have to do this, but just in case.
116: ei = new EncodingInfo(null, null);
117: }
118:
119: return ei;
120: }
121:
122: /**
123: * A fast and cheap way to uppercase a String that is
124: * only made of printable ASCII characters.
125: * <p>
126: * This is not a public API.
127: * @param s a String of ASCII characters
128: * @return an uppercased version of the input String,
129: * possibly the same String.
130: * @xsl.usage internal
131: */
132: static private String toUpperCaseFast(final String s) {
133:
134: boolean different = false;
135: final int mx = s.length();
136: char[] chars = new char[mx];
137: for (int i = 0; i < mx; i++) {
138: char ch = s.charAt(i);
139: // is the character a lower case ASCII one?
140: if ('a' <= ch && ch <= 'z') {
141: // a cheap and fast way to uppercase that is good enough
142: ch = (char) (ch + ('A' - 'a'));
143: different = true; // the uppercased String is different
144: }
145: chars[i] = ch;
146: }
147:
148: // A little optimization, don't call String.valueOf() if
149: // the uppercased string is the same as the input string.
150: final String upper;
151: if (different)
152: upper = String.valueOf(chars);
153: else
154: upper = s;
155:
156: return upper;
157: }
158:
159: /** The default encoding, ISO style, ISO style. */
160: static final String DEFAULT_MIME_ENCODING = "UTF-8";
161:
162: /**
163: * Get the proper mime encoding. From the XSLT recommendation: "The encoding
164: * attribute specifies the preferred encoding to use for outputting the result
165: * tree. XSLT processors are required to respect values of UTF-8 and UTF-16.
166: * For other values, if the XSLT processor does not support the specified
167: * encoding it may signal an error; if it does not signal an error it should
168: * use UTF-8 or UTF-16 instead. The XSLT processor must not use an encoding
169: * whose name does not match the EncName production of the XML Recommendation
170: * [XML]. If no encoding attribute is specified, then the XSLT processor should
171: * use either UTF-8 or UTF-16."
172: * <p>
173: * This is not a public API.
174: *
175: * @param encoding Reference to java-style encoding string, which may be null,
176: * in which case a default will be found.
177: *
178: * @return The ISO-style encoding string, or null if failure.
179: * @xsl.usage internal
180: */
181: static String getMimeEncoding(String encoding) {
182:
183: if (null == encoding) {
184: try {
185:
186: // Get the default system character encoding. This may be
187: // incorrect if they passed in a writer, but right now there
188: // seems to be no way to get the encoding from a writer.
189: encoding = System.getProperty("file.encoding", "UTF8");
190:
191: if (null != encoding) {
192:
193: /*
194: * See if the mime type is equal to UTF8. If you don't
195: * do that, then convertJava2MimeEncoding will convert
196: * 8859_1 to "ISO-8859-1", which is not what we want,
197: * I think, and I don't think I want to alter the tables
198: * to convert everything to UTF-8.
199: */
200: String jencoding = (encoding
201: .equalsIgnoreCase("Cp1252")
202: || encoding.equalsIgnoreCase("ISO8859_1")
203: || encoding.equalsIgnoreCase("8859_1") || encoding
204: .equalsIgnoreCase("UTF8")) ? DEFAULT_MIME_ENCODING
205: : convertJava2MimeEncoding(encoding);
206:
207: encoding = (null != jencoding) ? jencoding
208: : DEFAULT_MIME_ENCODING;
209: } else {
210: encoding = DEFAULT_MIME_ENCODING;
211: }
212: } catch (SecurityException se) {
213: encoding = DEFAULT_MIME_ENCODING;
214: }
215: } else {
216: encoding = convertJava2MimeEncoding(encoding);
217: }
218:
219: return encoding;
220: }
221:
222: /**
223: * Try the best we can to convert a Java encoding to a XML-style encoding.
224: * <p>
225: * This is not a public API.
226: * @param encoding non-null reference to encoding string, java style.
227: *
228: * @return ISO-style encoding string.
229: * @xsl.usage internal
230: */
231: private static String convertJava2MimeEncoding(String encoding) {
232: EncodingInfo enc = (EncodingInfo) _encodingTableKeyJava
233: .get(toUpperCaseFast(encoding));
234: if (null != enc)
235: return enc.name;
236: return encoding;
237: }
238:
239: /**
240: * Try the best we can to convert a Java encoding to a XML-style encoding.
241: * <p>
242: * This is not a public API.
243: *
244: * @param encoding non-null reference to encoding string, java style.
245: *
246: * @return ISO-style encoding string.
247: *
248: * @xsl.usage internal
249: */
250: public static String convertMime2JavaEncoding(String encoding) {
251:
252: for (int i = 0; i < _encodings.length; ++i) {
253: if (_encodings[i].name.equalsIgnoreCase(encoding)) {
254: return _encodings[i].javaName;
255: }
256: }
257:
258: return encoding;
259: }
260:
261: /**
262: * Load a list of all the supported encodings.
263: *
264: * System property "encodings" formatted using URL syntax may define an
265: * external encodings list. Thanks to Sergey Ushakov for the code
266: * contribution!
267: * @xsl.usage internal
268: */
269: private static EncodingInfo[] loadEncodingInfo() {
270: URL url = null;
271: try {
272: String urlString = null;
273: InputStream is = null;
274:
275: try {
276: urlString = System.getProperty(ENCODINGS_PROP, "");
277: } catch (SecurityException e) {
278: }
279:
280: if (urlString != null && urlString.length() > 0) {
281: url = new URL(urlString);
282: is = url.openStream();
283: }
284:
285: if (is == null) {
286: SecuritySupport ss = SecuritySupport.getInstance();
287: is = ss.getResourceAsStream(ObjectFactory
288: .findClassLoader(), ENCODINGS_FILE);
289: }
290:
291: Properties props = new Properties();
292: if (is != null) {
293: props.load(is);
294: is.close();
295: } else {
296: // Seems to be no real need to force failure here, let the
297: // system do its best... The issue is not really very critical,
298: // and the output will be in any case _correct_ though maybe not
299: // always human-friendly... :)
300: // But maybe report/log the resource problem?
301: // Any standard ways to report/log errors (in static context)?
302: }
303:
304: int totalEntries = props.size();
305: int totalMimeNames = 0;
306: Enumeration keys = props.keys();
307: for (int i = 0; i < totalEntries; ++i) {
308: String javaName = (String) keys.nextElement();
309: String val = props.getProperty(javaName);
310: totalMimeNames++;
311: int pos = val.indexOf(' ');
312: for (int j = 0; j < pos; ++j)
313: if (val.charAt(j) == ',')
314: totalMimeNames++;
315: }
316: EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
317: int j = 0;
318: keys = props.keys();
319: for (int i = 0; i < totalEntries; ++i) {
320: String javaName = (String) keys.nextElement();
321: String val = props.getProperty(javaName);
322: int pos = val.indexOf(' ');
323: String mimeName;
324: //int lastPrintable;
325: if (pos < 0) {
326: // Maybe report/log this problem?
327: // "Last printable character not defined for encoding " +
328: // mimeName + " (" + val + ")" ...
329: mimeName = val;
330: // lastPrintable = 0x00FF;
331: } else {
332: // lastPrintable =
333: // Integer.decode(val.substring(pos).trim()).intValue();
334: StringTokenizer st = new StringTokenizer(val
335: .substring(0, pos), ",");
336: for (boolean first = true; st.hasMoreTokens(); first = false) {
337: mimeName = st.nextToken();
338: ret[j] = new EncodingInfo(mimeName, javaName);
339: _encodingTableKeyMime.put(mimeName
340: .toUpperCase(), ret[j]);
341: if (first)
342: _encodingTableKeyJava.put(javaName
343: .toUpperCase(), ret[j]);
344: j++;
345: }
346: }
347: }
348: return ret;
349: } catch (java.net.MalformedURLException mue) {
350: throw new org.apache.xml.serializer.utils.WrappedRuntimeException(
351: mue);
352: } catch (java.io.IOException ioe) {
353: throw new org.apache.xml.serializer.utils.WrappedRuntimeException(
354: ioe);
355: }
356: }
357:
358: /**
359: * Return true if the character is the high member of a surrogate pair.
360: * <p>
361: * This is not a public API.
362: * @param ch the character to test
363: * @xsl.usage internal
364: */
365: static boolean isHighUTF16Surrogate(char ch) {
366: return ('\uD800' <= ch && ch <= '\uDBFF');
367: }
368:
369: /**
370: * Return true if the character is the low member of a surrogate pair.
371: * <p>
372: * This is not a public API.
373: * @param ch the character to test
374: * @xsl.usage internal
375: */
376: static boolean isLowUTF16Surrogate(char ch) {
377: return ('\uDC00' <= ch && ch <= '\uDFFF');
378: }
379:
380: /**
381: * Return the unicode code point represented by the high/low surrogate pair.
382: * <p>
383: * This is not a public API.
384: * @param highSurrogate the high char of the high/low pair
385: * @param lowSurrogate the low char of the high/low pair
386: * @xsl.usage internal
387: */
388: static int toCodePoint(char highSurrogate, char lowSurrogate) {
389: int codePoint = ((highSurrogate - 0xd800) << 10)
390: + (lowSurrogate - 0xdc00) + 0x10000;
391: return codePoint;
392: }
393:
394: /**
395: * Return the unicode code point represented by the char.
396: * A bit of a dummy method, since all it does is return the char,
397: * but as an int value.
398: * <p>
399: * This is not a public API.
400: * @param ch the char.
401: * @xsl.usage internal
402: */
403: static int toCodePoint(char ch) {
404: int codePoint = ch;
405: return codePoint;
406: }
407:
408: private static final Hashtable _encodingTableKeyJava = new Hashtable();
409: private static final Hashtable _encodingTableKeyMime = new Hashtable();
410: private static final EncodingInfo[] _encodings = loadEncodingInfo();
411: }
|