001: package net.sf.saxon.charcode;
002:
003: import net.sf.saxon.om.XMLChar;
004:
005: import java.nio.CharBuffer;
006: import java.nio.charset.CharacterCodingException;
007: import java.nio.charset.Charset;
008: import java.nio.charset.CharsetEncoder;
009: import java.util.HashMap;
010:
011: /**
012: * This class establishes properties of a character set that is
013: * known to the Java VM but not specifically known to Saxon. It avoids
014: * using the encoder.canEncode() method because there is a known bug
015: * (in JDK 1.4.2) that for some encodings, this returns true for
016: * every character. So this version of the class actually attempts
017: * to encode the characters, and catches the exception when it fails.
018: */
019:
020: public class BuggyCharacterSet implements CharacterSet {
021:
022: private static HashMap map;
023:
024: private CharsetEncoder encoder;
025:
026: // This class is written on the assumption that the CharsetEncoder.canEncode()
027: // method may be expensive. For BMP characters, it therefore remembers the results
028: // so each character is only looked up the first time it is encountered.
029:
030: private byte[] charinfo = new byte[65536];
031: // rely on initialization to zeroes
032:
033: //private final static byte UNKNOWN = 0;
034: private static final byte GOOD = 1;
035: private static final byte BAD = 2;
036:
037: private BuggyCharacterSet(Charset charset) {
038: encoder = charset.newEncoder();
039: }
040:
041: public static synchronized BuggyCharacterSet makeCharSet(
042: Charset charset) {
043: if (map == null) {
044: map = new HashMap(10);
045: }
046: BuggyCharacterSet c = (BuggyCharacterSet) map.get(charset);
047: if (c == null) {
048: c = new BuggyCharacterSet(charset);
049: map.put(charset, c);
050: }
051: return c;
052: }
053:
054: public final boolean inCharset(int c) {
055: // Assume ASCII chars are always OK
056: if (c <= 127) {
057: return true;
058: }
059: try {
060: if (c <= 65535) {
061: if (charinfo[c] == GOOD) {
062: return true;
063: } else if (charinfo[c] == BAD) {
064: return false;
065: } else {
066: charinfo[c] = BAD; // guilty until proved innocent
067: char[] cc = { (char) c };
068: encoder.encode(CharBuffer.wrap(cc));
069: charinfo[c] = GOOD;
070: return true;
071: }
072: } else {
073: char[] ss = { XMLChar.highSurrogate(c),
074: XMLChar.lowSurrogate(c) };
075: encoder.encode(CharBuffer.wrap(ss));
076: return true;
077: }
078: } catch (CharacterCodingException ex) {
079: return false;
080: }
081: }
082:
083: }
084:
085: //
086: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
087: // you may not use this file except in compliance with the License. You may obtain a copy of the
088: // License at http://www.mozilla.org/MPL/
089: //
090: // Software distributed under the License is distributed on an "AS IS" basis,
091: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
092: // See the License for the specific language governing rights and limitations under the License.
093: //
094: // The Original Code is: all this file.
095: //
096: // The Initial Developer of the Original Code is
097: // Aleksei Makarov [makarov@iitam.omsk.net.ru]
098: //
099: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
100: //
101: // Contributor(s): none.
102: //
|