001: /*
002: * $Id: ISO8859_1XMLDecoder.java,v 1.5 2004/07/11 09:37:37 yuvalo Exp $
003: *
004: * (C) Copyright 2002-2004 by Yuval Oren. All rights reserved.
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: package com.bluecast.xml;
020:
021: import com.bluecast.io.CharsetDecoder;
022: import com.bluecast.io.IllegalCharException;
023:
024: import java.io.CharConversionException;
025:
026: /**
027: * Converts bytes to ISO8859-1 characters while converting
028: * carriage returns into linefeeds and CRLF into LF.
029: * Checks for invalid XML characters.
030: *
031: * @author Yuval Oren, yuval@bluecast.com
032: * @version $Revision: 1.5 $
033: */
034: final public class ISO8859_1XMLDecoder implements XMLDecoder {
035: private boolean sawCR = false;
036:
037: public CharsetDecoder newCharsetDecoder() {
038: return newXMLDecoder();
039: }
040:
041: public XMLDecoder newXMLDecoder() {
042: return new ISO8859_1XMLDecoder();
043: }
044:
045: public int minBytesPerChar() {
046: return 1;
047: }
048:
049: public int maxBytesPerChar() {
050: return 1;
051: }
052:
053: public void reset() {
054: sawCR = false;
055: }
056:
057: public void decode(byte[] in_buf, int in_off, int in_len,
058: char[] out_buf, int out_off, int out_len, int[] result)
059: throws CharConversionException {
060: internalDecode(in_buf, in_off, in_len, out_buf, out_off,
061: out_len, result, false);
062: }
063:
064: public void decodeXMLDecl(byte[] in_buf, int in_off, int in_len,
065: char[] out_buf, int out_off, int out_len, int[] result)
066: throws CharConversionException {
067: internalDecode(in_buf, in_off, in_len, out_buf, out_off,
068: out_len, result, true);
069: }
070:
071: private void internalDecode(byte[] in_buf, int in_off, int in_len,
072: char[] out_buf, int out_off, int out_len, int[] result,
073: boolean decodeDecl) throws CharConversionException {
074:
075: int i, o;
076: inputLoop: for (i = o = 0; i < in_len && o < out_len; i++) {
077: char c = (char) (0xFF & in_buf[in_off + i]);
078: if (c >= 0x20) {
079: sawCR = false;
080: out_buf[out_off + o++] = (char) c;
081: } else {
082: switch (c) {
083: case '\n':
084: if (sawCR) {
085: sawCR = false;
086: } else
087: out_buf[out_off + o++] = '\n';
088: break;
089:
090: case '\r':
091: sawCR = true;
092: out_buf[out_off + o++] = '\n';
093: break;
094:
095: case '\t':
096: out_buf[out_off + o++] = '\t';
097: break;
098:
099: default:
100: if (decodeDecl)
101: break inputLoop;
102: else
103: throw new IllegalCharException(
104: "Illegal XML character: 0x"
105: + Integer.toHexString(c));
106: }
107: }
108: }
109: result[0] = i;
110: result[1] = o;
111: }
112: }
|