001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.readers;
059:
060: import org.apache.xerces.framework.XMLErrorReporter;
061: import org.apache.xerces.utils.ChunkyByteArray;
062: import org.apache.xerces.utils.QName;
063: import org.apache.xerces.utils.StringPool;
064:
065: import java.io.InputStreamReader;
066: import java.io.IOException;
067: import java.io.UnsupportedEncodingException;
068:
069: /**
070: *
071: * @version
072: */
073: final class UTF8Recognizer extends XMLDeclRecognizer {
074: private byte[] fUTF8BOM = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
075:
076: //
077: //
078: //
079: public XMLEntityHandler.EntityReader recognize(
080: XMLEntityReaderFactory readerFactory,
081: XMLEntityHandler entityHandler,
082: XMLErrorReporter errorReporter,
083: boolean sendCharDataAsCharArray, StringPool stringPool,
084: ChunkyByteArray data, boolean xmlDecl,
085: boolean allowJavaEncodingName) throws Exception {
086: XMLEntityHandler.EntityReader reader = null;
087:
088: //check to see if there is a UTF8 BOM, if see one, skip past it.
089: boolean seeBOM = false;
090: byte bom0 = data.byteAt(0);
091: if (bom0 == fUTF8BOM[0]) {
092: byte bom1 = data.byteAt(1);
093: if (bom1 == fUTF8BOM[1]) {
094: byte bom2 = data.byteAt(2);
095: if (bom2 == fUTF8BOM[2]) {
096: seeBOM = true;
097: }
098: }
099: }
100: if (seeBOM) {
101: // it will have the same content anyway.
102: data.read(fUTF8BOM, 0, 3);
103: }
104:
105: byte b0 = data.byteAt(0);
106: boolean debug = false;
107:
108: if (b0 == '<') {
109: int b1 = data.byteAt(1);
110: if (b1 == '?') {
111: if (data.byteAt(2) == 'x' && data.byteAt(3) == 'm'
112: && data.byteAt(4) == 'l') {
113: int b5 = data.byteAt(5);
114: if (b5 == 0x20 || b5 == 0x09 || b5 == 0x0a
115: || b5 == 0x0d) {
116: XMLEntityHandler.EntityReader declReader = new XMLDeclReader(
117: entityHandler, errorReporter,
118: sendCharDataAsCharArray, data,
119: stringPool);
120: int encoding = prescanXMLDeclOrTextDecl(
121: declReader, xmlDecl);
122: if (encoding != -1) {
123: String encname = stringPool
124: .orphanString(encoding);
125: String enc = encname.toUpperCase();
126: if ("ISO-10646-UCS-2".equals(enc))
127: throw new UnsupportedEncodingException(
128: encname);
129: if ("ISO-10646-UCS-4".equals(enc))
130: throw new UnsupportedEncodingException(
131: encname);
132: if ("UTF-16".equals(enc))
133: throw new UnsupportedEncodingException(
134: encname);
135:
136: String javaencname = MIME2Java.convert(enc);
137: if (null == javaencname) {
138: // Not supported
139: if (allowJavaEncodingName) {
140: javaencname = encname;
141: } else {
142: throw new UnsupportedEncodingException(
143: encname);
144: }
145: }
146: try {
147: data.rewind();
148: if ("UTF-8"
149: .equalsIgnoreCase(javaencname)
150: || "UTF8"
151: .equalsIgnoreCase(javaencname)) {
152: reader = readerFactory
153: .createUTF8Reader(
154: entityHandler,
155: errorReporter,
156: sendCharDataAsCharArray,
157: data, stringPool);
158: } else {
159: reader = readerFactory
160: .createCharReader(
161: entityHandler,
162: errorReporter,
163: sendCharDataAsCharArray,
164: new InputStreamReader(
165: data,
166: javaencname),
167: stringPool);
168: }
169: } catch (UnsupportedEncodingException e) {
170: throw new UnsupportedEncodingException(
171: encname);
172: } catch (Exception e) {
173: if (debug == true)
174: e.printStackTrace(); // Internal Error
175: }
176: } else {
177: data.rewind();
178: reader = readerFactory.createUTF8Reader(
179: entityHandler, errorReporter,
180: sendCharDataAsCharArray, data,
181: stringPool);
182: }
183: }
184: }
185: }
186: }
187: return reader;
188: }
189:
190: final class XMLDeclReader extends XMLEntityReader {
191: //
192: //
193: //
194: private StringPool fStringPool = null;
195: private ChunkyByteArray fData = null;
196:
197: //
198: //
199: //
200: XMLDeclReader(XMLEntityHandler entityHandler,
201: XMLErrorReporter errorReporter,
202: boolean sendCharDataAsCharArray, ChunkyByteArray data,
203: StringPool stringPool) {
204: super (entityHandler, errorReporter, sendCharDataAsCharArray);
205: fStringPool = stringPool;
206: fData = data;
207: }
208:
209: //
210: // These methods are used to parse XMLDecl/TextDecl.
211: //
212: public boolean lookingAtChar(char ch, boolean skipPastChar)
213: throws IOException {
214: if (fData.byteAt(fCurrentOffset) != ch)
215: return false;
216: if (skipPastChar)
217: fCurrentOffset++;
218: return true;
219: }
220:
221: public boolean lookingAtSpace(boolean skipPastChar)
222: throws IOException {
223: int ch = fData.byteAt(fCurrentOffset) & 0xff;
224: if (ch != 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D)
225: return false;
226: if (skipPastChar)
227: fCurrentOffset++;
228: return true;
229: }
230:
231: public void skipPastSpaces() throws IOException {
232: while (true) {
233: int ch = fData.byteAt(fCurrentOffset) & 0xff;
234: if (ch != 0x20 && ch != 0x09 && ch != 0x0A
235: && ch != 0x0D)
236: return;
237: fCurrentOffset++;
238: }
239: }
240:
241: public boolean skippedString(char[] s) throws IOException {
242: int offset = fCurrentOffset;
243: for (int i = 0; i < s.length; i++) {
244: if (fData.byteAt(offset) != s[i])
245: return false;
246: offset++;
247: }
248: fCurrentOffset = offset;
249: return true;
250: }
251:
252: public int scanStringLiteral() throws Exception {
253: boolean single;
254: if (!(single = lookingAtChar('\'', true))
255: && !lookingAtChar('\"', true)) {
256: return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
257: }
258: int offset = fCurrentOffset;
259: char qchar = single ? '\'' : '\"';
260: while (true) {
261: byte b = fData.byteAt(fCurrentOffset);
262: if (b == qchar)
263: break;
264: if (b == -1)
265: return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
266: fCurrentOffset++;
267: }
268: int length = fCurrentOffset - offset;
269: StringBuffer str = new StringBuffer(length);
270: for (int i = 0; i < length; i++) {
271: str.append((char) fData.byteAt(offset + i));
272: }
273: int stringIndex = fStringPool.addString(str.toString());
274: fCurrentOffset++; // move past qchar
275: return stringIndex;
276: }
277:
278: //
279: // The rest of the methods in XMLReader are not used for parsing XMLDecl/TextDecl.
280: //
281: public void append(XMLEntityHandler.CharBuffer charBuffer,
282: int offset, int length) {
283: throw new RuntimeException("RDR002 cannot happen");
284: }
285:
286: public int addString(int offset, int length) {
287: throw new RuntimeException("RDR002 cannot happen");
288: }
289:
290: public int addSymbol(int offset, int length) {
291: throw new RuntimeException("RDR002 cannot happen");
292: }
293:
294: public void skipToChar(char ch) throws IOException {
295: throw new IOException("RDR002 cannot happen");
296: }
297:
298: public void skipPastName(char fastcheck) throws IOException {
299: throw new IOException("RDR002 cannot happen");
300: }
301:
302: public void skipPastNmtoken(char fastcheck) throws IOException {
303: throw new IOException("RDR002 cannot happen");
304: }
305:
306: public boolean lookingAtValidChar(boolean skipPastChar)
307: throws IOException {
308: throw new IOException("RDR002 cannot happen");
309: }
310:
311: public int scanInvalidChar() throws IOException {
312: throw new IOException("RDR002 cannot happen");
313: }
314:
315: public int scanCharRef(boolean hex) throws IOException {
316: throw new IOException("RDR002 cannot happen");
317: }
318:
319: public int scanAttValue(char qchar, boolean asSymbol)
320: throws IOException {
321: throw new IOException("RDR002 cannot happen");
322: }
323:
324: public int scanEntityValue(int qchar, boolean createString)
325: throws IOException {
326: throw new IOException("RDR002 cannot happen");
327: }
328:
329: public boolean scanExpectedName(char fastcheck,
330: StringPool.CharArrayRange expectedName)
331: throws IOException {
332: throw new IOException("RDR002 cannot happen");
333: }
334:
335: public void scanQName(char fastcheck, QName qname)
336: throws IOException {
337: throw new IOException("RDR002 cannot happen");
338: }
339:
340: public int scanName(char fastcheck) throws IOException {
341: throw new IOException("RDR002 cannot happen");
342: }
343:
344: public int scanContent(QName element) throws IOException {
345: throw new IOException("RDR002 cannot happen");
346: }
347: }
348: }
|