001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.sax;
018:
019: import java.util.ArrayList;
020:
021: import org.apache.cocoon.xml.AbstractXMLProducer;
022: import org.xml.sax.SAXException;
023: import org.xml.sax.helpers.AttributesImpl;
024:
025: /**
* This is a simple XML interpreter which takes a byte array as input and replays it as SAX events.
027: *
028: * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
029: * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
030: * @author <a href="mailto:tcurdt@apache.org">Torsten Curdt</a>
031: * @version CVS $Id: XMLByteStreamInterpreter.java 433543 2006-08-22 06:22:54Z crossley $
032: */
033:
034: public final class XMLByteStreamInterpreter extends AbstractXMLProducer
035: implements XMLDeserializer {
036:
037: private static final int START_DOCUMENT = 0;
038: private static final int END_DOCUMENT = 1;
039: private static final int START_PREFIX_MAPPING = 2;
040: private static final int END_PREFIX_MAPPING = 3;
041: private static final int START_ELEMENT = 4;
042: private static final int END_ELEMENT = 5;
043: private static final int CHARACTERS = 6;
044: private static final int IGNORABLE_WHITESPACE = 7;
045: private static final int PROCESSING_INSTRUCTION = 8;
046: private static final int COMMENT = 9;
047: private static final int LOCATOR = 10;
048: private static final int START_DTD = 11;
049: private static final int END_DTD = 12;
050: private static final int START_CDATA = 13;
051: private static final int END_CDATA = 14;
052: private static final int SKIPPED_ENTITY = 15;
053: private static final int START_ENTITY = 16;
054: private static final int END_ENTITY = 17;
055:
056: private ArrayList list = new ArrayList();
057: private byte[] input;
058: private int currentPos;
059:
060: public void recycle() {
061: super .recycle();
062: this .list.clear();
063: this .input = null;
064: }
065:
066: public void deserialize(Object saxFragment) throws SAXException {
067: if (!(saxFragment instanceof byte[])) {
068: throw new SAXException(
069: "XMLDeserializer needs byte array for deserialization.");
070: }
071: this .list.clear();
072: this .input = (byte[]) saxFragment;
073: this .currentPos = 0;
074: this .checkProlog();
075: this .parse();
076: }
077:
078: private void parse() throws SAXException {
079: while (currentPos < input.length) {
080: switch (this .readEvent()) {
081: case START_DOCUMENT:
082: contentHandler.startDocument();
083: break;
084: case END_DOCUMENT:
085: contentHandler.endDocument();
086: break;
087: case START_PREFIX_MAPPING:
088: contentHandler.startPrefixMapping(this .readString(),
089: this .readString());
090: break;
091: case END_PREFIX_MAPPING:
092: contentHandler.endPrefixMapping(this .readString());
093: break;
094: case START_ELEMENT:
095: int attributes = this .readAttributes();
096: AttributesImpl atts = new AttributesImpl();
097: for (int i = 0; i < attributes; i++) {
098: atts.addAttribute(this .readString(), this
099: .readString(), this .readString(), this
100: .readString(), this .readString());
101: }
102: contentHandler.startElement(this .readString(), this
103: .readString(), this .readString(), atts);
104: break;
105: case END_ELEMENT:
106: contentHandler.endElement(this .readString(), this
107: .readString(), this .readString());
108: break;
109: case CHARACTERS:
110: char[] chars = this .readChars();
111: int len = chars.length;
112: while (len > 0 && chars[len - 1] == 0)
113: len--;
114: if (len > 0)
115: contentHandler.characters(chars, 0, len);
116: break;
117: case IGNORABLE_WHITESPACE:
118: char[] spaces = this .readChars();
119: len = spaces.length;
120: while (len > 0 && spaces[len - 1] == 0)
121: len--;
122: if (len > 0)
123: contentHandler.characters(spaces, 0, len);
124: break;
125: case PROCESSING_INSTRUCTION:
126: contentHandler.processingInstruction(this .readString(),
127: this .readString());
128: break;
129: case COMMENT:
130: chars = this .readChars();
131: if (this .lexicalHandler != null) {
132: len = chars.length;
133: while (len > 0 && chars[len - 1] == 0)
134: len--;
135: if (len > 0)
136: lexicalHandler.comment(chars, 0, len);
137: }
138: break;
139: case LOCATOR: {
140: String publicId = this .readString();
141: String systemId = this .readString();
142: int lineNumber = this .read();
143: int columnNumber = this .read();
144: org.xml.sax.helpers.LocatorImpl locator = new org.xml.sax.helpers.LocatorImpl();
145: locator.setPublicId(publicId);
146: locator.setSystemId(systemId);
147: locator.setLineNumber(lineNumber);
148: locator.setColumnNumber(columnNumber);
149: contentHandler.setDocumentLocator(locator);
150: }
151: break;
152: case START_DTD:
153: lexicalHandler.startDTD(this .readString(), this
154: .readString(), this .readString());
155: break;
156: case END_DTD:
157: lexicalHandler.endDTD();
158: break;
159: case START_CDATA:
160: lexicalHandler.startCDATA();
161: break;
162: case END_CDATA:
163: lexicalHandler.endCDATA();
164: break;
165: case SKIPPED_ENTITY:
166: contentHandler.skippedEntity(this .readString());
167: break;
168: case START_ENTITY:
169: lexicalHandler.startEntity(this .readString());
170: break;
171: case END_ENTITY:
172: lexicalHandler.endEntity(this .readString());
173: break;
174: default:
175: throw new SAXException(
176: "parsing error: event not supported.");
177: }
178: }
179: }
180:
181: private void checkProlog() throws SAXException {
182: int valid = 0;
183: if (this .read() == 'C')
184: valid++;
185: if (this .read() == 'X')
186: valid++;
187: if (this .read() == 'M')
188: valid++;
189: if (this .read() == 'L')
190: valid++;
191: if (this .read() == 1)
192: valid++;
193: if (this .read() == 0)
194: valid++;
195: if (valid != 6)
196: throw new SAXException("Unrecognized file format.");
197: }
198:
199: protected int readEvent() throws SAXException {
200: return this .read();
201: }
202:
203: private int readAttributes() throws SAXException {
204: int ch1 = this .read();
205: int ch2 = this .read();
206: return ((ch1 << 8) + (ch2 << 0));
207: }
208:
209: private String readString() throws SAXException {
210: int length = this .readWord();
211: int index = length & 0x00007FFF;
212: if (length >= 0x00008000) {
213: return (String) list.get(index);
214: } else {
215: if (length == 0x00007FFF) {
216: length = this .readLong();
217: }
218: char[] chars = this .readChars(length);
219: int len = chars.length;
220: if (len > 0) {
221: while (chars[len - 1] == 0)
222: len--;
223: }
224: String str;
225: if (len == 0) {
226: str = "";
227: } else {
228: str = new String(chars, 0, len);
229: }
230: list.add(str);
231: return str;
232: }
233: }
234:
235: /**
236: * The returned char array might contain any number of zero bytes
237: * at the end
238: */
239: private char[] readChars() throws SAXException {
240: int length = this .readWord();
241: if (length == 0x00007FFF) {
242: length = this .readLong();
243: }
244: return this .readChars(length);
245: }
246:
247: private int read() throws SAXException {
248: if (currentPos >= input.length)
249: throw new SAXException("Reached end of input.");
250: return input[currentPos++] & 0xff;
251: }
252:
253: /**
254: * The returned char array might contain any number of zero bytes
255: * at the end
256: */
257: private char[] readChars(int len) throws SAXException {
258: char[] str = new char[len];
259: byte[] bytearr = new byte[len];
260: int c, char2, char3;
261: int count = 0;
262: int i = 0;
263:
264: this .readBytes(bytearr);
265:
266: while (count < len) {
267: c = bytearr[count] & 0xff;
268: switch (c >> 4) {
269: case 0:
270: case 1:
271: case 2:
272: case 3:
273: case 4:
274: case 5:
275: case 6:
276: case 7:
277: // 0xxxxxxx
278: count++;
279: str[i++] = (char) c;
280: break;
281: case 12:
282: case 13:
283: // 110x xxxx 10xx xxxx
284: count += 2;
285: char2 = bytearr[count - 1];
286: str[i++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
287: break;
288: case 14:
289: // 1110 xxxx 10xx xxxx 10xx xxxx
290: count += 3;
291: char2 = bytearr[count - 2];
292: char3 = bytearr[count - 1];
293: str[i++] = ((char) (((c & 0x0F) << 12)
294: | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)));
295: break;
296: default:
297: // 10xx xxxx, 1111 xxxx
298: throw new SAXException("UTFDataFormatException");
299: }
300: }
301:
302: return str;
303: }
304:
305: private void readBytes(byte[] b) throws SAXException {
306: if (this .currentPos + b.length > this .input.length) {
307: // TC:
308: // >= prevents getting the last byte
309: // 0 1 2 3 4 input.length = 5
310: // |_ currentPos = 2
311: // b.length = 3
312: // 2 + 3 > 5 ok
313: // 2 + 3 >= 5 wrong
314: // why has this worked before?
315: throw new SAXException("End of input reached.");
316: }
317: System.arraycopy(this .input, this .currentPos, b, 0, b.length);
318: this .currentPos += b.length;
319: }
320:
321: private int readWord() throws SAXException {
322: int ch1 = this .read();
323: int ch2 = this .read();
324: return ((ch1 << 8) + (ch2 << 0));
325: }
326:
327: private int readLong() throws SAXException {
328: int ch1 = this .read();
329: int ch2 = this .read();
330: int ch3 = this .read();
331: int ch4 = this .read();
332: return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
333: }
334: }
|