001: /*
002: * Copyright (c) 2007, intarsys consulting GmbH
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * - Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: *
010: * - Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * - Neither the name of intarsys nor the names of its contributors may be used
015: * to endorse or promote products derived from this software without specific
016: * prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
022: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
024: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
025: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
026: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
027: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
028: * POSSIBILITY OF SUCH DAMAGE.
029: */
030: package de.intarsys.pdf.parser;
031:
032: import java.io.ByteArrayOutputStream;
033: import java.io.IOException;
034: import de.intarsys.pdf.content.CSContent;
035: import de.intarsys.pdf.content.CSOperation;
036: import de.intarsys.pdf.cos.COSIndirectObject;
037: import de.intarsys.pdf.cos.COSObject;
038: import de.intarsys.pdf.cos.COSString;
039: import de.intarsys.tools.randomaccess.IRandomAccess;
040: import de.intarsys.tools.randomaccess.RandomAccessByteArray;
041:
042: /**
043: * A parser for .pdf type content streams.
044: */
045: public class CSContentParser extends PDFParser {
046: private static final COSObject[] EMPTY_OPERANDS = new COSObject[0];
047:
048: private Object[] operands = new Object[10];
049:
050: private int size = 0;
051:
052: /**
053: * create a COSDocumentParser
054: */
055: public CSContentParser() {
056: super ();
057: }
058:
059: /*
060: * (non-Javadoc)
061: *
062: * @see de.intarsys.pdf.parser.PDFParser#createObjectReference()
063: */
064: protected COSIndirectObject createObjectReference()
065: throws IOException, COSLoadException {
066: COSLoadError e = new COSLoadError(
067: "indirect objects not allowed in streams");
068: handleError(e);
069: return null;
070: }
071:
072: protected COSString parseImageData(IRandomAccess input)
073: throws IOException {
074: ByteArrayOutputStream bos = new ByteArrayOutputStream();
075: int next = input.read();
076: if (next == ' ') {
077: // skip space after "ID"
078: next = input.read();
079: }
080: while (next != -1) {
081: /*
082: * spec is not clear but some internet articles claim that before
083: * "EI" a line break is required; accept CR, LF, and CRLF
084: */
085: if ((next == '\r') || (next == '\n')) {
086: int linebreak1;
087: boolean linebreak2;
088: boolean e;
089:
090: linebreak1 = next;
091: next = input.read();
092: linebreak2 = next == '\n';
093: if (linebreak2) {
094: next = input.read();
095: }
096: e = next == 'E';
097: if (e) {
098: next = input.read();
099: if (next == 'I') {
100: input.seekBy(linebreak2 ? (-4) : (-3));
101: break;
102: }
103: }
104: bos.write(linebreak1);
105: if (linebreak2) {
106: bos.write('\n');
107: }
108: if (e) {
109: bos.write('E');
110: }
111: }
112: bos.write(next);
113: next = input.read();
114: }
115: return COSString.create(bos.toByteArray());
116: }
117:
118: /**
119: * parse a valid COS object for use in stream context from the current
120: * stream position see PDF Reference v1.4, chapter 3.7.1 Content Streams
121: *
122: * @param streamContent
123: * The stream content object that defines the context of the
124: * stream.
125: *
126: * @return The stream operation parsed.
127: *
128: * @throws IOException
129: * @throws COSLoadException
130: */
131: protected CSOperation parseOperation(IRandomAccess input,
132: CSContent streamContent) throws IOException,
133: COSLoadException {
134: do {
135: Object element = parseElement(input);
136: if (element instanceof byte[]) {
137: COSObject[] copyOperands;
138:
139: // speed
140: if (size == 0) {
141: copyOperands = EMPTY_OPERANDS;
142: } else {
143: copyOperands = new COSObject[size];
144: System
145: .arraycopy(operands, 0, copyOperands, 0,
146: size);
147: }
148: size = 0;
149: return new CSOperation((byte[]) element, copyOperands);
150: } else if (element == null) {
151: int next = input.read();
152: if (next != -1) {
153: input.seekBy(-1);
154: COSLoadError e = new COSLoadError(
155: "unexpected char (" + (char) next + ")");
156: handleError(e);
157: }
158: size = 0;
159: return null;
160: } else {
161: if (size >= operands.length) {
162: Object[] newOperands = new Object[size << 2];
163: System.arraycopy(operands, 0, newOperands, 0, size);
164: operands = newOperands;
165: }
166: operands[size++] = element;
167: }
168: } while (true);
169: }
170:
171: protected CSOperation parseOperationEI(IRandomAccess input,
172: CSContent streamContent) throws IOException,
173: COSLoadException {
174: CSOperation op = null;
175: Object element = parseImageData(input);
176: if (element == null) {
177: int next = input.read();
178: COSLoadError e = new COSLoadError("unexpected char ("
179: + (char) next + ")");
180: handleError(e);
181: }
182: operands[size++] = element;
183: element = parseElement(input);
184: if (!(element instanceof byte[])) {
185: COSLoadError e = new COSLoadError("EI expected");
186: handleError(e);
187: }
188: COSObject[] copy = new COSObject[size];
189: System.arraycopy(operands, 0, copy, 0, size);
190: op = new CSOperation((byte[]) element, copy);
191: size = 0;
192: return op;
193: }
194:
195: /**
196: * parse a content stream.
197: *
198: * <p>
199: * See PDF Reference v1.4, chapter 3.7 Content Streams
200: * </p>
201: *
202: * @param data
203: * A byte array containing the encoded content stream
204: *
205: * @return the parsed content
206: *
207: * @throws IOException
208: * @throws COSLoadException
209: */
210: public CSContent parseStream(byte[] data) throws IOException,
211: COSLoadException {
212: return parseStream(new RandomAccessByteArray(data));
213: }
214:
215: /**
216: * parse a content stream.
217: *
218: * <p>
219: * See PDF Reference v1.4, chapter 3.7 Content Streams
220: * </p>
221: *
222: * @param input
223: * a open IRandomAccessData positioned at the beginning of the
224: * content stream
225: *
226: * @return the parsed content
227: *
228: * @throws IOException
229: * @throws COSLoadException
230: */
231: public CSContent parseStream(IRandomAccess input)
232: throws IOException, COSLoadException {
233: CSContent streamContent = CSContent.createNew();
234: while (true) {
235: CSOperation op = parseOperation(input, streamContent);
236: if (op == null) {
237: return streamContent;
238: }
239: streamContent.addOperation(op);
240: byte[] operatorToken = op.getOperatorToken();
241: if ((operatorToken.length == 2)
242: && (operatorToken[0] == 'I')
243: && (operatorToken[1] == 'D')) {
244: op = parseOperationEI(input, streamContent);
245: streamContent.addOperation(op);
246: }
247: }
248: }
249: }
|