001: /*
002: * Copyright (c) 2007, intarsys consulting GmbH
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * - Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: *
010: * - Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * - Neither the name of intarsys nor the names of its contributors may be used
015: * to endorse or promote products derived from this software without specific
016: * prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
022: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
024: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
025: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
026: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
027: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
028: * POSSIBILITY OF SUCH DAMAGE.
029: */
030: package de.intarsys.pdf.st;
031:
032: import java.io.IOException;
033: import java.util.ArrayList;
034: import java.util.Arrays;
035: import java.util.List;
036: import de.intarsys.pdf.cos.COSDictionary;
037: import de.intarsys.pdf.cos.COSObjectKey;
038: import de.intarsys.pdf.cos.COSTrailer;
039: import de.intarsys.pdf.parser.COSDocumentParser;
040: import de.intarsys.pdf.parser.COSLoadError;
041: import de.intarsys.pdf.parser.COSLoadException;
042: import de.intarsys.pdf.parser.PDFParser;
043: import de.intarsys.tools.randomaccess.IRandomAccess;
044:
/**
 * <p>
 * A simple xref rebuilding parser.
 * </p>
 * <p>
 * The parser reads the document line by line and checks whether a line begins
 * with an object definition, which looks like: number number "obj". Every
 * object definition found is added as an XRefEntry to the STDocument.
 * </p>
 * <p>
 * False object definitions may be picked up, and they can cause a problem if a
 * <b>real</b> object with the same object number is read after the false one.
 * On the other hand, the document was already broken before this parser was
 * called.
 * </p>
 */
062: public class XRefFallbackParser extends AbstractXRefParser {
063: private STTrailerXRefSection xRefSection;
064:
065: private List trailers = new ArrayList();
066:
067: public XRefFallbackParser(STDocument doc, COSDocumentParser parser) {
068: super (doc, parser);
069: this .xRefSection = new STTrailerXRefSection(doc);
070: }
071:
072: /*
073: * (non-Javadoc)
074: *
075: * @see de.intarsys.pdf.st.AbstractXRefParser#parse(de.intarsys.tools.randomaccess.IRandomAccess)
076: */
077: public STXRefSection parse(IRandomAccess input) throws IOException,
078: COSLoadException {
079: input.seek(0);
080: int objNumber;
081: int genNumber;
082: byte[] token;
083: long offset;
084:
085: while (readUptoNewLine(input)) {
086: offset = input.getOffset();
087: try {
088: COSDictionary trailer = getParser().parseTrailer(input);
089: trailers.add(trailer);
090: continue;
091: } catch (IOException e) {
092: // no trailer
093: } catch (COSLoadException e) {
094: // no trailer
095: }
096: try {
097: objNumber = getParser().readInteger(input, true);
098: if (objNumber != 0) {
099: genNumber = getParser().readInteger(input, true);
100: token = getParser().readToken(input);
101: if (Arrays.equals(PDFParser.TOKEN_obj, token)) {
102: getXRefSection().addEntry(
103: new STXRefEntryOccupied(
104: new COSObjectKey(objNumber,
105: genNumber),
106: (int) offset));
107: }
108: }
109: } catch (IOException e) {
110: // no obj
111: }
112: }
113:
114: checkXRefSections();
115: return getXRefSection();
116: }
117:
118: /**
119: * Check if we found a root object
120: *
121: * @throws IOException
122: * @throws COSLoadException
123: */
124: private void checkXRefSections() throws IOException,
125: COSLoadException {
126: if (trailers.isEmpty()) {
127: COSLoadError e = new COSLoadError("no trailer found");
128: handleError(e);
129: }
130: boolean rootFound = false;
131: for (int i = trailers.size() - 1; i >= 0; i--) {
132: COSDictionary trailer = (COSDictionary) trailers.get(i);
133: if (trailer.containsKey(COSTrailer.DK_Root)) {
134: getXRefSection().cosSetDict(trailer);
135: rootFound = true;
136: break;
137: }
138: }
139: if (rootFound == false) {
140: COSLoadError e = new COSLoadError(
141: "trailer doesn't contain a root entry");
142: handleError(e);
143: }
144: getXRefSection().setSize(getXRefSection().getMaxObjectNumber());
145: }
146:
147: private boolean readUptoNewLine(IRandomAccess input)
148: throws IOException {
149: int i;
150: while (true) {
151: i = input.read();
152: if (i == -1) {
153: return false;
154: }
155: if (PDFParser.isEOL(i)) {
156: if (i == PDFParser.CHAR_CR) {
157: i = input.read();
158: if (i != PDFParser.CHAR_LF) {
159: input.seekBy(-1);
160: }
161: }
162: return true;
163: }
164: }
165: }
166:
167: protected STTrailerXRefSection getXRefSection() {
168: return xRefSection;
169: }
170: }
|