001: /*
002: * Copyright (c) 2007, intarsys consulting GmbH
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * - Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: *
010: * - Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * - Neither the name of intarsys nor the names of its contributors may be used
015: * to endorse or promote products derived from this software without specific
016: * prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
022: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
024: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
025: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
026: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
027: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
028: * POSSIBILITY OF SUCH DAMAGE.
029: */
030: package de.intarsys.pdf.st;
031:
032: import java.io.IOException;
033: import java.util.Arrays;
034: import de.intarsys.pdf.cos.COSObjectKey;
035: import de.intarsys.pdf.parser.COSDocumentParser;
036: import de.intarsys.pdf.parser.COSLoadError;
037: import de.intarsys.pdf.parser.COSLoadException;
038: import de.intarsys.pdf.parser.COSLoadWarning;
039: import de.intarsys.pdf.parser.PDFParser;
040: import de.intarsys.tools.randomaccess.IRandomAccess;
041:
042: /**
043: * A parser for XRef objects in "classical" format.
044: */
045: public class XRefTrailerParser extends AbstractXRefParser {
046: public XRefTrailerParser(STDocument doc, COSDocumentParser parser) {
047: super (doc, parser);
048: }
049:
050: protected STXRefSection parseXRef(IRandomAccess randomAccess)
051: throws IOException, COSLoadException {
052: STTrailerXRefSection xRefSection = new STTrailerXRefSection(
053: getDoc(), randomAccess.getOffset());
054: parseXRefSection(randomAccess, xRefSection);
055: xRefSection.cosSetDict(getDoc().getParser().parseTrailer(
056: randomAccess));
057: return xRefSection;
058: }
059:
060: protected void parseXRefSection(IRandomAccess randomAccess,
061: STTrailerXRefSection xRef) throws IOException,
062: COSLoadException {
063: getDoc().getParser().readSpaces(randomAccess);
064: byte[] token = new byte[4];
065: randomAccess.read(token);
066: if (Arrays.equals(token, PDFParser.TOKEN_xref)) {
067: token = new byte[PDFParser.TOKEN_trailer.length];
068: long oldpos = -1;
069: long pos = randomAccess.getOffset();
070:
071: // break from loop if we seem to repeat ourself
072: while (oldpos != pos) {
073: oldpos = pos;
074: parseXRefSubsection(randomAccess, xRef);
075: // maybe we got out of synch while stupidly parsing xref
076: // todo 3 @kkr seems compliant, verify with test suite documents
077: // once available
078: getDoc().getParser().readSpaces(randomAccess);
079: int c = randomAccess.read();
080: randomAccess.seekBy(-1);
081: if ((c == -1) || !PDFParser.isDigit(c)) {
082: break;
083: }
084: pos = randomAccess.getOffset();
085: }
086: } else {
087: COSLoadError e = new COSLoadError(
088: "no 'xref' key found at position:"
089: + randomAccess.getOffset());
090: handleError(e);
091: }
092: }
093:
094: protected void parseXRefSubsection(IRandomAccess randomAccess,
095: STTrailerXRefSection xRef) throws IOException,
096: COSLoadException {
097: // read begining
098: int beginningObject = getDoc().getParser().readInteger(
099: randomAccess, false);
100:
101: // followed by white space
102: int c = randomAccess.read();
103: if (c == -1) {
104: return;
105: }
106: if (c != 32) {
107: COSLoadWarning pwarn = new COSLoadWarning(
108: PDFParser.C_WARN_SINGLESPACE);
109: pwarn.setHint(new Long(randomAccess.getOffset()));
110: getParser().handleWarning(pwarn);
111: }
112: c = randomAccess.read();
113: if (c == -1) {
114: return;
115: }
116: randomAccess.seekBy(-1);
117: if (!PDFParser.isDigit(c)) {
118: COSLoadWarning pwarn = new COSLoadWarning(
119: PDFParser.C_WARN_SINGLESPACE);
120: pwarn.setHint(new Long(randomAccess.getOffset()));
121: getParser().handleWarning(pwarn);
122: }
123:
124: // read entry count
125: int numEntries = getDoc().getParser().readInteger(randomAccess,
126: true);
127: // read spaces
128: getDoc().getParser().readSpaces(randomAccess);
129: byte[] buffer = new byte[20];
130: STXRefEntry entry;
131: for (int i = 0; i < numEntries; i++) {
132: // read complete entry
133: // todo 1 @mit this may be complete garbage
134: c = randomAccess.read(buffer);
135: if (c == -1) {
136: // must fail anyway
137: return;
138: }
139: entry = parseEntry(beginningObject + i, buffer);
140: xRef.addEntry(entry);
141: }
142: }
143:
144: private STXRefEntry parseEntry(int objectNumber, byte[] entryBytes) {
145: int offset = 0;
146: for (int i = 0; i < 10; i++) {
147: offset = (offset * 10) + (entryBytes[i] - '0');
148: }
149:
150: int generation = 0;
151: for (int i = 11; i < 16; i++) {
152: generation = (generation * 10) + (entryBytes[i] - '0');
153: }
154:
155: boolean inuse = (entryBytes[17] == 'n');
156: if (inuse) {
157: return new STXRefEntryOccupied(new COSObjectKey(
158: objectNumber, generation), offset);
159: }
160: return new STXRefEntryFree(new COSObjectKey(objectNumber,
161: generation), offset);
162: }
163:
164: protected void loadPrevious(IRandomAccess randomAccess,
165: STXRefSection xRefSection) throws IOException,
166: COSLoadException {
167: int xrefStreamOffset = xRefSection.getXRefStmOffset();
168: if (xrefStreamOffset != -1) {
169: // this is for the mixed mode
170: AbstractXRefParser parser = new XRefStreamParser(getDoc(),
171: getParser());
172: randomAccess.seek(xrefStreamOffset);
173: try {
174: STStreamXRefSection xrefStream = (STStreamXRefSection) parser
175: .parse(randomAccess);
176: ((STTrailerXRefSection) xRefSection)
177: .setXRefStream(xrefStream);
178: } catch (Exception e) {
179: // todo create message
180: // ignore, just like adobe does...
181: }
182: }
183: int offset = xRefSection.getPreviousOffset();
184: if (offset != -1) {
185: AbstractXRefParser parser = new XRefTrailerParser(getDoc(),
186: getParser());
187: randomAccess.seek(offset);
188: STXRefSection trailer = parser.parse(randomAccess);
189: xRefSection.setPrevious(trailer);
190: }
191: }
192:
193: /*
194: * (non-Javadoc)
195: *
196: * @see de.intarsys.pdf.st.AbstractXRefParser#parse(de.intarsys.tools.randomaccess.IRandomAccess)
197: */
198: public STXRefSection parse(IRandomAccess randomAcces)
199: throws IOException, COSLoadException {
200: STXRefSection xRefSection = parseXRef(randomAcces);
201: loadPrevious(randomAcces, xRefSection);
202: return xRefSection;
203: }
204: }
|