001: /**
002: * Copyright (c) 2003-2006, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.cos;
031:
032: import java.io.File;
033: import java.io.IOException;
034:
035: import java.util.ArrayList;
036: import java.util.HashMap;
037: import java.util.Iterator;
038: import java.util.List;
039: import java.util.Map;
040:
041: import org.pdfbox.exceptions.COSVisitorException;
042: import org.pdfbox.io.RandomAccess;
043: import org.pdfbox.io.RandomAccessFile;
044:
045: import org.pdfbox.pdfparser.PDFObjectStreamParser;
046: import org.pdfbox.persistence.util.COSObjectKey;
047:
048: /**
049: * This is the in-memory representation of the PDF document. You need to call
050: * close() on this object when you are done using it!!
051: *
052: * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
053: * @version $Revision: 1.28 $
054: */
055: public class COSDocument extends COSBase {
056: private float version;
057:
058: /**
059: * added objects (actually preserving original sequence).
060: */
061: private List objects = new ArrayList();
062:
063: /**
064: * a pool of objects read/referenced so far
065: * used to resolve indirect object references.
066: */
067: private Map objectPool = new HashMap();
068:
069: /**
070: * Document trailer dictionary.
071: */
072: private COSDictionary trailer;
073:
074: /**
075: * This file will store the streams in order to conserve memory.
076: */
077: private RandomAccess scratchFile = null;
078:
079: private File tmpFile = null;
080:
081: private String headerString = "%PDF-1.4";
082:
083: /**
084: * Constructor. Uses the java.io.tmpdir value to create a file
085: * to store the streams.
086: *
087: * @throws IOException If there is an error creating the tmp file.
088: */
089: public COSDocument() throws IOException {
090: this (new File(System.getProperty("java.io.tmpdir")));
091: }
092:
093: /**
094: * Constructor that will create a create a scratch file in the
095: * following directory.
096: *
097: * @param scratchDir The directory to store a scratch file.
098: *
099: * @throws IOException If there is an error creating the tmp file.
100: */
101: public COSDocument(File scratchDir) throws IOException {
102: tmpFile = File.createTempFile("pdfbox", "tmp", scratchDir);
103: scratchFile = new RandomAccessFile(tmpFile, "rw");
104: }
105:
106: /**
107: * Constructor that will use the following random access file for storage
108: * of the PDF streams. The client of this method is responsible for deleting
109: * the storage if necessary that this file will write to. The close method
110: * will close the file though.
111: *
112: * @param file The random access file to use for storage.
113: */
114: public COSDocument(RandomAccess file) {
115: scratchFile = file;
116: }
117:
118: /**
119: * This will get the scratch file for this document.
120: *
121: * @return The scratch file.
122: */
123: public RandomAccess getScratchFile() {
124: return scratchFile;
125: }
126:
127: /**
128: * This will get the first dictionary object by type.
129: *
130: * @param type The type of the object.
131: *
132: * @return This will return an object with the specified type.
133: */
134: public COSObject getObjectByType(String type) {
135: return getObjectByType(COSName.getPDFName(type));
136: }
137:
138: /**
139: * This will get the first dictionary object by type.
140: *
141: * @param type The type of the object.
142: *
143: * @return This will return an object with the specified type.
144: */
145: public COSObject getObjectByType(COSName type) {
146: COSObject retval = null;
147: Iterator iter = objects.iterator();
148: while (iter.hasNext() && retval == null) {
149: COSObject object = (COSObject) iter.next();
150:
151: COSBase realObject = object.getObject();
152: if (realObject instanceof COSDictionary) {
153: COSDictionary dic = (COSDictionary) realObject;
154: COSName objectType = (COSName) dic
155: .getItem(COSName.TYPE);
156: if (objectType != null && objectType.equals(type)) {
157: retval = object;
158: }
159: }
160: }
161: return retval;
162: }
163:
164: /**
165: * This will get all dictionary objects by type.
166: *
167: * @param type The type of the object.
168: *
169: * @return This will return an object with the specified type.
170: */
171: public List getObjectsByType(String type) {
172: return getObjectsByType(COSName.getPDFName(type));
173: }
174:
175: /**
176: * This will get a dictionary object by type.
177: *
178: * @param type The type of the object.
179: *
180: * @return This will return an object with the specified type.
181: */
182: public List getObjectsByType(COSName type) {
183: List retval = new ArrayList();
184: Iterator iter = objects.iterator();
185: while (iter.hasNext()) {
186: COSObject object = (COSObject) iter.next();
187:
188: COSBase realObject = object.getObject();
189: if (realObject instanceof COSDictionary) {
190: COSDictionary dic = (COSDictionary) realObject;
191: COSName objectType = (COSName) dic
192: .getItem(COSName.TYPE);
193: if (objectType != null && objectType.equals(type)) {
194: retval.add(object);
195: }
196: }
197: }
198: return retval;
199: }
200:
201: /**
202: * This will print contents to stdout.
203: */
204: public void print() {
205: Iterator iter = objects.iterator();
206: while (iter.hasNext()) {
207: COSObject object = (COSObject) iter.next();
208: System.out.println(object);
209: }
210: }
211:
212: /**
213: * This will set the version of this PDF document.
214: *
215: * @param versionValue The version of the PDF document.
216: */
217: public void setVersion(float versionValue) {
218: version = versionValue;
219: }
220:
221: /**
222: * This will get the version of this PDF document.
223: *
224: * @return This documents version.
225: */
226: public float getVersion() {
227: return version;
228: }
229:
230: /**
231: * This will tell if this is an encrypted document.
232: *
233: * @return true If this document is encrypted.
234: */
235: public boolean isEncrypted() {
236: boolean encrypted = false;
237: if (trailer != null) {
238: encrypted = trailer.getDictionaryObject("Encrypt") != null;
239: }
240: return encrypted;
241: }
242:
243: /**
244: * This will get the encryption dictionary if the document is encrypted or null
245: * if the document is not encrypted.
246: *
247: * @return The encryption dictionary.
248: */
249: public COSDictionary getEncryptionDictionary() {
250: return (COSDictionary) trailer.getDictionaryObject(COSName
251: .getPDFName("Encrypt"));
252: }
253:
254: /**
255: * This will set the encryption dictionary, this should only be called when
256: * encypting the document.
257: *
258: * @param encDictionary The encryption dictionary.
259: */
260: public void setEncryptionDictionary(COSDictionary encDictionary) {
261: trailer.setItem(COSName.getPDFName("Encrypt"), encDictionary);
262: }
263:
264: /**
265: * This will get the document ID.
266: *
267: * @return The document id.
268: */
269: public COSArray getDocumentID() {
270: return (COSArray) getTrailer()
271: .getItem(COSName.getPDFName("ID"));
272: }
273:
274: /**
275: * This will set the document ID.
276: *
277: * @param id The document id.
278: */
279: public void setDocumentID(COSArray id) {
280: getTrailer().setItem(COSName.getPDFName("ID"), id);
281: }
282:
283: /**
284: * This will create an object for this document.
285: *
286: * Create an indirect object out of the direct type and include in the document
287: * for later lookup via document a map from direct object to indirect object
288: * is maintained. this provides better support for manual PDF construction.
289: *
290: * @param base the base object to wrap in an indirect object.
291: *
292: * @return The pdf object that wraps the base, or creates a new one.
293: */
294: /**
295: public COSObject createObject( COSBase base )
296: {
297: COSObject obj = (COSObject)objectMap.get(base);
298: if (obj == null)
299: {
300: obj = new COSObject( base );
301: obj.addTo(this);
302: }
303: return obj;
304: }**/
305:
306: /**
307: * This will get the document catalog.
308: *
309: * Maybe this should move to an object at PDFEdit level
310: *
311: * @return catalog is the root of all document activities
312: *
313: * @throws IOException If no catalog can be found.
314: */
315: public COSObject getCatalog() throws IOException {
316: COSObject catalog = getObjectByType(COSName.CATALOG);
317: if (catalog == null) {
318: throw new IOException("Catalog cannot be found");
319: }
320: return catalog;
321: }
322:
323: /**
324: * This will get a list of all available objects.
325: *
326: * @return A list of all objects.
327: */
328: public List getObjects() {
329: return new ArrayList(objects);
330: }
331:
332: /**
333: * This will get the document trailer.
334: *
335: * @return the document trailer dict
336: */
337: public COSDictionary getTrailer() {
338: return trailer;
339: }
340:
341: /**
342: * // MIT added, maybe this should not be supported as trailer is a persistence construct.
343: * This will set the document trailer.
344: *
345: * @param newTrailer the document trailer dictionary
346: */
347: public void setTrailer(COSDictionary newTrailer) {
348: trailer = newTrailer;
349: }
350:
351: /**
352: * visitor pattern double dispatch method.
353: *
354: * @param visitor The object to notify when visiting this object.
355: * @return any object, depending on the visitor implementation, or null
356: * @throws COSVisitorException If an error occurs while visiting this object.
357: */
358: public Object accept(ICOSVisitor visitor)
359: throws COSVisitorException {
360: return visitor.visitFromDocument(this );
361: }
362:
363: /**
364: * This will close all storage and delete the tmp files.
365: *
366: * @throws IOException If there is an error close resources.
367: */
368: public void close() throws IOException {
369: if (scratchFile != null) {
370: scratchFile.close();
371: scratchFile = null;
372: }
373: if (tmpFile != null) {
374: tmpFile.delete();
375: tmpFile = null;
376: }
377: }
378:
379: /**
380: * The sole purpose of this is to inform a client of PDFBox that they
381: * did not close the document.
382: */
383: protected void finalize() {
384: if (tmpFile != null || scratchFile != null) {
385: Throwable t = new Throwable(
386: "Warning: You did not close the PDF Document");
387: t.printStackTrace();
388: }
389: }
390:
391: /**
392: * @return Returns the headerString.
393: */
394: public String getHeaderString() {
395: return headerString;
396: }
397:
398: /**
399: * @param header The headerString to set.
400: */
401: public void setHeaderString(String header) {
402: headerString = header;
403: }
404:
405: /**
406: * This method will search the list of objects for types of ObjStm. If it finds
407: * them then it will parse out all of the objects from the stream that is contains.
408: *
409: * @throws IOException If there is an error parsing the stream.
410: */
411: public void dereferenceObjectStreams() throws IOException {
412: Iterator objStm = getObjectsByType("ObjStm").iterator();
413: while (objStm.hasNext()) {
414: COSObject objStream = (COSObject) objStm.next();
415: COSStream stream = (COSStream) objStream.getObject();
416: PDFObjectStreamParser parser = new PDFObjectStreamParser(
417: stream, this );
418: parser.parse();
419: Iterator compressedObjects = parser.getObjects().iterator();
420: while (compressedObjects.hasNext()) {
421: COSObject next = (COSObject) compressedObjects.next();
422: COSObjectKey key = new COSObjectKey(next);
423: COSObject obj = getObjectFromPool(key);
424: obj.setObject(next.getObject());
425: }
426: }
427: }
428:
429: /**
430: * This will add an object to this document.
431: * the method checks if obj is already present as there may be cyclic dependencies
432: *
433: * @param obj The object to add to the document.
434: * @return The object that was actually added to this document, if an object reference already
435: * existed then that will be returned.
436: *
437: * @throws IOException If there is an error adding the object.
438: */
439: public COSObject addObject(COSObject obj) throws IOException {
440: COSObjectKey key = null;
441: if (obj.getObjectNumber() != null) {
442: key = new COSObjectKey(obj);
443: }
444: COSObject fromPool = getObjectFromPool(key);
445: fromPool.setObject(obj.getObject());
446: return fromPool;
447: }
448:
449: /**
450: * This will get an object from the pool.
451: *
452: * @param key The object key.
453: *
454: * @return The object in the pool or a new one if it has not been parsed yet.
455: *
456: * @throws IOException If there is an error getting the proxy object.
457: */
458: public COSObject getObjectFromPool(COSObjectKey key)
459: throws IOException {
460: COSObject obj = null;
461: if (key != null) {
462: obj = (COSObject) objectPool.get(key);
463: }
464: if (obj == null) {
465: // this was a forward reference, make "proxy" object
466: obj = new COSObject(null);
467: if (key != null) {
468: obj.setObjectNumber(new COSInteger(key.getNumber()));
469: obj.setGenerationNumber(new COSInteger(key
470: .getGeneration()));
471: objectPool.put(key, obj);
472: }
473: objects.add(obj);
474: }
475:
476: return obj;
477: }
478: }
|