001: /**
002: * Copyright (c) 2003-2006, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.util;
031:
032: import java.io.File;
033: import java.io.IOException;
034: import java.util.ArrayList;
035: import java.util.HashMap;
036: import java.util.Iterator;
037: import java.util.List;
038: import java.util.Map;
039:
040: import org.pdfbox.cos.COSArray;
041: import org.pdfbox.cos.COSBase;
042: import org.pdfbox.cos.COSDictionary;
043: import org.pdfbox.cos.COSInteger;
044: import org.pdfbox.cos.COSName;
045: import org.pdfbox.cos.COSNumber;
046: import org.pdfbox.cos.COSObject;
047: import org.pdfbox.cos.COSStream;
048: import org.pdfbox.exceptions.COSVisitorException;
049: import org.pdfbox.pdmodel.PDDocument;
050: import org.pdfbox.pdmodel.PDDocumentCatalog;
051: import org.pdfbox.pdmodel.PDDocumentInformation;
052: import org.pdfbox.pdmodel.PDDocumentNameDictionary;
053: import org.pdfbox.pdmodel.PDPage;
054: import org.pdfbox.pdmodel.common.COSArrayList;
055: import org.pdfbox.pdmodel.common.COSObjectable;
056: import org.pdfbox.pdmodel.common.PDStream;
057: import org.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
058: import org.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
059: import org.pdfbox.pdmodel.interactive.form.PDAcroForm;
060: import org.pdfbox.pdmodel.interactive.form.PDField;
061: import org.pdfbox.pdmodel.interactive.form.PDFieldFactory;
062:
063: /**
064: * This class will take a list of pdf documents and merge them, saving the result
065: * in a new document.
066: *
067: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
068: * @version $Revision: 1.2 $
069: */
070: public class PDFMergerUtility {
071:
072: private List sources;
073: private String destinationFileName;
074:
075: /**
076: * Instantiate a new PDFMergerUtility.
077: */
078: public PDFMergerUtility() {
079: sources = new ArrayList();
080: }
081:
082: /**
083: * Get the name of the destination file.
084: * @return Returns the destination.
085: */
086: public String getDestinationFileName() {
087: return destinationFileName;
088: }
089:
090: /**
091: * Set the name of the destination file.
092: * @param destination
093: * The destination to set.
094: */
095: public void setDestinationFileName(String destination) {
096: this .destinationFileName = destination;
097: }
098:
099: /**
100: * Add a source file to the list of files to merge.
101: *
102: * @param source Full path and file name of source document.
103: */
104: public void addSource(String source) {
105: sources.add(new File(source));
106: }
107:
108: /**
109: * Add a source file to the list of files to mere.
110: *
111: * @param source File representing source document
112: */
113: public void addSource(File source) {
114: sources.add(source);
115: }
116:
117: /**
118: * Merge the list of source documents, saving the result in the destination file.
119: *
120: * @throws IOException If there is an error saving the document.
121: * @throws COSVisitorException If an error occurs while saving the destination file.
122: */
123: public void mergeDocuments() throws IOException,
124: COSVisitorException {
125: PDDocument destination = null;
126: File sourceFile;
127: PDDocument source;
128: if (sources != null && sources.size() > 0) {
129: try {
130: Iterator sit = sources.iterator();
131: sourceFile = (File) sit.next();
132: destination = PDDocument.load(sourceFile);
133: while (sit.hasNext()) {
134: sourceFile = (File) sit.next();
135: source = PDDocument.load(sourceFile);
136: try {
137: appendDocument(destination, source);
138: } finally {
139: if (source != null) {
140: source.close();
141: }
142: }
143: }
144: destination.save(destinationFileName);
145: } finally {
146: if (destination != null) {
147: destination.close();
148: }
149: }
150: }
151: }
152:
153: /**
154: * append all pages from source to destination.
155: *
156: * @param destination the document to receive the pages
157: * @param source the document originating the new pages
158: *
159: * @throws IOException If there is an error accessing data from either document.
160: */
161: public void appendDocument(PDDocument destination, PDDocument source)
162: throws IOException {
163: if (destination.isEncrypted()) {
164: throw new IOException(
165: "Error: destination PDF is encrypted, can't append encrypted PDF documents.");
166: }
167: if (source.isEncrypted()) {
168: throw new IOException(
169: "Error: source PDF is encrypted, can't append encrypted PDF documents.");
170: }
171: PDDocumentInformation destInfo = destination
172: .getDocumentInformation();
173: PDDocumentInformation srcInfo = source.getDocumentInformation();
174: destInfo.getDictionary().mergeInto(srcInfo.getDictionary());
175:
176: PDDocumentCatalog destCatalog = destination
177: .getDocumentCatalog();
178: PDDocumentCatalog srcCatalog = source.getDocumentCatalog();
179:
180: if (destCatalog.getOpenAction() == null) {
181: destCatalog.setOpenAction(srcCatalog.getOpenAction());
182: }
183:
184: PDAcroForm destAcroForm = destCatalog.getAcroForm();
185: PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
186: if (destAcroForm == null) {
187: cloneForNewDocument(destination, srcAcroForm);
188: destCatalog.setAcroForm(srcAcroForm);
189: } else {
190: mergeAcroForm(destination, destAcroForm, srcAcroForm);
191: }
192:
193: COSArray destThreads = (COSArray) destCatalog
194: .getCOSDictionary().getDictionaryObject(
195: COSName.getPDFName("Threads"));
196: COSArray srcThreads = (COSArray) cloneForNewDocument(
197: destination, destCatalog.getCOSDictionary()
198: .getDictionaryObject(
199: COSName.getPDFName("Threads")));
200: if (destThreads == null) {
201: destCatalog.getCOSDictionary().setItem(
202: COSName.getPDFName("Threads"), srcThreads);
203: } else {
204: destThreads.addAll(srcThreads);
205: }
206:
207: COSName names = COSName.getPDFName("Names");
208: PDDocumentNameDictionary destNames = destCatalog.getNames();
209: PDDocumentNameDictionary srcNames = srcCatalog.getNames();
210: if (srcNames != null) {
211: if (destNames == null) {
212: destCatalog.getCOSDictionary().setItem(names,
213: cloneForNewDocument(destination, srcNames));
214: } else {
215: //warning, potential for collision here!!
216: destNames.getCOSDictionary().mergeInto(
217: (COSDictionary) cloneForNewDocument(
218: destination, srcNames));
219: }
220: }
221:
222: PDDocumentOutline destOutline = destCatalog
223: .getDocumentOutline();
224: PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline();
225: if (srcOutline != null) {
226: if (destOutline == null) {
227: PDDocumentOutline cloned = new PDDocumentOutline(
228: (COSDictionary) cloneForNewDocument(
229: destination, srcOutline));
230: destCatalog.setDocumentOutline(cloned);
231: } else {
232: PDOutlineItem first = srcOutline.getFirstChild();
233: PDOutlineItem clonedFirst = new PDOutlineItem(
234: (COSDictionary) cloneForNewDocument(
235: destination, first));
236: destOutline.appendChild(clonedFirst);
237: }
238: }
239:
240: String destPageMode = destCatalog.getPageMode();
241: String srcPageMode = srcCatalog.getPageMode();
242: if (destPageMode == null) {
243: destCatalog.setPageMode(srcPageMode);
244: }
245:
246: COSName pageLabels = COSName.getPDFName("PageLabels");
247: COSDictionary destLabels = (COSDictionary) destCatalog
248: .getCOSDictionary().getDictionaryObject(pageLabels);
249: COSDictionary srcLabels = (COSDictionary) srcCatalog
250: .getCOSDictionary().getDictionaryObject(pageLabels);
251: if (srcLabels != null) {
252: int destPageCount = destination.getNumberOfPages();
253: COSArray destNums = null;
254: if (destLabels == null) {
255: destLabels = new COSDictionary();
256: destNums = new COSArray();
257: destLabels
258: .setItem(COSName.getPDFName("Nums"), destNums);
259: destCatalog.getCOSDictionary().setItem(pageLabels,
260: destLabels);
261: } else {
262: destNums = (COSArray) destLabels
263: .getDictionaryObject(COSName.getPDFName("Nums"));
264: }
265: COSArray srcNums = (COSArray) srcLabels
266: .getDictionaryObject(COSName.getPDFName("Nums"));
267: for (int i = 0; i < srcNums.size(); i += 2) {
268: COSNumber labelIndex = (COSNumber) srcNums.getObject(i);
269: long labelIndexValue = labelIndex.intValue();
270: destNums.add(new COSInteger(labelIndexValue
271: + destPageCount));
272: destNums.add(cloneForNewDocument(destination, srcNums
273: .getObject(i + 1)));
274: }
275: }
276:
277: COSName metadata = COSName.getPDFName("Metadata");
278: COSStream destMetadata = (COSStream) destCatalog
279: .getCOSDictionary().getDictionaryObject(metadata);
280: COSStream srcMetadata = (COSStream) srcCatalog
281: .getCOSDictionary().getDictionaryObject(metadata);
282: if (destMetadata == null && srcMetadata != null) {
283: PDStream newStream = new PDStream(destination, srcMetadata
284: .getUnfilteredStream(), false);
285: newStream.getStream().mergeInto(srcMetadata);
286: newStream.addCompression();
287: destCatalog.getCOSDictionary().setItem(metadata, newStream);
288: }
289:
290: //finally append the pages
291: List pages = source.getDocumentCatalog().getAllPages();
292: Iterator pageIter = pages.iterator();
293: while (pageIter.hasNext()) {
294: PDPage page = (PDPage) pageIter.next();
295: PDPage newPage = new PDPage(
296: (COSDictionary) cloneForNewDocument(destination,
297: page.getCOSDictionary()));
298: destination.addPage(newPage);
299: }
300: }
301:
302: Map clonedVersion = new HashMap();
303:
304: private COSBase cloneForNewDocument(PDDocument destination,
305: Object base) throws IOException {
306: if (base == null) {
307: return null;
308: }
309: COSBase retval = (COSBase) clonedVersion.get(base);
310: if (retval != null) {
311: //we are done, it has already been converted.
312: } else if (base instanceof List) {
313: COSArray array = new COSArray();
314: List list = (List) base;
315: for (int i = 0; i < list.size(); i++) {
316: array
317: .add(cloneForNewDocument(destination, list
318: .get(i)));
319: }
320: retval = array;
321: } else if (base instanceof COSObjectable
322: && !(base instanceof COSBase)) {
323: retval = cloneForNewDocument(destination,
324: ((COSObjectable) base).getCOSObject());
325: clonedVersion.put(base, retval);
326: } else if (base instanceof COSObject) {
327: COSObject object = (COSObject) base;
328: retval = cloneForNewDocument(destination, object
329: .getObject());
330: clonedVersion.put(base, retval);
331: } else if (base instanceof COSArray) {
332: COSArray newArray = new COSArray();
333: COSArray array = (COSArray) base;
334: for (int i = 0; i < array.size(); i++) {
335: newArray.add(cloneForNewDocument(destination, array
336: .get(i)));
337: }
338: retval = newArray;
339: clonedVersion.put(base, retval);
340: } else if (base instanceof COSStream) {
341: COSStream originalStream = (COSStream) base;
342: List keys = originalStream.keyList();
343: PDStream stream = new PDStream(destination, originalStream
344: .getFilteredStream(), true);
345: clonedVersion.put(base, stream.getStream());
346: for (int i = 0; i < keys.size(); i++) {
347: COSName key = (COSName) keys.get(i);
348: stream.getStream().setItem(
349: key,
350: cloneForNewDocument(destination, originalStream
351: .getItem(key)));
352: }
353: retval = stream.getStream();
354: } else if (base instanceof COSDictionary) {
355: COSDictionary dic = (COSDictionary) base;
356: List keys = dic.keyList();
357: retval = new COSDictionary();
358: clonedVersion.put(base, retval);
359: for (int i = 0; i < keys.size(); i++) {
360: COSName key = (COSName) keys.get(i);
361: ((COSDictionary) retval).setItem(key,
362: cloneForNewDocument(destination, dic
363: .getItem(key)));
364: }
365: } else {
366: retval = (COSBase) base;
367: }
368: clonedVersion.put(base, retval);
369: return retval;
370: }
371:
372: private int nextFieldNum = 1;
373:
374: /**
375: * Merge the contents of the source form into the destination form
376: * for the destination file.
377: *
378: * @param destination the destination document
379: * @param destAcroForm the destination form
380: * @param srcAcroForm the source form
381: * @throws IOException If an error occurs while adding the field.
382: */
383: private void mergeAcroForm(PDDocument destination,
384: PDAcroForm destAcroForm, PDAcroForm srcAcroForm)
385: throws IOException {
386: List destFields = destAcroForm.getFields();
387: List srcFields = srcAcroForm.getFields();
388: if (srcFields != null) {
389: if (destFields == null) {
390: destFields = new COSArrayList();
391: destAcroForm.setFields(destFields);
392: }
393: Iterator srcFieldsIterator = srcFields.iterator();
394: while (srcFieldsIterator.hasNext()) {
395: PDField srcField = (PDField) srcFieldsIterator.next();
396: PDField destField = PDFieldFactory.createField(
397: destAcroForm,
398: (COSDictionary) cloneForNewDocument(
399: destination, srcField.getDictionary()));
400: // if the form already has a field with this name then we need to rename this field
401: // to prevent merge conflicts.
402: if (destAcroForm.getField(destField
403: .getFullyQualifiedName()) != null) {
404: destField.setPartialName("dummyFieldName"
405: + (nextFieldNum++));
406: }
407: destFields.add(destField);
408: }
409: }
410: }
411:
412: }
|