001: /**
002: * Copyright (c) 2004-2005, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.util;
031:
032: import org.pdfbox.pdmodel.PDDocument;
033: import org.pdfbox.pdmodel.PDPage;
034:
035: import java.io.IOException;
036:
037: import java.util.ArrayList;
038: import java.util.Iterator;
039: import java.util.List;
040:
041: /**
042: * Split a document into several other documents.
043: *
044: * @author Mario Ivankovits (mario@ops.co.at)
045: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
046: * @version $Revision: 1.7 $
047: */
048: public class Splitter {
049:
050: /**
051: * The source PDF document.
052: */
053: protected PDDocument pdfDocument;
054:
055: /**
056: * The current PDF document that contains the splitted page.
057: */
058: protected PDDocument currentDocument = null;
059:
060: private int splitAtPage = 1;
061: private List newDocuments = null;
062:
063: /**
064: * The current page number that we are processing, zero based.
065: */
066: protected int pageNumber = 0;
067:
068: /**
069: * This will take a document and split into several other documents.
070: *
071: * @param document The document to split.
072: *
073: * @return A list of all the split documents.
074: *
075: * @throws IOException If there is an IOError
076: */
077: public List split(PDDocument document) throws IOException {
078: newDocuments = new ArrayList();
079: pdfDocument = document;
080:
081: List pages = pdfDocument.getDocumentCatalog().getAllPages();
082: processPages(pages);
083: return newDocuments;
084: }
085:
086: /**
087: * This will tell the splitting algorithm where to split the pages. The default
088: * is 1, so every page will become a new document. If it was to then each document would
089: * contain 2 pages. So it the source document had 5 pages it would split into
090: * 3 new documents, 2 documents containing 2 pages and 1 document containing one
091: * page.
092: *
093: * @param split The number of pages each split document should contain.
094: */
095: public void setSplitAtPage(int split) {
096: if (split <= 0) {
097: throw new RuntimeException(
098: "Error split must be at least one page.");
099: }
100: splitAtPage = split;
101: }
102:
103: /**
104: * This will return how many pages each split document will contain.
105: *
106: * @return The split parameter.
107: */
108: public int getSplitAtPage() {
109: return splitAtPage;
110: }
111:
112: /**
113: * Interface method to handle the start of the page processing.
114: *
115: * @param pages The list of pages from the source document.
116: *
117: * @throws IOException If an IO error occurs.
118: */
119: protected void processPages(List pages) throws IOException {
120: Iterator iter = pages.iterator();
121: while (iter.hasNext()) {
122: PDPage page = (PDPage) iter.next();
123: processNextPage(page);
124: }
125: }
126:
127: /**
128: * Interface method, you can control where a document gets split by implementing
129: * this method. By default a split occurs at every page. If you wanted to split
130: * based on some complex logic then you could override this method. For example.
131: * <code>
132: * protected void createNewDocumentIfNecessary()
133: * {
134: * if( isPrime( pageNumber ) )
135: * {
136: * super.createNewDocumentIfNecessary();
137: * }
138: * }
139: * </code>
140: *
141: * @throws IOException If there is an error creating the new document.
142: */
143: protected void createNewDocumentIfNecessary() throws IOException {
144: if (isNewDocNecessary()) {
145: createNewDocument();
146: }
147: }
148:
149: /**
150: * Check if it is necessary to create a new document.
151: *
152: * @return true If a new document should be created.
153: */
154: protected boolean isNewDocNecessary() {
155: return pageNumber % splitAtPage == 0 || currentDocument == null;
156: }
157:
158: /**
159: * Create a new document to write the splitted contents to.
160: *
161: * @throws IOException If there is an problem creating the new document.
162: */
163: protected void createNewDocument() throws IOException {
164: currentDocument = new PDDocument();
165: currentDocument.setDocumentInformation(pdfDocument
166: .getDocumentInformation());
167: currentDocument.getDocumentCatalog()
168: .setViewerPreferences(
169: pdfDocument.getDocumentCatalog()
170: .getViewerPreferences());
171: newDocuments.add(currentDocument);
172: }
173:
174: /**
175: * Interface to start processing a new page.
176: *
177: * @param page The page that is about to get processed.
178: *
179: * @throws IOException If there is an error creating the new document.
180: */
181: protected void processNextPage(PDPage page) throws IOException {
182: createNewDocumentIfNecessary();
183: PDPage imported = currentDocument.importPage(page);
184: imported.setCropBox(page.findCropBox());
185: imported.setMediaBox(page.findMediaBox());
186: imported.setResources(page.findResources());
187: imported.setRotation(page.findRotation());
188: pageNumber++;
189: }
190: }
|