001: /**
002: * Copyright (c) 2005-2006, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox;
031:
032: import java.awt.image.BufferedImage;
033: import java.io.File;
034: import java.util.Iterator;
035: import java.util.List;
036:
037: import javax.imageio.IIOException;
038: import javax.imageio.IIOImage;
039: import javax.imageio.ImageIO;
040: import javax.imageio.ImageWriteParam;
041: import javax.imageio.ImageWriter;
042: import javax.imageio.stream.ImageOutputStream;
043:
044: import org.pdfbox.exceptions.InvalidPasswordException;
045:
046: import org.pdfbox.pdmodel.PDDocument;
047: import org.pdfbox.pdmodel.PDPage;
048:
049: /**
050: * Convert a PDF document to an image.
051: *
052: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
053: * @version $Revision: 1.5 $
054: */
055: public class PDFToImage {
056:
057: private static final String PASSWORD = "-password";
058: private static final String START_PAGE = "-startPage";
059: private static final String END_PAGE = "-endPage";
060: private static final String IMAGE_TYPE = "-imageType";
061: private static final String OUTPUT_PREFIX = "-outputPrefix";
062:
063: /**
064: * private constructor.
065: */
066: private PDFToImage() {
067: //static class
068: }
069:
070: /**
071: * Infamous main method.
072: *
073: * @param args Command line arguments, should be one and a reference to a file.
074: *
075: * @throws Exception If there is an error parsing the document.
076: */
077: public static void main(String[] args) throws Exception {
078: String password = "";
079: String pdfFile = null;
080: String outputPrefix = null;
081: String imageType = "jpg";
082: int startPage = 1;
083: int endPage = Integer.MAX_VALUE;
084: for (int i = 0; i < args.length; i++) {
085: if (args[i].equals(PASSWORD)) {
086: i++;
087: if (i >= args.length) {
088: usage();
089: }
090: password = args[i];
091: } else if (args[i].equals(START_PAGE)) {
092: i++;
093: if (i >= args.length) {
094: usage();
095: }
096: startPage = Integer.parseInt(args[i]);
097: } else if (args[i].equals(END_PAGE)) {
098: i++;
099: if (i >= args.length) {
100: usage();
101: }
102: endPage = Integer.parseInt(args[i]);
103: } else if (args[i].equals(IMAGE_TYPE)) {
104: i++;
105: imageType = args[i];
106: } else if (args[i].equals(OUTPUT_PREFIX)) {
107: i++;
108: outputPrefix = args[i];
109: } else {
110: if (pdfFile == null) {
111: pdfFile = args[i];
112: }
113: }
114: }
115:
116: if (pdfFile == null) {
117: usage();
118: } else {
119: if (outputPrefix == null) {
120: outputPrefix = pdfFile.substring(0, pdfFile
121: .lastIndexOf('.'));
122: }
123:
124: PDDocument document = null;
125: try {
126: document = PDDocument.load(pdfFile);
127:
128: //document.print();
129: if (document.isEncrypted()) {
130: try {
131: document.decrypt(password);
132: } catch (InvalidPasswordException e) {
133: if (args.length == 4)//they supplied the wrong password
134: {
135: System.err
136: .println("Error: The supplied password is incorrect.");
137: System.exit(2);
138: } else {
139: //they didn't suppply a password and the default of "" was wrong.
140: System.err
141: .println("Error: The document is encrypted.");
142: usage();
143: }
144: }
145: }
146: List pages = document.getDocumentCatalog()
147: .getAllPages();
148: for (int i = startPage - 1; i < endPage
149: && i < pages.size(); i++) {
150: ImageOutputStream output = null;
151: ImageWriter imageWriter = null;
152: try {
153: PDPage page = (PDPage) pages.get(i);
154: BufferedImage image = page.convertToImage();
155: String fileName = outputPrefix + (i + 1) + "."
156: + imageType;
157: System.out.println("Writing:" + fileName);
158: output = ImageIO
159: .createImageOutputStream(new File(
160: fileName));
161:
162: boolean foundWriter = false;
163: Iterator writerIter = ImageIO
164: .getImageWritersByFormatName(imageType);
165: while (writerIter.hasNext() && !foundWriter) {
166: try {
167: imageWriter = (ImageWriter) writerIter
168: .next();
169: ImageWriteParam writerParams = imageWriter
170: .getDefaultWriteParam();
171: if (writerParams.canWriteCompressed()) {
172: writerParams
173: .setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
174: writerParams
175: .setCompressionQuality(1.0f);
176: }
177:
178: imageWriter.setOutput(output);
179: imageWriter.write(null, new IIOImage(
180: image, null, null),
181: writerParams);
182: foundWriter = true;
183: } catch (IIOException io) {
184: //ignore exception
185: } finally {
186: if (imageWriter != null) {
187: imageWriter.dispose();
188: }
189: }
190: }
191: if (!foundWriter) {
192: throw new RuntimeException(
193: "Error: no writer found for image type '"
194: + imageType + "'");
195: }
196: } finally {
197: if (output != null) {
198: output.flush();
199: output.close();
200: }
201: }
202: }
203: } finally {
204: if (document != null) {
205: document.close();
206: }
207: }
208: }
209: }
210:
211: /**
212: * This will print the usage requirements and exit.
213: */
214: private static void usage() {
215: System.err
216: .println("Usage: java org.pdfbox.PDFToImage [OPTIONS] <PDF file>\n"
217: + " -password <password> Password to decrypt document\n"
218: + " -imageType <image type> ("
219: + getImageFormats()
220: + ")\n"
221: + " -outputPrefix <output prefix> Filename prefix for image files\n"
222: + " -startPage <number> The first page to start extraction(1 based)\n"
223: + " -endPage <number> The last page to extract(inclusive)\n"
224: + " <PDF file> The PDF document to use\n");
225: System.exit(1);
226: }
227:
228: private static String getImageFormats() {
229: StringBuffer retval = new StringBuffer();
230: String[] formats = ImageIO.getReaderFormatNames();
231: for (int i = 0; i < formats.length; i++) {
232: retval.append(formats[i]);
233: if (i + 1 < formats.length) {
234: retval.append(",");
235: }
236: }
237: return retval.toString();
238: }
239: }
|