001: /**
002: * Copyright (c) 2005, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.examples.util;
031:
032: import org.pdfbox.exceptions.InvalidPasswordException;
033:
034: import org.pdfbox.pdmodel.PDDocument;
035: import org.pdfbox.pdmodel.PDPage;
036: import org.pdfbox.util.PDFTextStripper;
037: import org.pdfbox.util.TextPosition;
038:
039: import java.io.IOException;
040:
041: import java.util.List;
042:
043: /**
044: * This is an example on how to get some x/y coordinates of text.
045: *
046: * Usage: java org.pdfbox.examples.util.PrintTextLocations <input-pdf>
047: *
048: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
049: * @version $Revision: 1.6 $
050: */
051: public class PrintTextLocations extends PDFTextStripper {
052: /**
053: * Default constructor.
054: *
055: * @throws IOException If there is an error loading text stripper properties.
056: */
057: public PrintTextLocations() throws IOException {
058: super .setSortByPosition(true);
059: }
060:
061: /**
062: * This will print the documents data.
063: *
064: * @param args The command line arguments.
065: *
066: * @throws Exception If there is an error parsing the document.
067: */
068: public static void main(String[] args) throws Exception {
069: if (args.length != 1) {
070: usage();
071: } else {
072: PDDocument document = null;
073: try {
074: document = PDDocument.load(args[0]);
075: if (document.isEncrypted()) {
076: try {
077: document.decrypt("");
078: } catch (InvalidPasswordException e) {
079: System.err
080: .println("Error: Document is encrypted with a password.");
081: System.exit(1);
082: }
083: }
084: PrintTextLocations printer = new PrintTextLocations();
085: List allPages = document.getDocumentCatalog()
086: .getAllPages();
087: for (int i = 0; i < allPages.size(); i++) {
088: PDPage page = (PDPage) allPages.get(i);
089: System.out.println("Processing page: " + i);
090: printer.processStream(page, page.findResources(),
091: page.getContents().getStream());
092: }
093: } finally {
094: if (document != null) {
095: document.close();
096: }
097: }
098: }
099: }
100:
101: /**
102: * A method provided as an event interface to allow a subclass to perform
103: * some specific functionality when a character needs to be displayed.
104: *
105: * @param text The character to be displayed.
106: */
107: protected void showCharacter(TextPosition text) {
108: System.out.println("String[" + text.getX() + "," + text.getY()
109: + " fs=" + text.getFontSize() + " xscale="
110: + text.getXScale() + " height=" + text.getHeight()
111: + " width=" + text.getWidth() + "]"
112: + text.getCharacter());
113: }
114:
115: /**
116: * This will print the usage for this document.
117: */
118: private static void usage() {
119: System.err
120: .println("Usage: java org.pdfbox.examples.pdmodel.PrintTextLocations <input-pdf>");
121: }
122:
123: }
|