001: package it.stefanochizzolini.clown.samples;
002:
003: import it.stefanochizzolini.clown.documents.Document;
004: import it.stefanochizzolini.clown.documents.Page;
005: import it.stefanochizzolini.clown.documents.Pages;
006: import it.stefanochizzolini.clown.documents.contents.Contents;
007: import it.stefanochizzolini.clown.documents.contents.Resources;
008: import it.stefanochizzolini.clown.documents.contents.objects.CompositeObject;
009: import it.stefanochizzolini.clown.documents.contents.objects.ContentObject;
010: import it.stefanochizzolini.clown.documents.contents.objects.Operation;
011: import it.stefanochizzolini.clown.documents.interchange.metadata.Information;
012: import it.stefanochizzolini.clown.files.File;
013: import it.stefanochizzolini.clown.objects.PdfDictionary;
014: import it.stefanochizzolini.clown.objects.PdfIndirectObject;
015: import it.stefanochizzolini.clown.objects.PdfName;
016: import it.stefanochizzolini.clown.objects.PdfReference;
017: import it.stefanochizzolini.clown.tokens.FileFormatException;
018:
019: import java.util.HashMap;
020: import java.util.List;
021: import java.util.Map;
022:
023: /**
024: This sample demonstrates how to inspect the structure of a PDF document.
025: <h3>Remarks</h3>
026: <p>This implementation is just a limited exercise: see the API documentation
027: to perform all the possible access functionalities.</p>
028: */
029: public class ParsingSample implements ISample {
030: public void run(PDFClownSampleLoader loader) {
031: // (boilerplate user choice -- ignore it)
032: String filePath = loader
033: .getPdfFileChoice("Please select a PDF file");
034:
035: // 1. Open the PDF file!
036: File file;
037: try {
038: file = new File(filePath);
039: } catch (FileFormatException e) {
040: throw new RuntimeException(filePath
041: + " file has a bad file format.", e);
042: } catch (Exception e) {
043: throw new RuntimeException(
044: filePath + " file access error.", e);
045: }
046:
047: // 2. Parsing the document...
048: // Get the PDF document!
049: Document document = file.getDocument();
050: // 2.1. Showing basic metadata...
051: System.out.println("\nDocument information:");
052: Information info = document.getInformation();
053: if (info == null) {
054: System.out
055: .println("No information available (Info dictionary doesn't exist).");
056: } else {
057: System.out.println("Author: " + info.getAuthor());
058: System.out.println("Title: " + info.getTitle());
059: System.out.println("Subject: " + info.getSubject());
060: System.out.println("CreationDate: "
061: + info.getCreationDate());
062: }
063:
064: System.out
065: .println("\nIterating through the indirect-object collection (please wait)...");
066:
067: // 2.2. Counting the indirect objects, grouping them by type...
068: HashMap<String, Integer> objCounters = new HashMap<String, Integer>();
069: objCounters.put("xref free entry", 0);
070: for (PdfIndirectObject object : file.getIndirectObjects()) {
071: if (object.isInUse()) // In-use entry.
072: {
073: String typeName = object.getDataObject().getClass()
074: .getName();
075: if (objCounters.containsKey(typeName)) {
076: objCounters.put(typeName,
077: objCounters.get(typeName) + 1);
078: } else {
079: objCounters.put(typeName, 1);
080: }
081: } else // Free entry.
082: {
083: objCounters.put("xref free entry", objCounters
084: .get("xref free entry") + 1);
085: }
086: }
087: System.out
088: .println("\nIndirect objects partial counts (grouped by PDF object type):");
089: for (Map.Entry<String, Integer> entry : objCounters.entrySet()) {
090: System.out.println(" " + entry.getKey() + ": "
091: + entry.getValue());
092: }
093: System.out.println("Indirect objects total count: "
094: + file.getIndirectObjects().size());
095:
096: // 2.3. Showing some page information...
097: Pages pages = document.getPages();
098: int pageCount = pages.size();
099: System.out.println("\nPage count: " + pageCount);
100: int pageIndex = (int) Math.floor((float) pageCount / 2);
101: Page page = pages.get(pageIndex);
102: System.out.println("Mid page:");
103: printPageInfo(page, pageIndex);
104:
105: pageIndex++;
106: if (pageIndex < pageCount) {
107: System.out.println("Next page:");
108: printPageInfo(page.getNext(), pageIndex);
109: }
110: }
111:
112: private void printPageInfo(Page page, int index) {
113: // 1. Showing basic page information...
114: System.out.println(" Index (calculated): " + page.getIndex()
115: + " (should be " + index + ")");
116: System.out.println(" ID: "
117: + ((PdfReference) page.getBaseObject()).getID());
118: PdfDictionary pageDictionary = page.getBaseDataObject();
119: System.out.println(" Dictionary entries:");
120: for (PdfName key : pageDictionary.keySet()) {
121: System.out.println(" " + key.getValue());
122: }
123:
124: // 2. Showing page contents information...
125: Contents contents = page.getContents();
126: System.out
127: .println(" Content objects count: " + contents.size());
128: System.out.println(" Content head (operations):");
129: {
130: int i = 0, count = contents.size();
131: while (i < 10 && i < count) {
132: i = printContentObject(contents.get(i), i, 0);
133: }
134: }
135:
136: // 3. Showing page resources information...
137: {
138: Resources resources = page.getResources();
139: System.out.println(" Resources:");
140: Map subResources = resources.getFonts();
141: if (subResources != null) {
142: System.out.println(" Font count: "
143: + subResources.size());
144: }
145:
146: subResources = resources.getXObjects();
147: if (subResources != null) {
148: System.out.println(" XObjects count: "
149: + subResources.size());
150: }
151:
152: subResources = resources.getColorSpaces();
153: if (subResources != null) {
154: System.out.println(" ColorSpaces count: "
155: + subResources.size());
156: }
157: }
158: }
159:
160: private int printContentObject(ContentObject content, int index,
161: int level) {
162: String indentation;
163: {
164: StringBuffer buffer = new StringBuffer();
165: for (int i = 0; i < level; i++) {
166: buffer.append(' ');
167: }
168: indentation = buffer.toString();
169: }
170:
171: /*
172: NOTE: Contents are expressed through both simple operations and composite objects.
173: */
174: if (content instanceof Operation) {
175: System.out.println(" " + indentation + (++index) + ": "
176: + content.toString());
177: } else if (content instanceof CompositeObject) {
178: System.out.println(" " + indentation
179: + content.getClass().getSimpleName() + "\n "
180: + indentation + "{");
181: List<? extends ContentObject> objects = ((CompositeObject) content)
182: .getObjects();
183: for (ContentObject obj : objects) {
184: if ((index = printContentObject(obj, index, level + 1)) > 9)
185: break;
186: }
187: System.out.println(" " + indentation + "}");
188: }
189: return index;
190: }
191: }
|