001: package com.etymon.pjx.util;
002:
003: import java.io.*;
004: import java.util.*;
005: import com.etymon.pjx.*;
006:
007: /**
008: Provides methods for retrieving and modifying the page tree of a
009: PDF document. This class is synchronized.
010: @author Nassib Nassar
011: */
012: public class PdfPageTree {
013:
/**
The page tree root of the document. No method of this class
assigns or reads this field.
*/
protected PdfReference _pageTreeRoot;

/**
The manager associated with this document. Objects are
resolved through it, and the public methods of this class
synchronize on it (after synchronizing on <code>this</code>).
*/
protected PdfManager _m;

/**
The catalog associated with this document.
*/
protected PdfCatalog _catalog;

/**
Defines the set of inheritable field attributes:
<code>Resources</code>, <code>MediaBox</code>,
<code>CropBox</code> and <code>Rotate</code>. The set is shared
by all instances and is only read by this class, so it is
populated exactly once, when the class is initialized.
*/
protected static Set _inheritable;

static {
    // Build the set completely in a local variable before
    // publishing it through the static field.  Previously every
    // constructor call replaced and re-filled the shared set, so
    // creating a second instance could expose an empty or
    // half-filled set to a concurrent inheritAttributes() call on
    // another instance.
    Set inheritable = new HashSet(4);
    inheritable.add(new PdfName("Resources"));
    inheritable.add(new PdfName("MediaBox"));
    inheritable.add(new PdfName("CropBox"));
    inheritable.add(new PdfName("Rotate"));
    _inheritable = inheritable;
}

/** Key of the page count entry in a page tree node. */
protected static final PdfName PDFNAME_COUNT = new PdfName("Count");

/** Key of the child list entry in a page tree node. */
protected static final PdfName PDFNAME_KIDS = new PdfName("Kids");

/** Value of the <code>Type</code> entry that marks a page object. */
protected static final PdfName PDFNAME_PAGE = new PdfName("Page");

/** Key of the catalog entry that refers to the page tree root. */
protected static final PdfName PDFNAME_PAGES = new PdfName("Pages");

/** Key of the entry that refers to a node's parent. */
protected static final PdfName PDFNAME_PARENT = new PdfName(
        "Parent");

/** Key of the entry that holds a node's type name. */
protected static final PdfName PDFNAME_TYPE = new PdfName("Type");

/**
Constructs a <code>PdfPageTree</code> instance based on a
specified <code>PdfManager</code>.
@param manager the manager associated with the document; it
is also used to create the <code>PdfCatalog</code> from which
the page tree root is obtained.
*/
public PdfPageTree(PdfManager manager) {

    _m = manager;
    _catalog = new PdfCatalog(manager);

}
058:
059: /**
060: Returns an indirect reference to a page object specified by
061: page number. Note that page objects do not include
062: inherited attributes; {@link
063: #inheritAttributes(PdfDictionary)
064: inheritAttributes(PdfDictionary)} should be used to obtain
065: inherited attributes.
066: @param pageNumber the page number. The numbering starts
067: with <code>0</code>.
068: @return the indirect reference.
069: @throws IOException
070: @throws PdfFormatException
071: */
072: public PdfReference getPage(int pageNumber) throws IOException,
073: PdfFormatException {
074: synchronized (this ) {
075: synchronized (_m) {
076:
077: if (pageNumber < 0) {
078: throw new IndexOutOfBoundsException(
079: "Requested page number is less than 0");
080: }
081:
082: // keep a running list of all page
083: // nodes visited so that we can detect
084: // a cycle and avoid getting caught in
085: // an infinite loop
086: Set visited = new HashSet();
087:
088: // get the root of the page tree
089: PdfReference nodeR = getRoot();
090: visited.add(nodeR);
091: Object obj = _m.getObjectIndirect(nodeR);
092: if (!(obj instanceof PdfDictionary)) {
093: throw new PdfFormatException(
094: "Page tree (Pages) is not a dictionary.");
095: }
096: Map node = ((PdfDictionary) obj).getMap();
097:
098: // descend the page tree; each
099: // iteration through this loop
100: // descends one level
101: boolean first = true;
102: int numberOfPages;
103: int pageSum = 0;
104: while (true) {
105:
106: // if this is the first node,
107: // it should contain the
108: // total number of pages;
109: // check that the requested
110: // page is within that range
111: if (first) {
112:
113: first = false;
114:
115: obj = node.get(PDFNAME_COUNT);
116: if ((!(obj instanceof PdfInteger))
117: && (!(obj instanceof PdfReference))) {
118: throw new PdfFormatException(
119: "Page count is not an integer or reference.");
120: }
121: if (obj instanceof PdfReference) {
122: obj = _m
123: .getObjectIndirect((PdfReference) obj);
124: }
125: if (!(obj instanceof PdfInteger)) {
126: throw new PdfFormatException(
127: "Page count is not an integer.");
128: }
129: numberOfPages = ((PdfInteger) obj).getInt();
130:
131: if (pageNumber >= numberOfPages) {
132: throw new IndexOutOfBoundsException(
133: "Requested page number is too large");
134: }
135:
136: }
137:
138: // at this point we have a
139: // node that is not a page
140: // object; therefore we assume
141: // it is a pages object and
142: // proceed to determine the
143: // next node to examine
144:
145: // get the list of kids
146: obj = node.get(PDFNAME_KIDS);
147: if ((!(obj instanceof PdfArray))
148: && (!(obj instanceof PdfReference))) {
149: throw new PdfFormatException(
150: "Kids object is not an array or reference.");
151: }
152: if (obj instanceof PdfReference) {
153: obj = _m.getObjectIndirect((PdfReference) obj);
154: }
155: if (!(obj instanceof PdfArray)) {
156: throw new PdfFormatException(
157: "Kids object is not an array.");
158: }
159: List kids = ((PdfArray) obj).getList();
160: //System.out.println( (PdfArray)obj );
161:
162: // iterate through the list of
163: // kids, examining the number
164: // of pages in each, and
165: // stopping when we reach the
166: // one that must contain the
167: // page we are looking for
168: boolean descend = false;
169: for (Iterator t = kids.iterator(); ((t.hasNext()) && (!descend));) {
170:
171: // get the "kid",
172: // i.e. the referenced
173: // page or pages
174: // object
175: obj = t.next();
176: if (!(obj instanceof PdfReference)) {
177: throw new PdfFormatException(
178: "Kids element is not a reference.");
179: }
180: PdfReference kidR = ((PdfReference) obj);
181: if (visited.contains(kidR)) {
182: throw new PdfFormatException(
183: "Page tree contains a cycle (must be acyclic).");
184: }
185: visited.add(kidR);
186: obj = _m.getObjectIndirect(kidR);
187: if (!(obj instanceof PdfDictionary)) {
188: throw new PdfFormatException(
189: "Kids element is not a dictionary.");
190: }
191: Map kid = ((PdfDictionary) obj).getMap();
192:
193: // determine whether
194: // it is a page object
195: // or a pages object
196: obj = kid.get(PDFNAME_TYPE);
197: if ((!(obj instanceof PdfName))
198: && (!(obj instanceof PdfReference))) {
199: throw new PdfFormatException(
200: "Page node type is not a name or reference.");
201: }
202: if (obj instanceof PdfReference) {
203: obj = _m
204: .getObjectIndirect((PdfReference) obj);
205: }
206: if (!(obj instanceof PdfName)) {
207: throw new PdfFormatException(
208: "Page node type is not a name.");
209: }
210: PdfName nodeType = (PdfName) obj;
211: boolean singlePage = nodeType
212: .equals(PDFNAME_PAGE);
213:
214: // determine how many
215: // pages are
216: // represented by this
217: // node
218: int count;
219: if (singlePage) {
220: // this is a
221: // page
222: // object, so
223: // it
224: // represents
225: // exactly one
226: // page
227: count = 1;
228: } else {
229: // otherwise
230: // we assume
231: // this is a
232: // pages
233: // object, and
234: // we examine
235: // the Count
236: // value
237: obj = kid.get(PDFNAME_COUNT);
238: if ((!(obj instanceof PdfInteger))
239: && (!(obj instanceof PdfReference))) {
240: throw new PdfFormatException(
241: "Page count is not an integer or reference.");
242: }
243: if (obj instanceof PdfReference) {
244: obj = _m
245: .getObjectIndirect((PdfReference) obj);
246: }
247: if (!(obj instanceof PdfInteger)) {
248: throw new PdfFormatException(
249: "Page count is not an integer.");
250: }
251: count = ((PdfInteger) obj).getInt();
252: }
253:
254: if ((pageSum + count) > pageNumber) {
255:
256: if (singlePage) {
257: // this is the page we are looking for
258: return kidR;
259: } else {
260: // descend this node; don't bother with the rest of the
261: // kids in the list
262: node = kid;
263: descend = true;
264: }
265:
266: } else {
267:
268: // we will
269: // keep
270: // iterating
271: // the kid
272: // list, so we
273: // add the
274: // number of
275: // pages to
276: // the left to
277: // our running
278: // sum
279: pageSum += count;
280:
281: }
282:
283: } // for()
284:
285: // if descend was not set to
286: // true, then the for() loop
287: // completed normally, meaning
288: // that the kids do not
289: // contains enough pages, and
290: // something is wrong with the
291: // document
292: if (!descend) {
293: throw new PdfFormatException(
294: "Requested page not found.");
295: }
296:
297: } // while()
298: }
299: }
300: }
301:
302: /**
303: Returns the number of pages in the document.
304: @return the number of pages.
305: @throws IOException
306: @throws PdfFormatException
307: */
308: public int getNumberOfPages() throws IOException,
309: PdfFormatException {
310: synchronized (this ) {
311: synchronized (_m) {
312:
313: Object obj = _m.getObjectIndirect(getRoot());
314:
315: if (!(obj instanceof PdfDictionary)) {
316: throw new PdfFormatException(
317: "Page tree root (Pages) is not a dictionary.");
318: }
319:
320: Map root = ((PdfDictionary) obj).getMap();
321:
322: obj = root.get(PDFNAME_COUNT);
323:
324: if ((!(obj instanceof PdfInteger))
325: && (!(obj instanceof PdfReference))) {
326: throw new PdfFormatException(
327: "Page count is not an integer or reference.");
328: }
329:
330: if (obj instanceof PdfReference) {
331: obj = _m.getObjectIndirect((PdfReference) obj);
332: }
333:
334: if (!(obj instanceof PdfInteger)) {
335: throw new PdfFormatException(
336: "Page count is not an integer.");
337: }
338:
339: return ((PdfInteger) obj).getInt();
340:
341: }
342: }
343: }
344:
345: /**
346: Returns an indirect reference to the root node of the
347: document's page tree.
348: @return the indirect reference.
349: @throws IOException
350: @throws PdfFormatException
351: */
352: public PdfReference getRoot() throws IOException,
353: PdfFormatException {
354: synchronized (this ) {
355: synchronized (_m) {
356:
357: Object obj = _m
358: .getObjectIndirect(_catalog.getCatalog());
359: if (!(obj instanceof PdfDictionary)) {
360: throw new PdfFormatException(
361: "Catalog is not a dictionary.");
362: }
363: PdfDictionary catalog = (PdfDictionary) obj;
364:
365: obj = catalog.getMap().get(PDFNAME_PAGES);
366: if (!(obj instanceof PdfReference)) {
367: throw new PdfFormatException(
368: "Page tree root (Pages) is not an indirect reference.");
369: }
370: return (PdfReference) obj;
371:
372: }
373: }
374: }
375:
376: /**
377: Adds inherited attributes to a specified page dictionary
378: object. The page object is cloned and the inherited
379: attributes are made explicit in the cloned object's
380: dictionary. The inherited attributes are retrieved by
381: ascending the page tree and looking for inheritable
382: attributes (if any) that are missing from the specified
383: page dictionary.
384: @param page the page dictionary to be filled in with
385: inherited attributes.
386: @return a clone of the specified page dictionary, with all
387: inherited attributes filled in.
388: @throws IOException
389: @throws PdfFormatException
390: */
391: public PdfDictionary inheritAttributes(PdfDictionary page)
392: throws IOException, PdfFormatException {
393: synchronized (this ) {
394: synchronized (_m) {
395:
396: Map pageM = page.getMap();
397:
398: // define new dictionary map
399: Map newMap = new HashMap(page.getMap());
400:
401: // start out looking for all inheritable attributes
402: // that are not present in this page
403: Set unused = new HashSet(_inheritable.size());
404: for (Iterator t = _inheritable.iterator(); t.hasNext();) {
405:
406: PdfName attr = (PdfName) t.next();
407: Object obj = pageM.get(attr);
408:
409: if ((obj == null) || (obj instanceof PdfNull)) {
410: unused.add(attr);
411: }
412:
413: }
414:
415: boolean done = false;
416:
417: do {
418:
419: // if all the inheritable attributes have been
420: // filled, there is no need to continue
421: // ascending the tree
422: if (unused.isEmpty()) {
423: done = true;
424: break;
425: }
426:
427: // get the Parent node
428: Object obj = pageM.get(PDFNAME_PARENT);
429: if (obj == null) {
430: // we are done
431: done = true;
432: break;
433: }
434: if (!(obj instanceof PdfObject)) {
435: throw new PdfFormatException(
436: "Parent object is not a PDF object.");
437: }
438: obj = _m.getObjectIndirect((PdfObject) obj);
439: if (!(obj instanceof PdfDictionary)) {
440: throw new PdfFormatException(
441: "Parent object is not a dictionary.");
442: }
443: pageM = ((PdfDictionary) obj).getMap();
444:
445: // now examine the parent node
446: for (Iterator t = unused.iterator(); t.hasNext();) {
447:
448: PdfName attr = (PdfName) t.next();
449:
450: // check if the attribute is present
451: obj = pageM.get(attr);
452: if ((obj != null)
453: && (!(obj instanceof PdfNull))) {
454: t.remove();
455: newMap.put(attr, obj);
456: }
457:
458: }
459:
460: } while (!done);
461:
462: return new PdfDictionary(newMap);
463:
464: }
465: }
466: }
467:
468: }
|