0001: package com.etymon.pj;
0002:
0003: import java.io.*;
0004: import java.util.*;
0005: import com.etymon.pj.exception.*;
0006: import com.etymon.pj.object.*;
0007: import com.etymon.pj.object.pagemark.*;
0008: import com.etymon.pj.util.*;
0009:
0010: /**
0011: A document representation of a PDF file.
0012: @author Nassib Nassar
0013: */
0014: public class Pdf {
0015:
/**
Creates an empty PDF document.  Construction runs init() and then
createEmpty(), in that order; both are defined elsewhere in this
class.
*/
public Pdf() {
init();
createEmpty();
}
0023:
0024: /**
0025: Creates a PDF document from an existing PDF file.
0026: @param filename the name of the PDF file to read.
0027: @exception IOException if an I/O error occurs.
0028: @exception PjException if a PDF error occurs.
0029: */
0030: public Pdf(String filename) throws IOException, PjException {
0031:
0032: readFromFile(filename);
0033:
0034: // set the Producer in the Info dictionary to pj
0035: // get the Info dictionary
0036: PjReference infoRef;
0037: try {
0038: infoRef = getInfoDictionary();
0039: } catch (InvalidPdfObjectException e) {
0040: infoRef = null;
0041: }
0042: PjInfo info;
0043: if (infoRef == null) {
0044: // create a new Info dictionary and add it
0045: info = new PjInfo();
0046: int infoId = registerObject(info);
0047: infoRef = new PjReference(new PjNumber(infoId));
0048: setInfoDictionary(infoRef);
0049: } else {
0050: PjDictionary d = (PjDictionary) (getObject(infoRef
0051: .getObjNumber().getInt()));
0052: info = new PjInfo(d.getHashtable());
0053: }
0054: // set the Producer field
0055: // PjInfo.setProducer(PjObject) automatically includes pj in the string
0056: info.setProducer(new PjString(""));
0057: }
0058:
0059: /**
0060: Writes this PDF document to a file in PDF format.
0061: @param filename the name of the PDF file to create.
0062: @exception IOException if an I/O error occurs.
0063: */
0064: public void writeToFile(String filename) throws IOException {
0065: File file = new File(filename);
0066: file.delete();
0067: FileOutputStream fos = new FileOutputStream(file);
0068: BufferedOutputStream bos = new BufferedOutputStream(fos);
0069: writeToStream(bos);
0070: bos.close();
0071: fos.close();
0072: }
0073:
0074: /**
0075: Writes this PDF document to a stream in PDF format.
0076: @param os the stream to write to.
0077: @exception IOException if an I/O error occurs.
0078: */
0079: public void writeToStream(OutputStream os) throws IOException {
0080: // first make sure to remove the Prev field from the
0081: // trailer if it is left over from having read a
0082: // multi-part xref!
0083: _trailer.remove(PjName.PREV);
0084: // remove the ID (if there is one) from the trailer
0085: _trailer.remove(PjName.ID);
0086: // ok, go ahead
0087: long z = 0;
0088: z = z + PjObject.writeln(os, "%PDF-" + PjConst.PDF_VERSION);
0089: z = z + PjObject.writeln(os, PjConst.VERSION_IN_PDF);
0090: // The pj copyright notice is inserted into all PDF
0091: // files output by pj; you may not remove this
0092: // copyright notice.
0093: z = z + PjObject.writeln(os, PjConst.COPYRIGHT_IN_PDF);
0094: z = z + PjObject.writeln(os, "%\323\343\317\342");
0095: PjObject obj;
0096: Integer objnum;
0097: int highest = 0;
0098: int size = _objects.size();
0099: long[] position = new long[size];
0100: for (int x = 1; x < size; x++) {
0101: if (x > highest) {
0102: highest = x;
0103: }
0104: obj = _objects.objectAt(x);
0105: position[x] = z;
0106: z = z + PjObject.writeln(os, x + " 0 obj");
0107: if (obj != null) {
0108: z = z + obj.writePdf(os);
0109: } else {
0110: // this is a small hack to avoid having to create "f" entries in the xref table
0111: z = z + PjNumber.ZERO.writePdf(os);
0112: }
0113: z = z + PjObject.writeln(os, "");
0114: z = z + PjObject.writeln(os, "endobj");
0115: }
0116: // write out xref
0117: long startxref = z;
0118: z = z + PjObject.writeln(os, "xref");
0119: int p = 0;
0120: int r;
0121: Long g;
0122: String s;
0123: position[0] = -1;
0124: int count = 0;
0125: while (p <= highest) {
0126: while ((p <= highest) && (position[p] == 0)) {
0127: p++;
0128: }
0129: r = p;
0130: while ((r <= highest) && (position[r] != 0)) {
0131: r++;
0132: }
0133: z = z + PjObject.write(os, p + " ");
0134: z = z + PjObject.writeln(os, new Integer(r - p));
0135: for (int x = p; x < r; x++) {
0136: count++;
0137: if (x == 0) {
0138: z = z + PjObject.write(os, "0000000000 65535 f \n");
0139: } else {
0140: s = new Long(position[x]).toString();
0141: for (int w = 1; (w + s.length()) <= 10; w++) {
0142: z = z + PjObject.write(os, "0");
0143: }
0144: z = z + PjObject.write(os, s);
0145: z = z + PjObject.write(os, " 00000 n \n");
0146: }
0147: }
0148: p = r;
0149: }
0150: // write out trailer
0151: z = z + PjObject.writeln(os, "trailer");
0152: _trailer.put(new PjName("Size"), new PjNumber(count));
0153: PjDictionary trailer = new PjDictionary(_trailer);
0154: z = z + trailer.writePdf(os);
0155: z = z + PjObject.writeln(os, "");
0156: z = z + PjObject.writeln(os, "startxref");
0157: z = z + PjObject.writeln(os, new Long(startxref));
0158: z = z + PjObject.writeln(os, "%%EOF");
0159: }
0160:
0161: /**
0162: Registers a PjObject within this PDF document.
0163: @param obj the PjObject to register.
0164: @return the new object number of the registered PjObject.
0165: */
0166: public int registerObject(PjObject obj) {
0167: int n = _objects.getFirstFree();
0168: _objects.setObjectAt(obj, n);
0169: return n;
0170: }
0171:
/**
Registers a PjObject within this PDF document using a
specified object number.  The object is stored in the document's
object list at exactly that number.
NOTE(review): presumably this replaces any object already stored
under the same number; confirm against the object list class.
@param obj the PjObject to register.
@param objectNumber the object number to register obj under.
*/
public void registerObject(PjObject obj, int objectNumber) {
_objects.setObjectAt(obj, objectNumber);
}
0181:
0182: /**
0183: Adds a PjObject to a page in this PDF document.
0184: @param page the page object to add to.
0185: @param objectNumber the object number of the PjObject to add.
0186: @exception InvalidPdfObjectException if an invalid object
0187: type is encountered.
0188: */
0189: public void addToPage(PjPage page, int objectNumber)
0190: throws InvalidPdfObjectException {
0191: PjReference objectToAdd = new PjReference(new PjNumber(
0192: objectNumber));
0193: // we handle four cases of /Contents:
0194: // 1) does not exist
0195: // 2) reference to a stream object
0196: // 3) reference to an array of references to stream objects
0197: // 4) array of references to stream objects
0198: // the last of these appears not to be supported by the PDF spec,
0199: // however we will accept it just in case
0200: PjObject contents = page.getContents();
0201: if (contents == null) {
0202: // set the page Contents to reference the new object
0203: page.setContents(objectToAdd);
0204: } else if (contents instanceof PjReference) {
0205: // find out whether the reference is to a stream or array
0206: PjObject indirectContents = getObject(((PjReference) contents)
0207: .getObjNumber().getInt());
0208: if (indirectContents instanceof PjArray) {
0209: // add the new object to the existing array
0210: ((PjArray) indirectContents).getVector().addElement(
0211: objectToAdd);
0212: } else if (indirectContents instanceof PjStream) {
0213: // create a new array that includes
0214: // the existing reference to the
0215: // stream as well as the new object
0216: // reference
0217: Vector v = new Vector();
0218: v.addElement(contents);
0219: v.addElement(objectToAdd);
0220: PjArray array = new PjArray(v);
0221: // add the new array to the document
0222: int arrayId = registerObject(array);
0223: // set the page Contents to reference this new array
0224: page
0225: .setContents(new PjReference(new PjNumber(
0226: arrayId)));
0227: } else {
0228: throw new InvalidPdfObjectException(
0229: "Contents reference in page does not reference a stream or array.");
0230: }
0231: } else if (contents instanceof PjArray) {
0232: // add the new object to the existing array
0233: ((PjArray) contents).getVector().addElement(objectToAdd);
0234: } else {
0235: throw new InvalidPdfObjectException(
0236: "Contents object in page is not a reference or array.");
0237: }
0238: }
0239:
/**
Looks up a PjObject by its object number in this document's
object list.
@param objectNumber the object number of the PjObject to retrieve.
@return the requested PjObject; other code in this class checks the
result of the same lookup for null, so callers should expect null
when no object is stored under that number.
*/
public PjObject getObject(int objectNumber) {
return _objects.objectAt(objectNumber);
}
0248:
0249: /**
0250: Dereferences a PjObject if it is a PjReference.
0251: @param obj the PjObject to dereference.
0252: @return the referenced PjObject if obj is a PjReference, or obj otherwise.
0253: */
0254: public PjObject resolve(PjObject obj) {
0255: if (obj == null) {
0256: return null;
0257: } else {
0258: if (obj instanceof PjReference) {
0259: return resolve(getObject(((PjReference) obj)
0260: .getObjNumber().getInt()));
0261: } else {
0262: return obj;
0263: }
0264: }
0265: }
0266:
0267: /**
0268: Determines the number of pages in this PDF document.
0269: @return the number of pages in this PDF document.
0270: @exception InvalidPdfObjectException if an invalid object
0271: type is encountered.
0272: */
0273: public int getPageCount() throws InvalidPdfObjectException {
0274: // the total number of pages should always be stored
0275: // in the root Pages node
0276: int pagesId = getRootPages();
0277: PjDictionary d;
0278: try {
0279: d = (PjDictionary) getObject(pagesId);
0280: } catch (ClassCastException e) {
0281: throw new InvalidPdfObjectException(
0282: "Root pages object is not a dictionary.");
0283: }
0284: PjPages pages = new PjPages(d.getHashtable());
0285:
0286: PjObject countObj = pages.getCount();
0287: PjNumber count;
0288: try {
0289: count = (PjNumber) (resolve(countObj));
0290: if (count.isInteger() == false) {
0291: throw new ClassCastException();
0292: }
0293: } catch (ClassCastException e) {
0294: throw new InvalidPdfObjectException(
0295: "Count field in root pages object is not an integer.");
0296: }
0297: return count.getInt();
0298: }
0299:
0300: private int findPage(int pageNumber, int objectNumber,
0301: PjPages parentPages, IntCounter counter, boolean delete)
0302: throws InvalidPdfObjectException {
0303: PjDictionary node;
0304: try {
0305: node = (PjDictionary) getObject(objectNumber);
0306: } catch (ClassCastException e) {
0307: throw new InvalidPdfObjectException(
0308: "Object in page tree is not a dictionary.");
0309: }
0310: // figure out whether node is a Page or Pages object
0311: PjName type;
0312: try {
0313: type = (PjName) (node.getHashtable().get(PjName.TYPE));
0314: } catch (ClassCastException e) {
0315: throw new InvalidPdfObjectException(
0316: "Type field in dictionary in page tree is not a name object.");
0317: }
0318: if (type.equals(PjName.PAGES)) {
0319: PjPages pages = new PjPages(node.getHashtable());
0320: PjArray kids;
0321: try {
0322: kids = (PjArray) (resolve((PjObject) (pages.getKids())));
0323: } catch (ClassCastException e) {
0324: throw new InvalidPdfObjectException(
0325: "Kids field in pages object is not an array.");
0326: }
0327: if (kids != null) {
0328: Vector v = kids.getVector();
0329: int size = v.size();
0330: PjReference nodeRef;
0331: int found;
0332: for (int x = 0; x < size; x++) {
0333: try {
0334: nodeRef = (PjReference) (v.elementAt(x));
0335: } catch (ClassCastException e) {
0336: throw new InvalidPdfObjectException(
0337: "Object is kids array in pages object is not an indirect reference.");
0338: }
0339: found = findPage(pageNumber, nodeRef.getObjNumber()
0340: .getInt(), pages, counter, delete);
0341: if (found != -1) {
0342: if (delete) {
0343: // decrement the page count in this Pages node
0344: PjNumber count;
0345: try {
0346: count = (PjNumber) (resolve((PjObject) (pages
0347: .getCount())));
0348: if (count.isInteger() == false) {
0349: throw new ClassCastException();
0350: }
0351: } catch (ClassCastException e) {
0352: throw new InvalidPdfObjectException(
0353: "Count field in pages object is not an integer.");
0354: }
0355: pages.setCount(new PjNumber(
0356: count.getInt() - 1));
0357: }
0358: return found;
0359: }
0360: }
0361: }
0362: return -1;
0363: }
0364: if (type.equals(PjName.PAGE)) {
0365: counter.inc();
0366: if (counter.value() == pageNumber) {
0367: if (delete) {
0368: // remove the page from the kids array
0369: ((PjArray) (parentPages.getKids())).getVector()
0370: .removeElement(
0371: new PjReference(new PjNumber(
0372: objectNumber)));
0373: }
0374: return objectNumber;
0375: } else {
0376: return -1;
0377: }
0378: }
0379: return -1;
0380: }
0381:
0382: /**
0383: Looks up a page in this document by page number.
0384: @param pageNumber the page number. Pages are numbered
0385: starting with 1.
0386: @return the object number of the identified Page object.
0387: @exception IndexOutOfBoundsException if an invalid page
0388: number was given.
0389: @exception InvalidPdfObjectException if an invalid object
0390: type is encountered.
0391: */
0392: public int getPage(int pageNumber)
0393: throws IndexOutOfBoundsException, InvalidPdfObjectException {
0394: if (pageNumber < 1) {
0395: throw new IndexOutOfBoundsException("Page number "
0396: + pageNumber + " is not >= 1.");
0397: }
0398: IntCounter counter = new IntCounter(0);
0399: int found = findPage(pageNumber, getRootPages(), null, counter,
0400: false);
0401: if (found == -1) {
0402: if (pageNumber > getPageCount()) {
0403: throw new IndexOutOfBoundsException("Page number "
0404: + pageNumber + " is not <= " + getPageCount()
0405: + ".");
0406: } else {
0407: throw new InvalidPdfObjectException("Page number "
0408: + pageNumber + " not found; ran out of pages.");
0409: }
0410: } else {
0411: return found;
0412: }
0413: }
0414:
0415: /**
0416: Deletes a page in this document by page number. The page
0417: is deleted by removing the reference to it from the page
0418: tree; however, no objects are actually deleted from the
0419: document.
0420: @param pageNumber the page number. Pages are numbered
0421: starting with 1.
0422: @return the object number of the deleted Page object.
0423: @exception IndexOutOfBoundsException if an invalid page
0424: number was given.
0425: @exception InvalidPdfObjectException if an invalid object
0426: type is encountered.
0427: */
0428: public int deletePage(int pageNumber)
0429: throws IndexOutOfBoundsException, InvalidPdfObjectException {
0430: if (pageNumber < 1) {
0431: throw new IndexOutOfBoundsException("Page number "
0432: + pageNumber + " is not >= 1.");
0433: }
0434: IntCounter counter = new IntCounter(0);
0435: int found = findPage(pageNumber, getRootPages(), null, counter,
0436: true);
0437: if (found == -1) {
0438: if (pageNumber > getPageCount()) {
0439: throw new IndexOutOfBoundsException("Page number "
0440: + pageNumber + " is not <= " + getPageCount()
0441: + ".");
0442: } else {
0443: throw new InvalidPdfObjectException("Page number "
0444: + pageNumber + " not found; ran out of pages.");
0445: }
0446: } else {
0447: return found;
0448: }
0449: }
0450:
0451: // we should split this out so that once we find the parent
0452: // node, we call a method to add the new page; we'll need to
0453: // use it in insertPage() also.
0454: /**
0455: Appends a PjPage to the end of this PDF document.
0456: @param objectNumber the object number of the PjPage to append.
0457: @return the new object number of the appended PjPage. */
0458: public int appendPage(int objectNumber) {
0459: // we do this the quickest way: go to the root Pages
0460: // node and add a link to the page at the top level.
0461: // this ignores the issue of maintaining a balanced
0462: // tree; we probably need some tree algorithms to deal
0463: // with general functions to manipulate the page tree.
0464: PjReference catalogRef = (PjReference) (_trailer
0465: .get(PjName.ROOT));
0466: PjDictionary catalog = (PjDictionary) getObject(catalogRef
0467: .getObjNumber().getInt());
0468: PjReference pagesRef = (PjReference) (catalog.getHashtable()
0469: .get(PjName.PAGES));
0470: PjDictionary pages = (PjDictionary) getObject(pagesRef
0471: .getObjNumber().getInt());
0472: // we want to add the new page to the Kids array
0473: PjArray kids = (PjArray) (pages.getHashtable().get(PjName.KIDS));
0474: if (kids == null) {
0475: kids = new PjArray();
0476: pages.getHashtable().put(PjName.KIDS, kids);
0477: }
0478: kids.getVector().addElement(
0479: new PjReference(new PjNumber(objectNumber)));
0480: // also need to set the parent
0481: PjPage page = (PjPage) getObject(objectNumber);
0482: page.setParent(pagesRef);
0483: // while we're here we need to increment the page count
0484: PjObject countObj = (PjObject) (pages.getHashtable()
0485: .get(PjName.COUNT));
0486: PjNumber count = (PjNumber) resolve(countObj);
0487: int newCount = count.getInt() + 1;
0488: pages.getHashtable().put(PjName.COUNT, new PjNumber(newCount));
0489: return newCount;
0490: }
0491:
0492: /**
0493: Appends the pages of a PDF document to this document. Note
0494: that this does not clone the other document but simply
0495: includes references to its objects. Therefore the other
0496: document should be discarded immediately after a call to
0497: this method, otherwise you could get very strange results.
0498: @param pdf the PDF document to append.
0499: @exception InvalidPdfObjectException if an invalid object
0500: type is encountered in either document. */
0501: public void appendPdfDocument(Pdf pdf)
0502: throws InvalidPdfObjectException {
0503:
0504: // first gather some information
0505:
0506: // look up AcroForm in other document, and extract the
0507: // array of references to field objects; so that we
0508: // can add them to the field array in this document
0509: int otherCatalogId = pdf.getCatalog();
0510: PjCatalog otherCatalog;
0511: try {
0512: otherCatalog = (PjCatalog) (pdf.getObject(otherCatalogId));
0513: } catch (ClassCastException e) {
0514: throw new InvalidPdfObjectException(
0515: "Catalog object is not a dictionary.");
0516: }
0517: // get the AcroForm
0518: PjDictionary otherAcroForm;
0519: try {
0520: otherAcroForm = (PjDictionary) (pdf.resolve(otherCatalog
0521: .getAcroForm()));
0522: } catch (ClassCastException e) {
0523: throw new InvalidPdfObjectException(
0524: "AcroForm object is not a dictionary.");
0525: }
0526: Vector otherFieldsV = null;
0527: if (otherAcroForm != null) {
0528: PjArray otherFields = (PjArray) (otherAcroForm
0529: .getHashtable().get(PjName.FIELDS));
0530: if (otherFields != null) {
0531: otherFieldsV = otherFields.getVector();
0532: }
0533: }
0534:
0535: // locate the root Pages node in the other document
0536: int pagesId = pdf.getRootPages();
0537: PjDictionary d;
0538: try {
0539: d = (PjDictionary) (pdf.getObject(pagesId));
0540: } catch (ClassCastException e) {
0541: throw new InvalidPdfObjectException(
0542: "Root pages object is not a dictionary.");
0543: }
0544: PjPages pages = new PjPages(d.getHashtable());
0545:
0546: // get the page count of the other document
0547: int pageCount = pdf.getPageCount();
0548:
0549: // locate the root Pages node in this document
0550: int this PagesId = getRootPages();
0551: try {
0552: d = (PjDictionary) (getObject(this PagesId));
0553: } catch (ClassCastException e) {
0554: throw new InvalidPdfObjectException(
0555: "Root pages object is not a dictionary.");
0556: }
0557: PjPages this Pages = new PjPages(d.getHashtable());
0558:
0559: // at this point we haven't changed anything
0560:
0561: // register all the objects with this document,
0562: // building a mapping table as we go along
0563: int id;
0564: PjObject obj;
0565: int pagesIdNew = -1;
0566: int size = pdf._objects.size();
0567: Hashtable map = new Hashtable(size);
0568: for (int x = 1; x < size; x++) {
0569: obj = pdf._objects.objectAt(x);
0570: if (obj != null) {
0571: id = registerObject(obj);
0572: // new object number for the root Pages node
0573: if (x == pagesId) {
0574: pagesIdNew = id;
0575: }
0576: // add mapping
0577: map.put(new PjNumber(x), new PjReference(new PjNumber(
0578: id)));
0579: }
0580: }
0581:
0582: // renumber objects
0583: // enumerate map as a way of enumerating the objects we added
0584: for (Enumeration m = map.keys(); m.hasMoreElements();) {
0585: // get the object number of an object we added
0586: id = ((PjReference) (map.get(m.nextElement())))
0587: .getObjNumber().getInt();
0588: obj = _objects.objectAt(id);
0589: if (obj instanceof PjReference) {
0590: registerObject((PjReference) (map
0591: .get(((PjReference) obj).getObjNumber())), id);
0592: } else {
0593: obj.renumber(map);
0594: }
0595: }
0596:
0597: // create a new root Pages node that includes the root nodes from the two documents
0598: PjPages newPages = new PjPages();
0599: int newPagesId = registerObject(newPages);
0600: Vector v = new Vector();
0601: v.addElement(new PjReference(new PjNumber(this PagesId)));
0602: v.addElement(new PjReference(new PjNumber(pagesIdNew)));
0603: newPages.setKids(new PjArray(v));
0604: newPages.setCount(new PjNumber(getPageCount() + pageCount));
0605: // set the old root nodes' Parent to point to the new root node
0606: PjReference newPagesRef = new PjReference(new PjNumber(
0607: newPagesId));
0608: this Pages.setParent(newPagesRef);
0609: pages.setParent(newPagesRef);
0610:
0611: // update the catalog to point to the new root Pages node
0612: int catalogId = getCatalog();
0613: PjCatalog catalog;
0614: try {
0615: catalog = (PjCatalog) (getObject(catalogId));
0616: } catch (ClassCastException e) {
0617: throw new InvalidPdfObjectException(
0618: "Catalog object is not a dictionary.");
0619: }
0620: catalog.setPages(newPagesRef);
0621:
0622: // merge AcroForm from the two documents
0623: PjDictionary acroForm = (PjDictionary) (resolve(catalog
0624: .getAcroForm()));
0625: if (acroForm == null) {
0626: // use the other document's AcroForm
0627: PjDictionary otherAf = (PjDictionary) (otherCatalog
0628: .getAcroForm());
0629: if (otherAcroForm != null) {
0630: catalog.setAcroForm(otherAf);
0631: }
0632: } else {
0633: // add the fields extracted from other document's AcroForm
0634: // locate the fields array
0635: PjArray fields = (PjArray) (acroForm.getHashtable()
0636: .get(PjName.FIELDS));
0637: if ((otherFieldsV != null) && (fields != null)) {
0638: Vector fieldsV = fields.getVector();
0639: int otherFieldsV_n = otherFieldsV.size();
0640: for (int x = 0; x < otherFieldsV_n; x++) {
0641: fieldsV.addElement(otherFieldsV.elementAt(x));
0642: }
0643: }
0644: }
0645:
0646: }
0647:
0648: /**
0649: Looks up the Catalog object in this document.
0650: @return the object number of the Catalog object.
0651: @exception InvalidPdfObjectException if an invalid object
0652: type is encountered.
0653: */
0654: public int getCatalog() throws InvalidPdfObjectException {
0655: PjReference catalogRef;
0656: try {
0657: catalogRef = (PjReference) (_trailer.get(PjName.ROOT));
0658: } catch (ClassCastException e) {
0659: throw new InvalidPdfObjectException(
0660: "Root field in trailer is not an indirect reference.");
0661: }
0662: return catalogRef.getObjNumber().getInt();
0663: }
0664:
0665: /**
0666: Looks up the root Pages object of this document's Pages tree.
0667: @return the object number of the root Pages object.
0668: @exception InvalidPdfObjectException if an invalid object
0669: type is encountered.
0670: */
0671: public int getRootPages() throws InvalidPdfObjectException {
0672: // we find the root Pages node via the Catalog object
0673: int catalogId = getCatalog();
0674: PjDictionary catalog;
0675: try {
0676: catalog = (PjDictionary) getObject(catalogId);
0677: } catch (ClassCastException e) {
0678: throw new InvalidPdfObjectException(
0679: "Catalog is not a dictionary.");
0680: }
0681: PjReference pagesRef;
0682: try {
0683: pagesRef = (PjReference) (catalog.getHashtable()
0684: .get(PjName.PAGES));
0685: } catch (ClassCastException e) {
0686: throw new InvalidPdfObjectException(
0687: "Pages field in catalog is not an indirect reference.");
0688: }
0689: return pagesRef.getObjNumber().getInt();
0690: }
0691:
0692: /**
0693: Looks up the Info dictionary within this document's trailer.
0694: The Info dictionary contains general information about the
0695: document.
0696: @return a reference to the Info dictionary, or null if no
0697: Info field is present in the trailer.
0698: @exception InvalidPdfObjectException if the Info field in
0699: the trailer is not a reference (PjReference) object.
0700: */
0701: public PjReference getInfoDictionary()
0702: throws InvalidPdfObjectException {
0703: PjReference r;
0704: try {
0705: r = (PjReference) (_trailer.get(PjName.INFO));
0706: } catch (ClassCastException e) {
0707: throw new InvalidPdfObjectException(
0708: "Info field is not an indirect reference.");
0709: }
0710: return r;
0711: }
0712:
/**
Sets the Info dictionary within this document's trailer by storing
the given reference under the trailer's Info key.
@param ref a reference to the Info dictionary.
*/
public void setInfoDictionary(PjReference ref) {
_trailer.put(PjName.INFO, ref);
}
0720:
0721: /**
0722: Looks up the Encrypt dictionary within this document's trailer.
0723: The Encrypt dictionary contains information for decrypting a
0724: document.
0725: @return the Encrypt dictionary, or null if no Encrypt field is
0726: present in the trailer.
0727: @exception InvalidPdfObjectException if the Encrypt field in
0728: the trailer is not a dictionary (PjDictionary) object.
0729: */
0730: public PjDictionary getEncryptDictionary()
0731: throws InvalidPdfObjectException {
0732: PjDictionary d;
0733: try {
0734: d = (PjDictionary) (resolve((PjObject) (_trailer
0735: .get(PjName.ENCRYPT))));
0736: } catch (ClassCastException e) {
0737: throw new InvalidPdfObjectException(
0738: "Encrypt field is not a dictionary.");
0739: }
0740: return d;
0741: }
0742:
/**
Sets the Encrypt dictionary within this document's trailer to an
indirect reference, by storing the reference under the trailer's
Encrypt key.
@param ref a reference to the Encrypt dictionary.
*/
public void setEncryptDictionary(PjReference ref) {
_trailer.put(PjName.ENCRYPT, ref);
}
0750:
/**
Sets the Encrypt dictionary within this document's trailer to a
direct dictionary, by storing the dictionary itself under the
trailer's Encrypt key.
@param dict the Encrypt dictionary.
*/
public void setEncryptDictionary(PjDictionary dict) {
_trailer.put(PjName.ENCRYPT, dict);
}
0758:
0759: /**
0760: Returns a clone of a pages node such that all inherited
0761: attributes of the given pages node are made explicit. For
0762: example, if MediaBox is not defined in the given pages
0763: node, this method ascends the pages tree (via the Parent
0764: reference) looking for an ancestor node that does contain a
0765: value for MediaBox; if it finds one, it assigns that value
0766: in the cloned (returned) pages node. This is done for all
0767: inheritable attributes.
0768: @param node a pages node for which inherited attributes are
0769: to be retrieved.
0770: @return a cloned copy of the given pages node with actual
0771: values substituted for all inherited attributes.
0772: @exception InvalidPdfObjectException if an invalid object
0773: type is encountered.
0774: */
0775: public PjPagesNode inheritPageAttributes(PjPagesNode node)
0776: throws InvalidPdfObjectException {
0777: PjPagesNode newNode;
0778: try {
0779: newNode = (PjPagesNode) (node.clone());
0780: } catch (CloneNotSupportedException e) {
0781: throw new InvalidPdfObjectException(e.getMessage());
0782: }
0783: Hashtable ht = newNode.getHashtable();
0784: PjObject parentRef = newNode.getParent();
0785: while (parentRef != null) {
0786: PjObject parentObj = resolve(parentRef);
0787: if (!(parentObj instanceof PjPagesNode)) {
0788: throw new InvalidPdfObjectException(
0789: "Ancestor of pages node is not a pages node.");
0790: }
0791: PjPagesNode parent = (PjPagesNode) parentObj;
0792: inheritPageAttributesCollapse(PjName.MEDIABOX, ht, newNode,
0793: parent);
0794: inheritPageAttributesCollapse(PjName.RESOURCES, ht,
0795: newNode, parent);
0796: inheritPageAttributesCollapse(PjName.CROPBOX, ht, newNode,
0797: parent);
0798: inheritPageAttributesCollapse(PjName.ROTATE, ht, newNode,
0799: parent);
0800: inheritPageAttributesCollapse(PjName.DUR, ht, newNode,
0801: parent);
0802: inheritPageAttributesCollapse(PjName.HID, ht, newNode,
0803: parent);
0804: inheritPageAttributesCollapse(PjName.TRANS, ht, newNode,
0805: parent);
0806: inheritPageAttributesCollapse(PjName.AA, ht, newNode,
0807: parent);
0808: parentRef = parent.getParent();
0809: }
0810: return newNode;
0811: }
0812:
0813: /**
0814: Returns a clone of a field node such that all inherited
0815: attributes of the given field node are made explicit. For
0816: example, if the V key is not defined in the given field
0817: node, this method ascends the field tree (via the Parent
0818: reference) looking for an ancestor node that does contain a
0819: value for the V key; if it finds one, it assigns that value
0820: in the cloned (returned) field node. This is done for all
0821: inheritable attributes.
0822: @param node a field node for which inherited attributes are
0823: to be retrieved.
0824: @return a cloned copy of the given field node with actual
0825: values substituted for all inherited attributes.
0826: @exception InvalidPdfObjectException if an invalid object
0827: type is encountered.
0828: */
0829: public PjDictionary inheritFieldAttributes(PjDictionary node)
0830: throws InvalidPdfObjectException {
0831: PjDictionary newNode;
0832: try {
0833: newNode = (PjDictionary) (node.clone());
0834: } catch (CloneNotSupportedException e) {
0835: throw new InvalidPdfObjectException(e.getMessage());
0836: }
0837: Hashtable ht = newNode.getHashtable();
0838: PjObject parentRef = (PjObject) (newNode.getHashtable()
0839: .get(PjName.PARENT));
0840: while (parentRef != null) {
0841: PjObject parentObj = resolve(parentRef);
0842: if (!(parentObj instanceof PjDictionary)) {
0843: throw new InvalidPdfObjectException(
0844: "Ancestor of field node is not a dictionary.");
0845: }
0846: PjDictionary parent = (PjDictionary) parentObj;
0847: inheritFieldAttributesCollapse(PjName.FT, ht, newNode,
0848: parent);
0849: inheritFieldAttributesCollapse(PjName.V, ht, newNode,
0850: parent);
0851: inheritFieldAttributesCollapse(PjName.DV, ht, newNode,
0852: parent);
0853: inheritFieldAttributesCollapse(PjName.FF, ht, newNode,
0854: parent);
0855: inheritFieldAttributesCollapse(PjName.DR, ht, newNode,
0856: parent);
0857: inheritFieldAttributesCollapse(PjName.DA, ht, newNode,
0858: parent);
0859: inheritFieldAttributesCollapse(PjName.Q, ht, newNode,
0860: parent);
0861: inheritFieldAttributesCollapse(PjName.OPT, ht, newNode,
0862: parent);
0863: inheritFieldAttributesCollapse(PjName.TOPINDEX, ht,
0864: newNode, parent);
0865: inheritFieldAttributesCollapse(PjName.MAXLEN, ht, newNode,
0866: parent);
0867: parentRef = (PjObject) (parent.getHashtable()
0868: .get(PjName.PARENT));
0869: }
0870: return newNode;
0871: }
0872:
/**
Returns the largest object number in the list of registered
PjObjects. This is useful mainly for functions that need
to run through the list and process each object, because
this provides the maximum object number they need to
examine. The object number may not currently be assigned
to an object, but probably was at some point in the past.
@return the size of the object list minus one, or 0 if the
object list is empty.
*/
public int getMaxObjectNumber() {
return Math.max(_objects.size() - 1, 0);
}
0885:
0886: public Vector getFields() throws InvalidPdfObjectException {
0887:
0888: Vector fieldList = new Vector();
0889:
0890: // get the Catalog
0891: int catalogId = getCatalog();
0892: PjCatalog catalog;
0893: try {
0894: catalog = (PjCatalog) (getObject(catalogId));
0895: } catch (ClassCastException e) {
0896: throw new InvalidPdfObjectException(
0897: "Catalog object is not a dictionary.");
0898: }
0899:
0900: // get the AcroForm
0901: PjDictionary acroForm;
0902: try {
0903: acroForm = (PjDictionary) (resolve(catalog.getAcroForm()));
0904: } catch (ClassCastException e) {
0905: throw new InvalidPdfObjectException(
0906: "AcroForm object is not a dictionary.");
0907: }
0908:
0909: if (acroForm == null) {
0910: return fieldList;
0911: }
0912:
0913: // for now we assume that all root fields have no
0914: // children; so we treat Fields as an array
0915:
0916: // get Fields array
0917: PjArray fields = (PjArray) (acroForm.getHashtable()
0918: .get(PjName.FIELDS));
0919: if (fields == null) {
0920: return fieldList;
0921: }
0922: Vector fieldsV = fields.getVector();
0923:
0924: // loop through all fields
0925: int fieldsV_n = fieldsV.size();
0926: for (int x = 0; x < fieldsV_n; x++) {
0927:
0928: // get the field object
0929: PjReference fieldRef;
0930: try {
0931: fieldRef = (PjReference) (fieldsV.elementAt(x));
0932: } catch (ClassCastException e) {
0933: throw new InvalidPdfObjectException(
0934: "Fields array element is not a reference.");
0935: }
0936:
0937: getFieldsAddField(fieldList, fieldRef);
0938:
0939: }
0940:
0941: return fieldList;
0942:
0943: }
0944:
0945: private void getFieldsAddField(Vector fieldList,
0946: PjReference fieldRef) throws InvalidPdfObjectException {
0947:
0948: // resolve field reference
0949: PjDictionary field;
0950: try {
0951: field = (PjDictionary) (resolve(fieldRef));
0952: } catch (ClassCastException e) {
0953: throw new InvalidPdfObjectException(
0954: "Field object is not a dictionary.");
0955: }
0956:
0957: Hashtable fieldHt = field.getHashtable();
0958:
0959: // add the field to the list
0960: fieldList.addElement(field);
0961:
0962: // check if there are any kids
0963: PjArray kids;
0964: try {
0965: kids = (PjArray) (resolve((PjObject) (fieldHt
0966: .get(PjName.KIDS))));
0967: } catch (ClassCastException e) {
0968: throw new InvalidPdfObjectException(
0969: "Kids object is not an array.");
0970: }
0971:
0972: // if there are kids, descend the tree
0973: if (kids != null) {
0974: Vector kidsV = kids.getVector();
0975: int kidsV_n = kidsV.size();
0976: for (int x = 0; x < kidsV_n; x++) {
0977:
0978: // get the field object
0979: PjReference fieldRef2;
0980: try {
0981: fieldRef2 = (PjReference) (kidsV.elementAt(x));
0982: } catch (ClassCastException e) {
0983: throw new InvalidPdfObjectException(
0984: "Kids array element is not a reference.");
0985: }
0986:
0987: getFieldsAddField(fieldList, fieldRef2);
0988:
0989: }
0990: }
0991:
0992: }
0993:
0994: public void updateFieldValue(PjDictionary origField,
0995: PjDictionary field, String value)
0996: throws PdfFormatException, InvalidPdfObjectException {
0997:
0998: Hashtable origFieldHt = origField.getHashtable();
0999:
1000: Hashtable fieldHt = field.getHashtable();
1001:
1002: // store old value for use in search/replace within appeareances stream(s)
1003: PjString oldValue = (PjString) (fieldHt.get(PjName.V));
1004:
1005: PjString valueString = new PjString(value);
1006: origFieldHt.put(PjName.V, valueString);
1007: origFieldHt.put(PjName.DV, valueString);
1008:
1009: // determine quadding
1010: PjNumber q = (PjNumber) (resolve((PjObject) (fieldHt
1011: .get(PjName.Q))));
1012: boolean leftJustified = false;
1013: boolean centered = false;
1014: boolean rightJustified = false;
1015: if (q == null) {
1016: leftJustified = true;
1017: } else {
1018: switch (q.getInt()) {
1019: case 1:
1020: centered = true;
1021: break;
1022: case 2:
1023: rightJustified = true;
1024: break;
1025: default:
1026: leftJustified = true;
1027: }
1028: }
1029:
1030: PjDictionary ap = (PjDictionary) (resolve((PjObject) (fieldHt
1031: .get(PjName.AP))));
1032: if (ap != null) {
1033: Hashtable apHt = ap.getHashtable();
1034: PjObject apnObj = (PjObject) (apHt.get(PjName.N));
1035: int apnId;
1036: PjReference apnRef;
1037: PjObject apn;
1038: PjDictionary apnDict;
1039: byte[] apnBuffer;
1040: if (apnObj instanceof PjReference) {
1041: // it's an indirect object
1042: apnRef = (PjReference) apnObj;
1043: apnId = apnRef.getObjNumber().getInt();
1044: apn = resolve(apnRef);
1045: } else {
1046: // if it's not an indirect object, let's make it indirect
1047: apnId = registerObject(apnObj);
1048: apnRef = new PjReference(new PjNumber(apnId));
1049: apHt.put(PjName.N, apnRef);
1050: apn = apnObj;
1051: }
1052:
1053: // "/C" = center text
1054: // this assumes Courier 10 pt; we can add support
1055: // for others if needed.
1056: // it also assumes a page width of 8.5"; this also could
1057: // be adjusted or read from the document.
1058:
1059: float rectX1 = 0;
1060: float rectX2 = 0;
1061: float rectWidth = 0;
1062: if (centered) {
1063: // adjust RECT
1064: PjRectangle rect = (PjRectangle) (fieldHt
1065: .get(PjName.RECT));
1066: rectX1 = rect.getLowerLeftX().getFloat();
1067: rectX2 = rect.getUpperRightX().getFloat();
1068: rectWidth = rectX2 - rectX1;
1069: }
1070:
1071: if ((apn != null) && (apn instanceof PjStream)) {
1072: // if centered: remove any text matrix adjustments.
1073: // get page mark operators
1074: Vector pmVector = new StreamParser()
1075: .parse(((PjStream) (apn)).flateDecompress());
1076: if (oldValue != null) {
1077: replaceTextData(pmVector, oldValue, valueString);
1078: }
1079: if (centered) {
1080: adjustTextMatrixX(pmVector, rectWidth);
1081: }
1082: // reconstruct stream from modified pmVector
1083: ByteArrayOutputStream baos = new ByteArrayOutputStream();
1084: for (int pmX = 0; pmX < pmVector.size(); pmX++) {
1085: PageMark pm = (PageMark) (pmVector.elementAt(pmX));
1086: try {
1087: pm.writePdf(baos);
1088: } catch (IOException e) {
1089: e.printStackTrace();
1090: }
1091: }
1092: byte[] ba = baos.toByteArray();
1093: // register new (modified) stream in pdf document
1094: registerObject(new PjStream(((PjStream) (apn))
1095: .getStreamDictionary(), ba), apnId);
1096:
1097: }
1098: }
1099:
1100: }
1101:
1102: // used exclusively by updateFieldValue()
1103: private static void replaceTextData(Vector pmVector,
1104: PjString oldText, PjString newText) {
1105: // this method replaces text data oldS with newS
1106:
1107: int pmX = pmVector.size();
1108:
1109: // no particular reason for searching backwards; just
1110: // because this was adapted from clearTextMatrixX()
1111: while (pmX > 0) {
1112:
1113: pmX--;
1114: PageMark pm = (PageMark) (pmVector.elementAt(pmX));
1115:
1116: if (pm instanceof XTj) {
1117: XTj tj = (XTj) pm;
1118: if (tj.getText().equals(oldText)) {
1119: XTj newTj = new XTj(newText);
1120: pmVector.setElementAt(newTj, pmX);
1121: }
1122: }
1123:
1124: }
1125: }
1126:
1127: // used exclusively by updateFieldValue()
1128: private static void adjustTextMatrixX(Vector pmVector,
1129: float rectWidth) {
1130: // this method examines the last text matrix in
1131: // pmVector and sets the X matrix value in order to
1132: // center the text written by the subsequent Tj
1133: // operator.
1134:
1135: int pmX = pmVector.size();
1136: float textWidth = 0;
1137: float rectCenter = rectWidth / 2;
1138:
1139: while (pmX > 0) {
1140:
1141: pmX--;
1142: PageMark pm = (PageMark) (pmVector.elementAt(pmX));
1143:
1144: if (pm instanceof XTj) {
1145: XTj tj = (XTj) pm;
1146: textWidth = tj.getText().getString().length() * 6;
1147: }
1148:
1149: if (pm instanceof XTm) {
1150: float newX = rectCenter - (textWidth / 2);
1151: if (newX < 0) {
1152: newX = 0;
1153: }
1154: XTm tm = (XTm) pm;
1155: XTm newTm = new XTm(tm.getA(), tm.getB(), tm.getC(), tm
1156: .getD(), new PjNumber(newX), tm.getY());
1157: pmVector.setElementAt(newTm, pmX);
1158: pmX = 0; // Tm found, now we can stop
1159: }
1160:
1161: }
1162: }
1163:
1164: // used exclusively by updateFieldValue()
1165: private static void clearTextMatrixX(Vector pmVector) {
1166: // this method examines the last text matrix in
1167: // pmVector and sets the X matrix value to 0.
1168:
1169: int pmX = pmVector.size();
1170:
1171: while (pmX > 0) {
1172:
1173: pmX--;
1174: PageMark pm = (PageMark) (pmVector.elementAt(pmX));
1175:
1176: if (pm instanceof XTm) {
1177: XTm tm = (XTm) pm;
1178: XTm newTm = new XTm(tm.getA(), tm.getB(), tm.getC(), tm
1179: .getD(), PjNumber.ZERO, tm.getY());
1180: pmVector.setElementAt(newTm, pmX);
1181: pmX = 0; // Tm found, now we can stop
1182: }
1183:
1184: }
1185: }
1186:
1187: private void inheritPageAttributesCollapse(PjName name,
1188: Hashtable ht, PjPagesNode newNode, PjPagesNode parent) {
1189: if (ht.get(name) == null) {
1190: Object obj = parent.getHashtable().get(name);
1191: if (obj != null) {
1192: ht.put(name, obj);
1193: }
1194: }
1195: }
1196:
1197: private void inheritFieldAttributesCollapse(PjName name,
1198: Hashtable ht, PjDictionary newNode, PjDictionary parent) {
1199: if (ht.get(name) == null) {
1200: Object obj = parent.getHashtable().get(name);
1201: if (obj != null) {
1202: ht.put(name, obj);
1203: }
1204: }
1205: }
1206:
1207: private void init() {
1208: _objects = new PjObjectVector();
1209: _trailer = new Hashtable();
1210: }
1211:
1212: // this creates the minimal data structures for an empty Pdf object
1213: // (a single blank page)
1214: private void createEmpty() {
1215: // make a ProcSet
1216: Vector v = new Vector();
1217: v.addElement(PjName.PDF);
1218: v.addElement(PjName.TEXT);
1219: PjProcSet procSet = new PjProcSet(v);
1220: int procSetId = registerObject(procSet);
1221: // make a Resources dictionary
1222: PjResources resources = new PjResources();
1223: resources.setProcSet(new PjReference(new PjNumber(procSetId)));
1224: int resourcesId = registerObject(resources);
1225: // make a MediaBox rectangle
1226: PjRectangle mediaBox = new PjRectangle();
1227: mediaBox.setLowerLeftX(PjNumber.ZERO);
1228: mediaBox.setLowerLeftY(PjNumber.ZERO);
1229: mediaBox.setUpperRightX(new PjNumber(612));
1230: mediaBox.setUpperRightY(new PjNumber(792));
1231: // make a blank Page
1232: PjPage page = new PjPage();
1233: int pageId = registerObject(page);
1234: // make the kids array
1235: v = new Vector();
1236: v.addElement(new PjReference(new PjNumber(pageId)));
1237: PjArray kids = new PjArray(v);
1238: // make the root Pages node
1239: PjPages root = new PjPages();
1240: root.setResources(new PjReference(new PjNumber(resourcesId)));
1241: root.setMediaBox(mediaBox);
1242: root.setCount(PjNumber.ONE);
1243: root.setKids(kids);
1244: int rootId = registerObject(root);
1245: // we have to go back and set the blank page's parent to root
1246: page.setParent(new PjReference(new PjNumber(rootId)));
1247: // make the Catalog
1248: PjCatalog catalog = new PjCatalog();
1249: catalog.setPages(new PjReference(new PjNumber(rootId)));
1250: int catalogId = registerObject(catalog);
1251: // set Root in the trailer to point to the Catalog
1252: _trailer.put(PjName.ROOT, new PjReference(new PjNumber(
1253: catalogId)));
1254: // create an Info dictionary with default fields
1255: PjInfo info = new PjInfo();
1256: info.setCreator(PjConst.COPYRIGHT_IN_INFO);
1257: // need to add CreationDate and ModDate here, once we implement PjDate(Date)
1258: int infoId = registerObject(info);
1259: _trailer
1260: .put(PjName.INFO, new PjReference(new PjNumber(infoId)));
1261: }
1262:
1263: private void readFromFile(String filename) throws IOException,
1264: PjException {
1265: init();
1266: RandomAccessFile raf = new RandomAccessFile(filename, "r");
1267: try {
1268: PdfParser.getObjects(this , raf);
1269: } finally {
1270: // make an attempt to close the file
1271: try {
1272: raf.close();
1273: } catch (IOException e) {
1274: }
1275: }
1276: }
1277:
/** The registered objects of this document; created by init().  getMaxObjectNumber() reports size() - 1 of this list as the largest object number. */
1278: protected PjObjectVector _objects;
/** The trailer entries of this document; created by init().  createEmpty() stores the Root and Info references here under PjName.ROOT and PjName.INFO. */
1279: protected Hashtable _trailer;
1280:
1281: }
|