import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Iterator;
import com.lowagie.text.pdf.PRStream;
import com.lowagie.text.pdf.PRTokeniser;
import com.lowagie.text.pdf.PdfArray;
import com.lowagie.text.pdf.PdfDictionary;
import com.lowagie.text.pdf.PdfIndirectReference;
import com.lowagie.text.pdf.PdfLister;
import com.lowagie.text.pdf.PdfName;
import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.pdf.PdfReader;
/**
 * Dumps the low-level object structure of {@code 2.pdf} into the text file {@code 2.txt}.
 *
 * <p>In order, the program lists: the trailer, the trailer's Info entry (and the object it refers
 * to), the catalog, the outline root with its first item, the page-tree root, every entry of the
 * root's Kids array, and finally the content stream of the last kid: its dictionary, the bytes
 * returned by {@code PdfReader.getStreamBytes} rendered as text, and every string token found
 * in those bytes.
 */
public class MainClass {

  /**
   * Entry point. Reads {@code 2.pdf} from the working directory and writes the dump to
   * {@code 2.txt}.
   *
   * @param args ignored
   * @throws Exception if the PDF cannot be read or the output file cannot be written
   */
  public static void main(String[] args) throws Exception {
    PdfReader reader = new PdfReader("2.pdf");
    try {
      PrintStream list = new PrintStream(new FileOutputStream("2.txt"));
      try {
        dump(reader, list);
      } finally {
        // Closing flushes buffered output and releases the file handle even when the dump fails.
        list.close();
      }
    } finally {
      reader.close();
    }
  }

  /** Writes the structure dump of the document behind {@code reader} to {@code list}. */
  private static void dump(PdfReader reader, PrintStream list) throws Exception {
    PdfLister lister = new PdfLister(list);

    PdfDictionary trailer = reader.getTrailer();
    lister.listDict(trailer);

    // The Info entry may be missing; list it only when present, followed by the object it
    // points to when it is an indirect reference.
    PdfObject info = trailer.get(PdfName.INFO);
    if (info != null) {
      lister.listAnyObject(info);
      if (info instanceof PdfIndirectReference) {
        lister.listAnyObject(resolve(reader, info));
      }
    }

    PdfDictionary root = reader.getCatalog();
    lister.listDict(root);

    // A document without bookmarks has no Outlines entry, and an empty outline has no First.
    PdfObject outlines = resolve(reader, root.get(PdfName.OUTLINES));
    if (outlines instanceof PdfDictionary) {
      PdfDictionary outlineRoot = (PdfDictionary) outlines;
      lister.listDict(outlineRoot);
      PdfObject first = resolve(reader, outlineRoot.get(PdfName.FIRST));
      if (first != null) {
        lister.listAnyObject(first);
      }
    }

    PdfDictionary pages = (PdfDictionary) resolve(reader, root.get(PdfName.PAGES));
    lister.listDict(pages);

    // List every kid of the page-tree root, remembering the last one for the content dump.
    PdfArray kids = (PdfArray) pages.get(PdfName.KIDS);
    PdfDictionary lastKid = null;
    for (Iterator i = kids.getArrayList().iterator(); i.hasNext();) {
      PdfIndirectReference kidRef = (PdfIndirectReference) i.next();
      lastKid = (PdfDictionary) reader.getPdfObject(kidRef.getNumber());
      lister.listDict(lastKid);
    }
    if (lastKid == null) {
      System.err.println("Kids array is empty; no content stream to dump.");
      return;
    }

    // Only a single stream is dumped. Contents can also be absent or something other than one
    // stream (for example an array), in which case there is nothing to dump here.
    PdfObject contents = resolve(reader, lastKid.get(PdfName.CONTENTS));
    if (!(contents instanceof PRStream)) {
      System.err.println("Last kid has no single content stream; skipping content dump.");
      return;
    }
    PRStream content = (PRStream) contents;
    lister.listDict(content);

    byte[] contentstream = PdfReader.getStreamBytes(content);
    // NOTE(review): decodes with the platform default charset, as before; bytes that are not
    // valid in that charset may be rendered as replacement characters in the dump.
    list.println(new String(contentstream));

    // Print only the string operands found in the content stream.
    PRTokeniser tokenizer = new PRTokeniser(contentstream);
    while (tokenizer.nextToken()) {
      if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {
        list.println(tokenizer.getStringValue());
      }
    }
  }

  /**
   * Returns the object an indirect reference points to, or {@code obj} itself (possibly
   * {@code null}) when it is not an indirect reference.
   */
  private static PdfObject resolve(PdfReader reader, PdfObject obj) {
    if (obj instanceof PdfIndirectReference) {
      return reader.getPdfObject(((PdfIndirectReference) obj).getNumber());
    }
    return obj;
  }
}
|