0001: /*
0002: * $Id: PDFParser.java,v 1.2 2007/12/20 18:17:41 rbair Exp $
0003: *
0004: * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
0005: * Santa Clara, California 95054, U.S.A. All rights reserved.
0006: *
0007: * This library is free software; you can redistribute it and/or
0008: * modify it under the terms of the GNU Lesser General Public
0009: * License as published by the Free Software Foundation; either
0010: * version 2.1 of the License, or (at your option) any later version.
0011: *
0012: * This library is distributed in the hope that it will be useful,
0013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0015: * Lesser General Public License for more details.
0016: *
0017: * You should have received a copy of the GNU Lesser General Public
0018: * License along with this library; if not, write to the Free Software
0019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
0020: */
0021:
0022: package com.sun.pdfview;
0023:
0024: import java.awt.geom.AffineTransform;
0025: import java.awt.geom.GeneralPath;
0026: import static java.awt.geom.GeneralPath.WIND_EVEN_ODD;
0027: import static java.awt.geom.GeneralPath.WIND_NON_ZERO;
0028: import java.awt.geom.Point2D;
0029: import java.awt.geom.Rectangle2D;
0030: import java.io.File;
0031: import java.io.FileOutputStream;
0032: import java.io.IOException;
0033: import java.lang.ref.WeakReference;
0034: import java.nio.ByteBuffer;
0035: import java.util.ArrayList;
0036: import java.util.HashMap;
0037: import java.util.Stack;
0038:
0039: import com.sun.pdfview.colorspace.PDFColorSpace;
0040: import com.sun.pdfview.colorspace.PatternSpace;
0041: import com.sun.pdfview.decode.PDFDecoder;
0042: import com.sun.pdfview.font.PDFFont;
0043: import com.sun.pdfview.pattern.PDFShader;
0044:
0045: /**
0046: * PDFParser is the class that parses a PDF content stream and
0047: * produces PDFCmds for a PDFPage. You should never ever see it run:
0048: * it gets created by a PDFPage only if needed, and may even run in
0049: * its own thread.
0050: *
0051: * @author Mike Wessler
0052: */
0053: public class PDFParser extends BaseWatchable {
// ---- parsing variables
/** operand stack: objects parsed since the last command, pushed by iterate() */
private Stack stack; // stack of Object
/** saved parser states, pushed by the "q" command and popped by "Q" */
private Stack parserStates; // stack of ParserState

// the current parser state (color spaces and text format)
private ParserState state;

/** the path currently under construction by the path operators */
private GeneralPath path;
/** PDFShapeCmd.CLIP once "W"/"W*" has been seen for the current path, else 0 */
private int clip;

/** index into stream of the next byte to read */
private int loc;
/** when true, nextToken() returns the current token again (see throwback()) */
private boolean resend = false;
/** the token most recently produced by nextToken() */
private Tok tok;
private boolean catchexceptions; // Indicates state of BX...EX

/** a weak reference to the page we render into.  For the page
 * to remain available, some other code must retain a strong reference to it.
 */
private WeakReference pageRef;

/** the actual page, for use within a single iteration.  Note that
 * this must be released at the end of each iteration to ensure the
 * page can be collected if not in use
 */
private PDFPage cmds;

// ---- result variables
/** the raw bytes of the content stream being parsed */
byte[] stream;
/** the resources available to this content stream, keyed by name */
HashMap resources;

/** threshold for debug(): only messages with a level above this are printed */
public static int debuglevel = 4000;
0085:
0086: public static void debug(String msg, int level) {
0087: if (level > debuglevel) {
0088: System.out.println(escape(msg));
0089: }
0090: }
0091:
0092: public static String escape(String msg) {
0093: StringBuffer sb = new StringBuffer();
0094: for (int i = 0; i < msg.length(); i++) {
0095: char c = msg.charAt(i);
0096: if (c != '\n' && (c < 32 || c >= 127)) {
0097: c = '?';
0098: }
0099: sb.append(c);
0100: }
0101: return sb.toString();
0102: }
0103:
0104: public static void setDebugLevel(int level) {
0105: debuglevel = level;
0106: }
0107:
/**
 * Create a parser for one content stream.
 * <p>
 * Don't call this constructor directly.  Instead, use
 * PDFFile.getPage(int pagenum) to get a PDFPage.  There should
 * never be any reason for a user to create, access, or hold
 * on to a PDFParser.
 *
 * @param cmds the page that receives the parsed commands; only a
 * weak reference to it is kept
 * @param stream the bytes of the content stream to parse
 * @param resources the resources for the stream; if null, an empty
 * map is used instead
 */
public PDFParser(PDFPage cmds, byte[] stream, HashMap resources) {
    super();

    this.stream = stream;
    this.resources = (resources != null) ? resources : new HashMap();
    this.pageRef = new WeakReference(cmds);
}
0125:
0126: /////////////////////////////////////////////////////////////////
0127: // B E G I N R E A D E R S E C T I O N
0128: /////////////////////////////////////////////////////////////////
0129:
/**
 * a token from a PDF Stream.  The type field holds one of the
 * constants below; name and value are only meaningful for the token
 * types named in their comments.
 */
class Tok {
    /** begin bracket &lt;&lt; (start of a dictionary) */
    public static final int BRKB = 11;

    /** end bracket &gt;&gt; (end of a dictionary) */
    public static final int BRKE = 10;

    /** begin array [ */
    public static final int ARYB = 9;

    /** end array ] */
    public static final int ARYE = 8;

    /** String */
    public static final int STR = 7;

    /** begin brace { */
    public static final int BRCB = 5;

    /** end brace } */
    public static final int BRCE = 4;

    /** number */
    public static final int NUM = 3;

    /** keyword */
    public static final int CMD = 2;

    /** name (begins with /) */
    public static final int NAME = 1;

    /** unknown token */
    public static final int UNK = 0;

    /** end of stream */
    public static final int EOF = -1;

    /** the string value of a STR, NAME, or CMD token */
    public String name;

    /** the value of a NUM token */
    public double value;

    /** the type of the token */
    public int type;

    /**
     * a printable representation of the token.
     *
     * @return the token type followed by its payload, if any; the
     * dictionary brackets and braces all share one generic form that
     * includes the numeric type
     */
    @Override
    public String toString() {
        switch (type) {
        case NUM:
            return "NUM: " + value;
        case CMD:
            return "CMD: " + name;
        case UNK:
            return "UNK";
        case EOF:
            return "EOF";
        case NAME:
            return "NAME: " + name;
        case STR:
            return "STR: " + name;
        case ARYB:
            return "ARY [";
        case ARYE:
            return "ARY ]";
        default:
            // BRKB, BRKE, BRCB, BRCE and anything unexpected
            return "some kind of brace (" + type + ")";
        }
    }
}
0205:
0206: /**
0207: * put the current token back so that it is returned again by
0208: * nextToken().
0209: */
0210: private void throwback() {
0211: resend = true;
0212: }
0213:
0214: /**
0215: * get the next token.
0216: * TODO: this creates a new token each time. Is this strictly
0217: * necessary?
0218: */
0219: private Tok nextToken() {
0220: if (resend) {
0221: resend = false;
0222: return tok;
0223: }
0224: tok = new Tok();
0225: // skip whitespace
0226: while (loc < stream.length && PDFFile.isWhiteSpace(stream[loc])) {
0227: loc++;
0228: }
0229: if (loc >= stream.length) {
0230: tok.type = Tok.EOF;
0231: return tok;
0232: }
0233: int c = stream[loc++];
0234: // examine the character:
0235: if (c == '[') {
0236: tok.type = Tok.ARYB;
0237: } else if (c == ']') {
0238: tok.type = Tok.ARYE;
0239: } else if (c == '(') {
0240: // read a string
0241: tok.type = Tok.STR;
0242: tok.name = readString();
0243: } else if (c == '{') {
0244: tok.type = Tok.BRCB;
0245: } else if (c == '}') {
0246: tok.type = Tok.BRCE;
0247: } else if (c == '<' && stream[loc++] == '<') {
0248: tok.type = Tok.BRKB;
0249: } else if (c == '>' && stream[loc++] == '>') {
0250: tok.type = Tok.BRKE;
0251: } else if (c == '<') {
0252: loc--;
0253: tok.type = Tok.STR;
0254: tok.name = readByteArray();
0255: } else if (c == '/') {
0256: tok.type = Tok.NAME;
0257: tok.name = readName();
0258: } else if (c == '.' || c == '-' || (c >= '0' && c <= '9')) {
0259: loc--;
0260: tok.type = Tok.NUM;
0261: tok.value = readNum();
0262: } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
0263: || c == '\'' || c == '"') {
0264: loc--;
0265: tok.type = Tok.CMD;
0266: tok.name = readName();
0267: } else {
0268: System.out.println("Encountered character: " + c + " ("
0269: + (char) c + ")");
0270: tok.type = Tok.UNK;
0271: }
0272: // debug("Read token: "+tok, -1);
0273: return tok;
0274: }
0275:
0276: /**
0277: * read a name (sequence of non-PDF-delimiting characters) from the
0278: * stream.
0279: */
0280: private String readName() {
0281: int start = loc;
0282: while (loc < stream.length && !PDFFile.isDelimiter(stream[loc])) {
0283: loc++;
0284: }
0285: return new String(stream, start, loc - start);
0286: }
0287:
0288: /**
0289: * read a floating point number from the stream
0290: */
0291: private double readNum() {
0292: int c = stream[loc++];
0293: boolean neg = c == '-';
0294: boolean sawdot = c == '.';
0295: double dotmult = sawdot ? 0.1 : 1;
0296: double value = (c >= '0' && c <= '9') ? c - '0' : 0;
0297: while (true) {
0298: c = stream[loc++];
0299: if (c == '.') {
0300: if (sawdot) {
0301: loc--;
0302: break;
0303: }
0304: sawdot = true;
0305: dotmult = 0.1;
0306: } else if (c >= '0' && c <= '9') {
0307: int val = c - '0';
0308: if (sawdot) {
0309: value += val * dotmult;
0310: dotmult *= 0.1;
0311: } else {
0312: value = value * 10 + val;
0313: }
0314: } else {
0315: loc--;
0316: break;
0317: }
0318: }
0319: if (neg) {
0320: value = -value;
0321: }
0322: return value;
0323: }
0324:
0325: /**
0326: * read a String from the stream. Strings begin with a '('
0327: * character, which has already been read, and end with a ')'
0328: * character. A '\' character starts an escape sequence of up
0329: * to three octal digits.
0330: *
0331: * @return the string with escape sequences replaced with their
0332: * values
0333: */
0334: private String readString() {
0335: int start = loc;
0336: StringBuffer sb = new StringBuffer();
0337: while (loc < stream.length) {
0338: int c = stream[loc++];
0339: if (c == ')') {
0340: break;
0341: } else if (c == '\\') {
0342: // escape sequences
0343: c = stream[loc++];
0344: if (c >= '0' && c <= '9') {
0345: int count = 0;
0346: int val = 0;
0347: while (c >= '0' && c <= '9' && count < 3) {
0348: val = val * 8 + c - '0';
0349: c = stream[loc++];
0350: count++;
0351: }
0352: loc--;
0353: c = val;
0354: } else if (c == 'n') {
0355: c = '\n';
0356: } else if (c == 'r') {
0357: c = '\r';
0358: } else if (c == 't') {
0359: c = '\t';
0360: } else if (c == 'b') {
0361: c = '\b';
0362: } else if (c == 'f') {
0363: c = '\f';
0364: } else if (c == '\\') {
0365: c = '\\';
0366: } else if (c == '(') {
0367: c = '(';
0368: } else if (c == ')') {
0369: c = ')';
0370: }
0371: }
0372: sb.append((char) c);
0373: }
0374: return sb.toString();
0375: }
0376:
0377: /**
0378: * read a byte array from the stream. Byte arrays begin with a '<'
0379: * character, which has already been read, and end with a '>'
0380: * character. Each byte in the array is made up of two hex characters,
0381: * the first being the high-order bit.
0382: *
0383: * We translate the byte arrays into char arrays by combining two bytes
0384: * into a character, and then translate the character array into a string.
0385: * [JK FIXME this is probably a really bad idea!]
0386: *
0387: * @return the byte array
0388: */
0389: private String readByteArray() {
0390: StringBuffer buf = new StringBuffer();
0391:
0392: int count = 0;
0393: char w = (char) 0;
0394:
0395: // read individual bytes and format into a character array
0396: while ((loc < stream.length) && (stream[loc] != '>')) {
0397: char c = (char) stream[loc];
0398: byte b = (byte) 0;
0399:
0400: if (c >= '0' && c <= '9') {
0401: b = (byte) (c - '0');
0402: } else if (c >= 'a' && c <= 'f') {
0403: b = (byte) (10 + (c - 'a'));
0404: } else if (c >= 'A' && c <= 'F') {
0405: b = (byte) (10 + (c - 'A'));
0406: } else {
0407: loc++;
0408: continue;
0409: }
0410:
0411: // calculate where in the current byte this character goes
0412: int offset = 1 - (count % 2);
0413: w |= (0xf & b) << (offset * 4);
0414:
0415: // increment to the next char if we've written four bytes
0416: if (offset == 0) {
0417: buf.append(w);
0418: w = (char) 0;
0419: }
0420:
0421: count++;
0422: loc++;
0423: }
0424:
0425: // ignore trailing '>'
0426: loc++;
0427:
0428: return buf.toString();
0429: }
0430:
0431: /////////////////////////////////////////////////////////////////
0432: // B E G I N P A R S E R S E C T I O N
0433: /////////////////////////////////////////////////////////////////
0434:
0435: /**
0436: * Called to prepare for some iterations
0437: */
0438: @Override
0439: public void setup() {
0440: stack = new Stack();
0441: parserStates = new Stack();
0442: state = new ParserState();
0443: path = new GeneralPath();
0444: loc = 0;
0445: clip = 0;
0446:
0447: //initialize the ParserState
0448: state.fillCS = PDFColorSpace
0449: .getColorSpace(PDFColorSpace.COLORSPACE_GRAY);
0450: state.strokeCS = PDFColorSpace
0451: .getColorSpace(PDFColorSpace.COLORSPACE_GRAY);
0452: state.textFormat = new PDFTextFormat();
0453:
0454: // HexDump.printData(stream);
0455: // System.out.println(dumpStream());
0456: }
0457:
0458: /**
0459: * parse the stream. commands are added to the PDFPage initialized
0460: * in the constructor as they are encountered.
0461: * <p>
0462: * Page numbers in comments refer to the Adobe PDF specification.
0463: *
0464: * @return <ul><li>Watchable.RUNNING when there are commands to be processed
0465: * <li>Watchable.COMPLETED when the page is done and all
0466: * the commands have been processed
0467: * <li>Watchable.STOPPED if the page we are rendering into is
0468: * no longer available
0469: * </ul>
0470: */
0471: public int iterate() throws Exception {
0472: // make sure the page is still available, and create the reference
0473: // to it for use within this iteration
0474: cmds = (PDFPage) pageRef.get();
0475: if (cmds == null) {
0476: System.out.println("Page gone. Stopping");
0477: return Watchable.STOPPED;
0478: }
0479:
0480: Object obj = parseObject();
0481:
0482: // if there's nothing left to parse, we're done
0483: if (obj == null) {
0484: return Watchable.COMPLETED;
0485: }
0486:
0487: if (obj instanceof Tok) {
0488: // it's a command. figure out what to do.
0489: // (if not, the token will be "pushed" onto the stack)
0490: String cmd = ((Tok) obj).name;
0491: // debug("Command: "+cmd+" (stack size is "+stack.size()+")",0);
0492: if (cmd.equals("q")) {
0493: // push the parser state
0494: parserStates.push(state.clone());
0495:
0496: // push graphics state
0497: cmds.addPush();
0498: } else if (cmd.equals("Q")) {
0499: // pop graphics state
0500: cmds.addPop();
0501:
0502: // pop the parser state
0503: state = (ParserState) parserStates.pop();
0504: } else if (cmd.equals("cm")) {
0505: // set transform to array of values
0506: float[] elts = popFloat(6);
0507: AffineTransform xform = new AffineTransform(elts);
0508: cmds.addXform(xform);
0509: } else if (cmd.equals("w")) {
0510: // set stroke width
0511: cmds.addStrokeWidth(popFloat());
0512: } else if (cmd.equals("J")) {
0513: // set end cap style
0514: cmds.addEndCap(popInt());
0515: } else if (cmd.equals("j")) {
0516: // set line join style
0517: cmds.addLineJoin(popInt());
0518: } else if (cmd.equals("M")) {
0519: // set miter limit
0520: cmds.addMiterLimit(popInt());
0521: } else if (cmd.equals("d")) {
0522: // set dash style and phase
0523: float phase = popFloat();
0524: float[] dashary = popFloatArray();
0525: cmds.addDash(dashary, phase);
0526: } else if (cmd.equals("ri")) {
0527: // TODO: do something with rendering intent (page 197)
0528: } else if (cmd.equals("i")) {
0529: popFloat();
0530: // TODO: do something with flatness tolerance
0531: } else if (cmd.equals("gs")) {
0532: // set graphics state to values in a named dictionary
0533: setGSState(popString());
0534: } else if (cmd.equals("m")) {
0535: // path move to
0536: float y = popFloat();
0537: float x = popFloat();
0538: path.moveTo(x, y);
0539: } else if (cmd.equals("l")) {
0540: // path line to
0541: float y = popFloat();
0542: float x = popFloat();
0543: path.lineTo(x, y);
0544: } else if (cmd.equals("c")) {
0545: // path curve to
0546: float a[] = popFloat(6);
0547: path.curveTo(a[0], a[1], a[2], a[3], a[4], a[5]);
0548: } else if (cmd.equals("v")) {
0549: // path curve; first control point= start
0550: float a[] = popFloat(4);
0551: Point2D cp = path.getCurrentPoint();
0552: path.curveTo((float) cp.getX(), (float) cp.getY(),
0553: a[0], a[1], a[2], a[3]);
0554: } else if (cmd.equals("y")) {
0555: // path curve; last control point= end
0556: float a[] = popFloat(4);
0557: path.curveTo(a[0], a[1], a[2], a[3], a[2], a[3]);
0558: } else if (cmd.equals("h")) {
0559: // path close
0560: path.closePath();
0561: } else if (cmd.equals("re")) {
0562: // path add rectangle
0563: float a[] = popFloat(4);
0564: path.moveTo(a[0], a[1]);
0565: path.lineTo(a[0] + a[2], a[1]);
0566: path.lineTo(a[0] + a[2], a[1] + a[3]);
0567: path.lineTo(a[0], a[1] + a[3]);
0568: path.closePath();
0569: } else if (cmd.equals("S")) {
0570: // stroke the path
0571: cmds.addPath(path, PDFShapeCmd.STROKE | clip);
0572: clip = 0;
0573: path = new GeneralPath();
0574: } else if (cmd.equals("s")) {
0575: // close and stroke the path
0576: path.closePath();
0577: cmds.addPath(path, PDFShapeCmd.STROKE | clip);
0578: clip = 0;
0579: path = new GeneralPath();
0580: } else if (cmd.equals("f") || cmd.equals("F")) {
0581: // fill the path (close/not close identical)
0582: cmds.addPath(path, PDFShapeCmd.FILL | clip);
0583: clip = 0;
0584: path = new GeneralPath();
0585: } else if (cmd.equals("f*") || cmd.equals("F*")) {
0586: // fill the path using even/odd rule
0587: path.setWindingRule(WIND_EVEN_ODD);
0588: cmds.addPath(path, PDFShapeCmd.FILL | clip);
0589: clip = 0;
0590: path = new GeneralPath();
0591: } else if (cmd.equals("B")) {
0592: // fill and stroke the path
0593: cmds.addPath(path, PDFShapeCmd.BOTH | clip);
0594: clip = 0;
0595: path = new GeneralPath();
0596: } else if (cmd.equals("B*")) {
0597: // fill path using even/odd rule and stroke it
0598: path.setWindingRule(WIND_EVEN_ODD);
0599: cmds.addPath(path, PDFShapeCmd.BOTH | clip);
0600: clip = 0;
0601: path = new GeneralPath();
0602: } else if (cmd.equals("b")) {
0603: // close the path, then fill and stroke it
0604: path.closePath();
0605: cmds.addPath(path, PDFShapeCmd.BOTH | clip);
0606: clip = 0;
0607: path = new GeneralPath();
0608: } else if (cmd.equals("b*")) {
0609: // close path, fill using even/odd rule, then stroke it
0610: path.closePath();
0611: path.setWindingRule(WIND_EVEN_ODD);
0612: cmds.addPath(path, PDFShapeCmd.BOTH | clip);
0613: clip = 0;
0614: path = new GeneralPath();
0615: } else if (cmd.equals("n")) {
0616: // clip with the path and discard it
0617: if (clip != 0) {
0618: cmds.addPath(path, clip);
0619: }
0620: clip = 0;
0621: path = new GeneralPath();
0622: } else if (cmd.equals("W")) {
0623: // mark this path for clipping!
0624: clip = PDFShapeCmd.CLIP;
0625: } else if (cmd.equals("W*")) {
0626: // mark this path using even/odd rule for clipping
0627: path.setWindingRule(WIND_EVEN_ODD);
0628: clip = PDFShapeCmd.CLIP;
0629: } else if (cmd.equals("sh")) {
0630: // shade a region that is defined by the shader itself.
0631: // shading the current space from a dictionary
0632: // should only be used for limited-dimension shadings
0633: String gdictname = popString();
0634: // set up the pen to do a gradient fill according
0635: // to the dictionary
0636: PDFObject shobj = findResource(gdictname, "Shading");
0637: doShader(shobj);
0638:
0639: } else if (cmd.equals("CS")) {
0640: // set the stroke color space
0641: state.strokeCS = parseColorSpace(new PDFObject(stack
0642: .pop()));
0643: } else if (cmd.equals("cs")) {
0644: // set the fill color space
0645: state.fillCS = parseColorSpace(new PDFObject(stack
0646: .pop()));
0647: } else if (cmd.equals("SC")) {
0648: // set the stroke color
0649: int n = state.strokeCS.getNumComponents();
0650: cmds.addStrokePaint(state.strokeCS
0651: .getPaint(popFloat(n)));
0652: } else if (cmd.equals("SCN")) {
0653: if (state.strokeCS instanceof PatternSpace) {
0654: cmds
0655: .addFillPaint(doPattern((PatternSpace) state.strokeCS));
0656: } else {
0657: int n = state.strokeCS.getNumComponents();
0658: cmds.addStrokePaint(state.strokeCS
0659: .getPaint(popFloat(n)));
0660: }
0661: } else if (cmd.equals("sc")) {
0662: // set the fill color
0663: int n = state.fillCS.getNumComponents();
0664: cmds.addFillPaint(state.fillCS.getPaint(popFloat(n)));
0665: } else if (cmd.equals("scn")) {
0666: if (state.fillCS instanceof PatternSpace) {
0667: cmds
0668: .addFillPaint(doPattern((PatternSpace) state.fillCS));
0669: } else {
0670: int n = state.fillCS.getNumComponents();
0671: cmds.addFillPaint(state.fillCS
0672: .getPaint(popFloat(n)));
0673: }
0674: } else if (cmd.equals("G")) {
0675: // set the stroke color to a Gray value
0676: state.strokeCS = PDFColorSpace
0677: .getColorSpace(PDFColorSpace.COLORSPACE_GRAY);
0678: cmds.addStrokePaint(state.strokeCS
0679: .getPaint(popFloat(1)));
0680: } else if (cmd.equals("g")) {
0681: // set the fill color to a Gray value
0682: state.fillCS = PDFColorSpace
0683: .getColorSpace(PDFColorSpace.COLORSPACE_GRAY);
0684: cmds.addFillPaint(state.fillCS.getPaint(popFloat(1)));
0685: } else if (cmd.equals("RG")) {
0686: // set the stroke color to an RGB value
0687: state.strokeCS = PDFColorSpace
0688: .getColorSpace(PDFColorSpace.COLORSPACE_RGB);
0689: cmds.addStrokePaint(state.strokeCS
0690: .getPaint(popFloat(3)));
0691: } else if (cmd.equals("rg")) {
0692: // set the fill color to an RGB value
0693: state.fillCS = PDFColorSpace
0694: .getColorSpace(PDFColorSpace.COLORSPACE_RGB);
0695: cmds.addFillPaint(state.fillCS.getPaint(popFloat(3)));
0696: } else if (cmd.equals("K")) {
0697: // set the stroke color to a CMYK value
0698: state.strokeCS = PDFColorSpace
0699: .getColorSpace(PDFColorSpace.COLORSPACE_CMYK);
0700: cmds.addStrokePaint(state.strokeCS
0701: .getPaint(popFloat(4)));
0702: } else if (cmd.equals("k")) {
0703: // set the fill color to a CMYK value
0704: state.fillCS = PDFColorSpace
0705: .getColorSpace(PDFColorSpace.COLORSPACE_CMYK);
0706: cmds.addFillPaint(state.fillCS.getPaint(popFloat(4)));
0707: } else if (cmd.equals("Do")) {
0708: // make a do call on the referenced object
0709: PDFObject xobj = findResource(popString(), "XObject");
0710: doXObject(xobj);
0711: } else if (cmd.equals("BT")) {
0712: // begin text block: reset everything.
0713: state.textFormat.reset();
0714: } else if (cmd.equals("ET")) {
0715: // end of text. noop
0716: state.textFormat.end();
0717: } else if (cmd.equals("Tc")) {
0718: // set character spacing
0719: state.textFormat.setCharSpacing(popFloat());
0720: } else if (cmd.equals("Tw")) {
0721: // set word spacing
0722: state.textFormat.setWordSpacing(popFloat());
0723: } else if (cmd.equals("Tz")) {
0724: // set horizontal scaling
0725: state.textFormat.setHorizontalScale(popFloat());
0726: } else if (cmd.equals("TL")) {
0727: // set leading
0728: state.textFormat.setLeading(popFloat());
0729: } else if (cmd.equals("Tf")) {
0730: // set text font
0731: float sz = popFloat();
0732: String fontref = popString();
0733: state.textFormat.setFont(getFontFrom(fontref), sz);
0734: } else if (cmd.equals("Tr")) {
0735: // set text rendering mode
0736: state.textFormat.setMode(popInt());
0737: } else if (cmd.equals("Ts")) {
0738: // set text rise
0739: state.textFormat.setRise(popFloat());
0740: } else if (cmd.equals("Td")) {
0741: // set text matrix location
0742: float y = popFloat();
0743: float x = popFloat();
0744: state.textFormat.carriageReturn(x, y);
0745: } else if (cmd.equals("TD")) {
0746: // set leading and matrix: -y TL x y Td
0747: float y = popFloat();
0748: float x = popFloat();
0749: state.textFormat.setLeading(-y);
0750: state.textFormat.carriageReturn(x, y);
0751: } else if (cmd.equals("Tm")) {
0752: // set text matrix
0753: state.textFormat.setMatrix(popFloat(6));
0754: } else if (cmd.equals("T*")) {
0755: // go to next line
0756: state.textFormat.carriageReturn();
0757: } else if (cmd.equals("Tj")) {
0758: // show text
0759: state.textFormat.doText(cmds, popString());
0760: } else if (cmd.equals("\'")) {
0761: // next line and show text: T* string Tj
0762: state.textFormat.carriageReturn();
0763: state.textFormat.doText(cmds, popString());
0764: } else if (cmd.equals("\"")) {
0765: // draw string on new line with char & word spacing:
0766: // aw Tw ac Tc string '
0767: String string = popString();
0768: float ac = popFloat();
0769: float aw = popFloat();
0770: state.textFormat.setWordSpacing(aw);
0771: state.textFormat.setCharSpacing(ac);
0772: state.textFormat.doText(cmds, string);
0773: } else if (cmd.equals("TJ")) {
0774: // show kerned string
0775: state.textFormat.doText(cmds, popArray());
0776: } else if (cmd.equals("BI")) {
0777: // parse inline image
0778: parseInlineImage();
0779: } else if (cmd.equals("BX")) {
0780: catchexceptions = true;
0781: } else if (cmd.equals("EX")) {
0782: catchexceptions = false;
0783: } else if (cmd.equals("MP")) {
0784: // mark point (role= mark role name)
0785: popString();
0786: } else if (cmd.equals("DP")) {
0787: // mark point with dictionary (role, ref)
0788: // ref is either inline dict or name in "Properties" rsrc
0789: Object ref = stack.pop();
0790: popString();
0791: } else if (cmd.equals("BMC")) {
0792: // begin marked content (role)
0793: popString();
0794: } else if (cmd.equals("BDC")) {
0795: // begin marked content with dict (role, ref)
0796: // ref is either inline dict or name in "Properties" rsrc
0797: Object ref = stack.pop();
0798: popString();
0799: } else if (cmd.equals("EMC")) {
0800: // end marked content
0801: } else if (cmd.equals("d0")) {
0802: // character width in type3 fonts
0803: popFloat(2);
0804: } else if (cmd.equals("d1")) {
0805: // character width in type3 fonts
0806: popFloat(6);
0807: } else {
0808: if (catchexceptions) {
0809: // debug("**** WARNING: Unknown command: "+cmd+" **************************", 10);
0810: } else {
0811: throw new PDFParseException("Unknown command: "
0812: + cmd);
0813: }
0814: }
0815: if (stack.size() != 0) {
0816: // debug("**** WARNING! Stack not zero! (cmd="+cmd+", size="+stack.size()+") *************************",4);
0817: stack.setSize(0);
0818: }
0819: } else {
0820: stack.push(obj);
0821: }
0822:
0823: // release or reference to the page object, so that it can be
0824: // gc'd if it is no longer in use
0825: cmds = null;
0826:
0827: return Watchable.RUNNING;
0828: }
0829:
0830: /**
0831: * Cleanup when iteration is done
0832: */
0833: @Override
0834: public void cleanup() {
0835: state.textFormat.flush();
0836: cmds.finish();
0837:
0838: stack = null;
0839: parserStates = null;
0840: state = null;
0841: path = null;
0842: cmds = null;
0843: }
0844:
0845: boolean errorwritten = false;
0846:
0847: public void dumpStreamToError() {
0848: if (errorwritten) {
0849: return;
0850: }
0851: errorwritten = true;
0852: try {
0853: File oops = File.createTempFile("PDFError", ".err");
0854: FileOutputStream fos = new FileOutputStream(oops);
0855: fos.write(stream);
0856: fos.close();
0857: } catch (IOException ioe) { /* Do nothing */
0858: }
0859: ;
0860: }
0861:
0862: public String dumpStream() {
0863: return escape(new String(stream).replace('\r', '\n'));
0864: }
0865:
0866: /////////////////////////////////////////////////////////////////
0867: // H E L P E R S
0868: /////////////////////////////////////////////////////////////////
0869:
0870: /**
0871: * get a property from a named dictionary in the resources of this
0872: * content stream.
0873: * @param name the name of the property in the dictionary
0874: * @param inDict the name of the dictionary in the resources
0875: * @return the value of the property in the dictionary
0876: */
0877: private PDFObject findResource(String name, String inDict)
0878: throws IOException {
0879: if (inDict != null) {
0880: PDFObject in = (PDFObject) resources.get(inDict);
0881: if (in == null || in.getType() != PDFObject.DICTIONARY) {
0882: throw new PDFParseException("No dictionary called "
0883: + inDict + " found in the resources");
0884: }
0885: return (PDFObject) in.getDictRef(name);
0886: } else {
0887: return (PDFObject) resources.get(name);
0888: }
0889: }
0890:
0891: /**
0892: * Insert a PDF object into the command stream. The object must
0893: * either be an Image or a Form, which is a set of PDF commands
0894: * in a stream.
0895: * @param obj the object to insert, an Image or a Form.
0896: */
0897: private void doXObject(PDFObject obj) throws IOException {
0898: String type = obj.getDictRef("Subtype").getStringValue();
0899: if (type.equals("Image")) {
0900: doImage(obj);
0901: } else if (type.equals("Form")) {
0902: doForm(obj);
0903: } else {
0904: throw new PDFParseException("Unknown XObject subtype: "
0905: + type);
0906: }
0907: }
0908:
0909: /**
0910: * Parse image data into a Java BufferedImage and add the image
0911: * command to the page.
0912: * @param obj contains the image data, and a dictionary describing
0913: * the width, height and color space of the image.
0914: */
0915: private void doImage(PDFObject obj) throws IOException {
0916: cmds.addImage(PDFImage.createImage(obj, resources));
0917: }
0918:
0919: /**
0920: * Inject a stream of PDF commands onto the page. Optimized to cache
0921: * a parsed stream of commands, so that each Form object only needs
0922: * to be parsed once.
0923: * @param obj a stream containing the PDF commands, a transformation
0924: * matrix, bounding box, and resources.
0925: */
0926: private void doForm(PDFObject obj) throws IOException {
0927: // check to see if we've already parsed this sucker
0928: PDFPage formCmds = (PDFPage) obj.getCache();
0929: if (formCmds == null) {
0930: // rats. parse it.
0931: AffineTransform at;
0932: Rectangle2D bbox;
0933: PDFObject matrix = obj.getDictRef("Matrix");
0934: if (matrix == null) {
0935: at = new AffineTransform();
0936: } else {
0937: float elts[] = new float[6];
0938: for (int i = 0; i < elts.length; i++) {
0939: elts[i] = ((PDFObject) matrix.getAt(i))
0940: .getFloatValue();
0941: }
0942: at = new AffineTransform(elts);
0943: }
0944: PDFObject bobj = obj.getDictRef("BBox");
0945: bbox = new Rectangle2D.Float(bobj.getAt(0).getFloatValue(),
0946: bobj.getAt(1).getFloatValue(), bobj.getAt(2)
0947: .getFloatValue(), bobj.getAt(3)
0948: .getFloatValue());
0949: formCmds = new PDFPage(bbox, 0);
0950: formCmds.addXform(at);
0951:
0952: HashMap r = new HashMap(resources);
0953: PDFObject rsrc = obj.getDictRef("Resources");
0954: if (rsrc != null) {
0955: r.putAll(rsrc.getDictionary());
0956: }
0957:
0958: PDFParser form = new PDFParser(formCmds, obj.getStream(), r);
0959: form.go(true);
0960:
0961: obj.setCache(formCmds);
0962: }
0963: cmds.addPush();
0964: cmds.addCommands(formCmds);
0965: cmds.addPop();
0966: }
0967:
0968: /**
0969: * Set the values into a PatternSpace
0970: */
0971: private PDFPaint doPattern(PatternSpace patternSpace)
0972: throws IOException {
0973: float[] components = null;
0974:
0975: String patternName = popString();
0976: PDFObject pattern = findResource(patternName, "Pattern");
0977:
0978: if (pattern == null) {
0979: throw new PDFParseException("Unknown pattern : "
0980: + patternName);
0981: }
0982:
0983: if (stack.size() > 0) {
0984: components = popFloat(stack.size());
0985: }
0986:
0987: return patternSpace.getPaint(pattern, components, resources);
0988: }
0989:
0990: /**
0991: * Parse the next object out of the PDF stream. This could be a
0992: * Double, a String, a HashMap (dictionary), Object[] array, or
0993: * a Tok containing a PDF command.
0994: */
0995: private Object parseObject() throws PDFParseException {
0996: Tok t = nextToken();
0997: if (t.type == Tok.NUM) {
0998: return new Double(tok.value);
0999: } else if (t.type == Tok.STR) {
1000: return tok.name;
1001: } else if (t.type == Tok.NAME) {
1002: return tok.name;
1003: } else if (t.type == Tok.BRKB) {
1004: HashMap hm = new HashMap();
1005: String name = null;
1006: Object obj;
1007: while ((obj = parseObject()) != null) {
1008: if (name == null) {
1009: name = (String) obj;
1010: } else {
1011: hm.put(name, new PDFObject(obj));
1012: name = null;
1013: }
1014: }
1015: if (tok.type != Tok.BRKE) {
1016: throw new PDFParseException(
1017: "Inline dict should have ended with '>>'");
1018: }
1019: return hm;
1020: } else if (t.type == Tok.ARYB) {
1021: // build an array
1022: ArrayList ary = new ArrayList();
1023: Object obj;
1024: while ((obj = parseObject()) != null) {
1025: ary.add(obj);
1026: }
1027: if (tok.type != Tok.ARYE) {
1028: throw new PDFParseException("Expected ']'");
1029: }
1030: return ary.toArray();
1031: } else if (t.type == Tok.CMD) {
1032: return t;
1033: }
1034: return null;
1035: }
1036:
1037: /**
1038: * Parse an inline image. An inline image starts with BI (already
1039: * read, contains a dictionary until ID, and then image data until
1040: * EI.
1041: */
1042: private void parseInlineImage() throws IOException {
1043: // build dictionary until ID, then read image until EI
1044: HashMap hm = new HashMap();
1045: while (true) {
1046: Tok t = nextToken();
1047: if (t.type == Tok.CMD && t.name.equals("ID")) {
1048: break;
1049: }
1050: // it should be a name;
1051: String name = t.name;
1052: if (name.equals("BPC")) {
1053: name = "BitsPerComponent";
1054: } else if (name.equals("CS")) {
1055: name = "ColorSpace";
1056: } else if (name.equals("D")) {
1057: name = "Decode";
1058: } else if (name.equals("DP")) {
1059: name = "DecodeParms";
1060: } else if (name.equals("F")) {
1061: name = "Filter";
1062: } else if (name.equals("H")) {
1063: name = "Height";
1064: } else if (name.equals("IM")) {
1065: name = "ImageMask";
1066: } else if (name.equals("W")) {
1067: name = "Width";
1068: } else if (name.equals("I")) {
1069: name = "Interpolate";
1070: }
1071: Object vobj = parseObject();
1072: hm.put(name, new PDFObject(vobj));
1073: }
1074: if (stream[loc] == '\r') {
1075: loc++;
1076: }
1077: if (stream[loc] == '\n' || stream[loc] == ' ') {
1078: loc++;
1079: }
1080:
1081: // for some totally undocumented reason, inline image-masks seem to
1082: // use the opposite meanings for 0 and 1 than the rest of the world.
1083: // In order to make them work right we do one of two things:
1084: // if there is no decode array, add a [1 0] decode array. If there is
1085: // a decode array, swap the values in it. It's weird, but that's
1086: // the only way I can figure to get them not inverted.
1087: PDFObject imObj = (PDFObject) hm.get("ImageMask");
1088: if (imObj != null && imObj.getBooleanValue()) {
1089: // the default (if there is no decode array)
1090: Double[] decode = { new Double(1), new Double(0) };
1091:
1092: // if there is a decode array, invert it
1093: PDFObject decodeObj = (PDFObject) hm.get("Decode");
1094: if (decodeObj != null) {
1095: decode[0] = new Double(decodeObj.getAt(1)
1096: .getDoubleValue());
1097: decode[1] = new Double(decodeObj.getAt(0)
1098: .getDoubleValue());
1099: }
1100:
1101: hm.put("Decode", new PDFObject(decode));
1102: }
1103:
1104: PDFObject obj = new PDFObject(null, PDFObject.DICTIONARY, hm);
1105: int dstart = loc;
1106:
1107: // now skip data until a whitespace followed by EI
1108: while (!PDFFile.isWhiteSpace(stream[loc])
1109: || stream[loc + 1] != 'E' || stream[loc + 2] != 'I') {
1110: loc++;
1111: }
1112:
1113: // data runs from dstart to loc
1114: byte[] data = new byte[loc - dstart];
1115: System.arraycopy(stream, dstart, data, 0, loc - dstart);
1116: obj.setStream(PDFDecoder.decodeStream(obj, ByteBuffer
1117: .wrap(data)));
1118: loc += 3;
1119: doImage(obj);
1120: }
1121:
1122: /**
1123: * build a shader from a dictionary.
1124: */
1125: private void doShader(PDFObject shaderObj) throws IOException {
1126: PDFShader shader = PDFShader.getShader(shaderObj, resources);
1127:
1128: cmds.addPush();
1129:
1130: Rectangle2D bbox = shader.getBBox();
1131: if (bbox != null) {
1132: cmds.addFillPaint(shader.getPaint());
1133: cmds.addPath(new GeneralPath(bbox), PDFShapeCmd.FILL);
1134: }
1135:
1136: cmds.addPop();
1137: }
1138:
1139: /**
1140: * get a PDFFont from the resources, given the resource name of the
1141: * font.
1142: *
1143: * @param fontref the resource key for the font
1144: */
1145: private PDFFont getFontFrom(String fontref) throws IOException {
1146: PDFObject obj = findResource(fontref, "Font");
1147: return PDFFont.getFont(obj, resources);
1148: }
1149:
1150: /**
1151: * add graphics state commands contained within a dictionary.
1152: * @param name the resource name of the graphics state dictionary
1153: */
1154: private void setGSState(String name) throws IOException {
1155: // obj must be a string that is a key to the "ExtGState" dict
1156: PDFObject gsobj = findResource(name, "ExtGState");
1157: // get LW, LC, LJ, Font, SM, CA, ML, D, RI, FL, BM, ca
1158: // out of the reference, which is a dictionary
1159: PDFObject d;
1160: if ((d = gsobj.getDictRef("LW")) != null) {
1161: cmds.addStrokeWidth(d.getFloatValue());
1162: }
1163: if ((d = gsobj.getDictRef("LC")) != null) {
1164: cmds.addEndCap(d.getIntValue());
1165: }
1166: if ((d = gsobj.getDictRef("LJ")) != null) {
1167: cmds.addLineJoin(d.getIntValue());
1168: }
1169: if ((d = gsobj.getDictRef("Font")) != null) {
1170: state.textFormat.setFont(getFontFrom(d.getAt(0)
1171: .getStringValue()), d.getAt(1).getFloatValue());
1172: }
1173: if ((d = gsobj.getDictRef("ML")) != null) {
1174: cmds.addMiterLimit(d.getFloatValue());
1175: }
1176: if ((d = gsobj.getDictRef("D")) != null) {
1177: PDFObject pdash[] = d.getAt(0).getArray();
1178: float dash[] = new float[pdash.length];
1179: for (int i = 0; i < pdash.length; i++) {
1180: dash[i] = pdash[i].getFloatValue();
1181: }
1182: cmds.addDash(dash, d.getAt(1).getFloatValue());
1183: }
1184: if ((d = gsobj.getDictRef("CA")) != null) {
1185: cmds.addStrokeAlpha(d.getFloatValue());
1186: }
1187: if ((d = gsobj.getDictRef("ca")) != null) {
1188: cmds.addFillAlpha(d.getFloatValue());
1189: }
1190: // others: BM=blend mode
1191: }
1192:
1193: /**
1194: * generate a PDFColorSpace description based on a PDFObject. The
1195: * object could be a standard name, or the name of a resource in
1196: * the ColorSpace dictionary, or a color space name with a defining
1197: * dictionary or stream.
1198: */
1199: private PDFColorSpace parseColorSpace(PDFObject csobj)
1200: throws IOException {
1201: if (csobj == null) {
1202: return state.fillCS;
1203: }
1204:
1205: return PDFColorSpace.getColorSpace(csobj, resources);
1206: }
1207:
1208: /**
1209: * pop a single float value off the stack.
1210: * @return the float value of the top of the stack
1211: * @throws PDFParseException if the value on the top of the stack
1212: * isn't a number
1213: */
1214: private float popFloat() throws PDFParseException {
1215: Object obj = stack.pop();
1216: if (obj instanceof Double) {
1217: return ((Double) obj).floatValue();
1218: } else {
1219: throw new PDFParseException("Expected a number here.");
1220: }
1221: }
1222:
1223: /**
1224: * pop an array of float values off the stack. This is equivalent
1225: * to filling an array from end to front by popping values off the
1226: * stack.
1227: * @param count the number of numbers to pop off the stack
1228: * @return an array of length <tt>count</tt>
1229: * @throws PDFParseException if any of the values popped off the
1230: * stack are not numbers.
1231: */
1232: private float[] popFloat(int count) throws PDFParseException {
1233: float[] ary = new float[count];
1234: for (int i = count - 1; i >= 0; i--) {
1235: ary[i] = popFloat();
1236: }
1237: return ary;
1238: }
1239:
1240: /**
1241: * pop a single integer value off the stack.
1242: * @return the integer value of the top of the stack
1243: * @throws PDFParseException if the top of the stack isn't a number.
1244: */
1245: private int popInt() throws PDFParseException {
1246: Object obj = stack.pop();
1247: if (obj instanceof Double) {
1248: return ((Double) obj).intValue();
1249: } else {
1250: throw new PDFParseException("Expected a number here.");
1251: }
1252: }
1253:
1254: /**
1255: * pop an array of integer values off the stack. This is equivalent
1256: * to filling an array from end to front by popping values off the
1257: * stack.
1258: * @param count the number of numbers to pop off the stack
1259: * @return an array of length <tt>count</tt>
1260: * @throws PDFParseException if any of the values popped off the
1261: * stack are not numbers.
1262: */
1263: private float[] popFloatArray() throws PDFParseException {
1264: Object obj = stack.pop();
1265: if (!(obj instanceof Object[])) {
1266: throw new PDFParseException("Expected an [array] here.");
1267: }
1268: Object[] source = (Object[]) obj;
1269: float[] ary = new float[source.length];
1270: for (int i = 0; i < ary.length; i++) {
1271: if (source[i] instanceof Double) {
1272: ary[i] = ((Double) source[i]).floatValue();
1273: } else {
1274: throw new PDFParseException(
1275: "This array doesn't consist only of floats.");
1276: }
1277: }
1278: return ary;
1279: }
1280:
1281: /**
1282: * pop a String off the stack.
1283: * @return the String from the top of the stack
1284: * @throws PDFParseException if the top of the stack is not a NAME
1285: * or STR.
1286: */
1287: private String popString() throws PDFParseException {
1288: Object obj = stack.pop();
1289: if (!(obj instanceof String)) {
1290: throw new PDFParseException("Expected string here: "
1291: + obj.toString());
1292: } else {
1293: return (String) obj;
1294: }
1295: }
1296:
1297: /**
1298: * pop a PDFObject off the stack.
1299: * @return the PDFObject from the top of the stack
1300: * @throws PDFParseException if the top of the stack does not contain
1301: * a PDFObject.
1302: */
1303: private PDFObject popObject() throws PDFParseException {
1304: Object obj = stack.pop();
1305: if (!(obj instanceof PDFObject)) {
1306: throw new PDFParseException("Expected a reference here: "
1307: + obj.toString());
1308: }
1309: return (PDFObject) obj;
1310: }
1311:
1312: /**
1313: * pop an array off the stack
1314: * @return the array of objects that is the top element of the stack
1315: * @throws PDFParseException if the top element of the stack does not
1316: * contain an array.
1317: */
1318: private Object[] popArray() throws PDFParseException {
1319: Object obj = stack.pop();
1320: if (!(obj instanceof Object[])) {
1321: throw new PDFParseException("Expected an [array] here: "
1322: + obj.toString());
1323: }
1324: return (Object[]) obj;
1325: }
1326:
1327: /**
1328: * A class to store state needed whiel rendering. This includes the
1329: * stroke and fill color spaces, as well as the text formatting
1330: * parameters.
1331: */
1332: class ParserState implements Cloneable {
1333: /** the fill color space */
1334: PDFColorSpace fillCS;
1335:
1336: /** the stroke color space */
1337: PDFColorSpace strokeCS;
1338:
1339: /** the text paramters */
1340: PDFTextFormat textFormat;
1341:
1342: /**
1343: * Clone the render state.
1344: */
1345: @Override
1346: public Object clone() {
1347: ParserState newState = new ParserState();
1348:
1349: // no need to clone color spaces, since they are immutable
1350: newState.fillCS = fillCS;
1351: newState.strokeCS = strokeCS;
1352:
1353: // we do need to clone the textFormat
1354: newState.textFormat = (PDFTextFormat) textFormat.clone();
1355:
1356: return newState;
1357: }
1358:
1359: }
1360:
1361: }
|