Source Code Cross Referenced for PDFFile.java in » PDF » PDF-Renderer » com » sun » pdfview » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » PDF » PDF Renderer » com.sun.pdfview
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * $Id: PDFFile.java,v 1.5 2007/12/20 18:17:41 rbair Exp $
0003:         *
0004:         * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
0005:         * Santa Clara, California 95054, U.S.A. All rights reserved.
0006:         *
0007:         * This library is free software; you can redistribute it and/or
0008:         * modify it under the terms of the GNU Lesser General Public
0009:         * License as published by the Free Software Foundation; either
0010:         * version 2.1 of the License, or (at your option) any later version.
0011:         * 
0012:         * This library is distributed in the hope that it will be useful,
0013:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015:         * Lesser General Public License for more details.
0016:         * 
0017:         * You should have received a copy of the GNU Lesser General Public
0018:         * License along with this library; if not, write to the Free Software
0019:         * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
0020:         */
0021:
0022:        package com.sun.pdfview;
0023:
0024:        import java.awt.geom.Rectangle2D;
0025:        import java.io.File;
0026:        import java.io.IOException;
0027:        import java.io.RandomAccessFile;
0028:        import java.nio.ByteBuffer;
0029:        import java.nio.channels.FileChannel;
0030:        import java.util.ArrayList;
0031:        import java.util.HashMap;
0032:        import java.util.Map;
0033:
0034:        import com.sun.pdfview.action.GoToAction;
0035:        import com.sun.pdfview.action.PDFAction;
0036:
0037:        /**
0038:         * An encapsulation of a .pdf file.  The methods of this class
0039:         * can parse the contents of a PDF file, but those methods are
0040:         * hidden.  Instead, the public methods of this class allow
0041:         * access to the pages in the PDF file.  Typically, you create
0042:         * a new PDFFile, ask it for the number of pages, and then
0043:         * request one or more PDFPages.
0044:         * @author Mike Wessler
0045:         */
0046:        public class PDFFile {
0047:            /** the end of line character */
0048:            String eol = "\n";
0049:
0050:            /**
0051:             * A ByteBuffer containing the file data
0052:             */
0053:            ByteBuffer buf;
0054:
0055:            /**
0056:             * the cross reference table mapping object numbers to locations
0057:             * in the PDF file
0058:             */
0059:            PDFXref[] objIdx;
0060:
0061:            /** the root PDFObject, as specified in the PDF file */
0062:            PDFObject root = null;
0063:
0064:            /** the Encrypt PDFObject, from the trailer */
0065:            PDFObject encrypt = null;
0066:
0067:            /** a mapping of page numbers to parsed PDF commands */
0068:            Cache cache;
0069:
0070:            /**
0071:             * whether the file is printable or not (trailer -> Encrypt -> P & 0x4)
0072:             */
0073:            private boolean printable = true;
0074:
0075:            /**
0076:             * whether the file is saveable or not (trailer -> Encrypt -> P & 0x10)
0077:             */
0078:            private boolean saveable = true;
0079:
/**
 * Creates a PDFFile from the bytes of a .pdf document.  A fresh page
 * cache is created and the file is parsed before the constructor returns.
 * <p>
 * Use the getPage(...) methods to get a page from the PDF file.
 *
 * @param buf the ByteBuffer containing the PDF data
 * @throws IOException propagated from parsing the file
 */
public PDFFile(ByteBuffer buf) throws IOException {
    this.cache = new Cache();
    this.buf = buf;

    parseFile();
}
0094:
0095:            /**
0096:             * Gets whether the owner of the file has given permission to print
0097:             * the file.
0098:             * @return true if it is okay to print the file
0099:             */
0100:            public boolean isPrintable() {
0101:                return printable;
0102:            }
0103:
0104:            /**
0105:             * Gets whether the owner of the file has given permission to save
0106:             * a copy of the file.
0107:             * @return true if it is okay to save the file
0108:             */
0109:            public boolean isSaveable() {
0110:                return saveable;
0111:            }
0112:
0113:            /**
0114:             * get the root PDFObject of this PDFFile.  You generally shouldn't need
0115:             * this, but we've left it open in case you want to go spelunking.
0116:             */
0117:            public PDFObject getRoot() {
0118:                return root;
0119:            }
0120:
0121:            /**
0122:             * return the number of pages in this PDFFile.  The pages will be
0123:             * numbered from 1 to getNumPages(), inclusive.
0124:             */
0125:            public int getNumPages() {
0126:                try {
0127:                    return root.getDictRef("Pages").getDictRef("Count")
0128:                            .getIntValue();
0129:                } catch (IOException ioe) {
0130:                    return 0;
0131:                }
0132:            }
0133:
0134:            /**
0135:             * Used internally to track down PDFObject references.  You should never
0136:             * need to call this.
0137:             * <p>
0138:             * Since this is the only public method for tracking down PDF objects,
0139:             * it is synchronized.  This means that the PDFFile can only hunt down
0140:             * one object at a time, preventing the file's location from getting
0141:             * messed around.
0142:             * <p>
0143:             * This call stores the current buffer position before any changes are made
0144:             * and restores it afterwards, so callers need not know that the position
0145:             * has changed.
0146:             *
0147:             */
0148:            public synchronized PDFObject dereference(PDFXref ref)
0149:                    throws IOException {
0150:                int id = ref.getID();
0151:
0152:                // make sure the id is valid and has been read
0153:                if (id >= objIdx.length || objIdx[id] == null) {
0154:                    return PDFObject.nullObj;
0155:                }
0156:
0157:                // check to see if this is already dereferenced
0158:                PDFObject obj = objIdx[id].getObject();
0159:                if (obj != null) {
0160:                    return obj;
0161:                }
0162:
0163:                int loc = objIdx[id].getFilePos();
0164:                if (loc < 0) {
0165:                    return PDFObject.nullObj;
0166:                }
0167:
0168:                // store the current position in the buffer
0169:                int startPos = buf.position();
0170:
0171:                // move to where this object is
0172:                buf.position(loc);
0173:
0174:                // read the object and cache the reference
0175:                obj = readObject();
0176:                if (obj == null) {
0177:                    obj = PDFObject.nullObj;
0178:                }
0179:
0180:                objIdx[id].setObject(obj);
0181:
0182:                // reset to the previous position
0183:                buf.position(startPos);
0184:
0185:                return obj;
0186:            }
0187:
0188:            /**
0189:             * Is the argument a white space character according to the PDF spec?
0190:             */
0191:            public static boolean isWhiteSpace(int c) {
0192:                return (c == ' ' || c == '\t' || c == '\r' || c == '\n'
0193:                        || c == 0 || c == 12);
0194:                // 0=nul, 12=ff
0195:            }
0196:
0197:            /**
0198:             * Is the argument a delimiter according to the PDF spec?
0199:             */
0200:            public static boolean isDelimiter(int c) {
0201:                return (c == '(' || c == ')' || c == '{' || c == '}'
0202:                        || c == '[' || c == ']' || c == '/' || c == '<'
0203:                        || c == '>' || c == '%' || isWhiteSpace(c));
0204:            }
0205:
0206:            /**
0207:             * read the next object from the file
0208:             */
0209:            private PDFObject readObject() throws IOException {
0210:                return readObject(false);
0211:            }
0212:
0213:            /**
0214:             * read the next object with a special catch for numbers
0215:             * @param numscan if true, don't bother trying to see if a number
0216:             * is part of a "241 43 R" type of object reference.
0217:             * @return the next PDFObject in the file
0218:             */
0219:            private PDFObject readObject(boolean numscan) throws IOException {
0220:                // skip whitespace
0221:                int c;
0222:                PDFObject obj = null;
0223:                while (obj == null) {
0224:                    while (isWhiteSpace(c = buf.get())) {
0225:                    }
0226:                    // check character for special punctuation:
0227:                    if (c == '<') {
0228:                        // could be start of <hex data>, or start of <<dictionary>>
0229:                        c = buf.get();
0230:                        if (c == '<') {
0231:                            // it's a dictionary
0232:                            obj = readDictionary();
0233:                        } else {
0234:                            buf.position(buf.position() - 1);
0235:                            obj = readHexString();
0236:                        }
0237:                    } else if (c == '(') {
0238:                        // it's a string
0239:                        obj = readString();
0240:                    } else if (c == '[') {
0241:                        // it's an array
0242:                        obj = readArray();
0243:                    } else if (c == '/') {
0244:                        // it's a name
0245:                        obj = readName();
0246:                    } else if (c == '%') {
0247:                        // it's a comment
0248:                        readLine();
0249:                    } else if ((c >= '0' && c <= '9') || c == '-' || c == '+'
0250:                            || c == '.') {
0251:                        // it's a number
0252:                        obj = readNumber((char) c);
0253:                        if (!numscan) {
0254:                            // It could be the start of a reference.
0255:                            // Check to see if there's another number, then "R".
0256:                            // 
0257:                            // We can't use mark/reset, since this could be called
0258:                            // from dereference, which already is using a mark
0259:                            int startPos = buf.position();
0260:
0261:                            PDFObject testnum = readObject(true);
0262:                            if (testnum != null
0263:                                    && testnum.getType() == PDFObject.NUMBER) {
0264:                                PDFObject testR = readObject(true);
0265:                                if (testR != null
0266:                                        && testR.getType() == PDFObject.KEYWORD
0267:                                        && testR.getStringValue().equals("R")) {
0268:                                    // yup.  it's a reference.
0269:                                    PDFXref xref = new PDFXref(obj
0270:                                            .getIntValue(), testnum
0271:                                            .getIntValue());
0272:                                    // Create a placeholder that will be dereferenced
0273:                                    // as needed
0274:                                    obj = new PDFObject(this , xref);
0275:                                } else if (testR != null
0276:                                        && testR.getType() == PDFObject.KEYWORD
0277:                                        && testR.getStringValue().equals("obj")) {
0278:                                    // it's an object description
0279:                                    obj = readObjectDescription();
0280:                                } else {
0281:                                    buf.position(startPos);
0282:                                }
0283:                            } else {
0284:                                buf.position(startPos);
0285:                            }
0286:                        }
0287:                    } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
0288:                        // it's a keyword
0289:                        obj = readKeyword((char) c);
0290:                    } else {
0291:                        // it's probably a closing character.
0292:                        // throwback
0293:                        buf.position(buf.position() - 1);
0294:                        break;
0295:                    }
0296:                }
0297:                return obj;
0298:            }
0299:
0300:            /**
0301:             * requires the next few characters (after whitespace) to match the
0302:             * argument.
0303:             * @param match the next few characters after any whitespace that
0304:             * must be in the file
0305:             * @return true if the next characters match; false otherwise.
0306:             */
0307:            private boolean nextItemIs(String match) throws IOException {
0308:                // skip whitespace
0309:                int c;
0310:                while (isWhiteSpace(c = buf.get())) {
0311:                }
0312:                for (int i = 0; i < match.length(); i++) {
0313:                    if (i > 0) {
0314:                        c = buf.get();
0315:                    }
0316:                    if (c != match.charAt(i)) {
0317:                        return false;
0318:                    }
0319:                }
0320:                return true;
0321:            }
0322:
0323:            /**
0324:             * read an entire &lt;&lt; dictionary &gt;&gt;.  The initial
0325:             * &lt;&lt; has already been read.
0326:             * @return the Dictionary as a PDFObject.
0327:             */
0328:            private PDFObject readDictionary() throws IOException {
0329:                HashMap hm = new HashMap();
0330:                // we've already read the <<.  Now get /Name obj pairs until >>
0331:                PDFObject name;
0332:                while ((name = readObject()) != null) {
0333:                    // make sure first item is a NAME
0334:                    if (name.getType() != PDFObject.NAME) {
0335:                        throw new PDFParseException(
0336:                                "First item in dictionary must be a /Name.  (Was "
0337:                                        + name + ")");
0338:                    }
0339:                    PDFObject value = readObject();
0340:                    if (value != null) {
0341:                        hm.put(name.getStringValue(), value);
0342:                    }
0343:                }
0344:                //	System.out.println("End of dictionary at location "+raf.getFilePointer());
0345:                if (!nextItemIs(">>")) {
0346:                    throw new PDFParseException("End of dictionary wasn't '>>'");
0347:                }
0348:                //	System.out.println("Dictionary closed at location "+raf.getFilePointer());
0349:                return new PDFObject(this , PDFObject.DICTIONARY, hm);
0350:            }
0351:
0352:            /**
0353:             * read a character, and return its value as if it were a hexidecimal
0354:             * digit.
0355:             * @return a number between 0 and 15 whose value matches the next
0356:             * hexidecimal character.  Returns -1 if the next character isn't in
0357:             * [0-9a-fA-F]
0358:             */
0359:            private int readHexDigit() throws IOException {
0360:                int a;
0361:                while (isWhiteSpace(a = buf.get())) {
0362:                }
0363:                if (a >= '0' && a <= '9') {
0364:                    a -= '0';
0365:                } else if (a >= 'a' && a <= 'f') {
0366:                    a -= 'a' - 10;
0367:                } else if (a >= 'A' && a <= 'F') {
0368:                    a -= 'A' - 10;
0369:                } else {
0370:                    a = -1;
0371:                }
0372:                return a;
0373:            }
0374:
0375:            /**
0376:             * return the 8-bit value represented by the next two hex characters.
0377:             * If the next two characters don't represent a hex value, return -1
0378:             * and reset the read head.  If there is only one hex character,
0379:             * return its value as if there were an implicit 0 after it.
0380:             */
0381:            private int readHexPair() throws IOException {
0382:                int first = readHexDigit();
0383:                if (first < 0) {
0384:                    buf.position(buf.position() - 1);
0385:                    return -1;
0386:                }
0387:                int second = readHexDigit();
0388:                if (second < 0) {
0389:                    buf.position(buf.position() - 1);
0390:                    return (first << 4);
0391:                } else {
0392:                    return (first << 4) + second;
0393:                }
0394:            }
0395:
0396:            /**
0397:             * read a < hex string >.  The initial < has already been read.
0398:             */
0399:            private PDFObject readHexString() throws IOException {
0400:                // we've already read the <. Now get the hex bytes until >
0401:                int val;
0402:                StringBuffer sb = new StringBuffer();
0403:                while ((val = readHexPair()) >= 0) {
0404:                    sb.append((char) val);
0405:                }
0406:                if (buf.get() != '>') {
0407:                    throw new PDFParseException("Bad character in Hex String");
0408:                }
0409:                return new PDFObject(this , PDFObject.STRING, unicode(sb
0410:                        .toString()));
0411:            }
0412:
0413:            /**
0414:             * take a string and determine if it is unicode by looking at the lead
0415:             * characters, and that the string must be a multiple of 2 chars long.
0416:             * Convert a unicoded string's characters into the true unicode.
0417:             * 
0418:             * @param input
0419:             * @return
0420:             */
0421:            private String unicode(String input) {
0422:                // determine if we have unicode, if so, translate it
0423:                if (input.length() < 2 || (input.length() % 2) != 0) {
0424:                    return input;
0425:                }
0426:                int c0 = input.charAt(0) & 0xFF;
0427:                int c1 = input.charAt(1) & 0xFF;
0428:                if ((c0 == 0xFE && c1 == 0xFF) || (c0 == 0xFF && c1 == 0xFE)) {
0429:                    // we have unicode
0430:                    boolean bigEndian = (input.charAt(1) == 0xFFFF);
0431:                    StringBuffer out = new StringBuffer();
0432:                    for (int i = 2; i < input.length(); i += 2) {
0433:                        if (bigEndian) {
0434:                            out
0435:                                    .append((char) (((input.charAt(i + 1) & 0xFF) << 8) + (input
0436:                                            .charAt(i) & 0xFF)));
0437:                        } else {
0438:                            out
0439:                                    .append((char) (((input.charAt(i) & 0xFF) << 8) + (input
0440:                                            .charAt(i + 1) & 0xFF)));
0441:                        }
0442:                    }
0443:                    return out.toString();
0444:                } else {
0445:                    return input;
0446:                }
0447:            }
0448:
0449:            /**
0450:             * <p>read a ( character string ).  The initial ( has already been read.
0451:             * Read until a *balanced* ) appears.</p>
0452:             * 
0453:             * <p>PDF Reference Section 3.8.1, Table 3.31 "PDF Data Types" defines
0454:             * String data as:<pre>
0455:             * "text string     Bytes that represent characters encoded
0456:             *                  using either PDFDocEncoding or UTF-16BE with a 
0457:             *                  leading byte-order marker (as defined in 
0458:             *                  “Text String Type�? on page 158.)
0459:             * </pre></p>
0460:             * 
0461:             * <p>Section 5.3.2 defines character sequences and escapes.<br>
0462:             * "The strings must conform to the syntax for string objects. 
0463:             * When a string is written by enclosing the data in parentheses, 
0464:             * bytes whose values are the same as those of the ASCII characters 
0465:             * left parenthesis (40), right parenthesis (41), and backslash (92) 
0466:             * must be preceded by a backslash character. All other byte values 
0467:             * between 0 and 255 may be used in a string object. <br>
0468:             * These rules apply to each individual byte in a string object, 
0469:             * whether the string is interpreted by the text-showing operators 
0470:             * as single-byte or multiple-byte character codes."</p>
0471:             */
0472:            private PDFObject readString() throws IOException {
0473:                int c;
0474:
0475:                // we've already read the (.  now get the characters until a
0476:                // *balanced* ) appears.  Translate \r \n \t \b \f \( \) \\ \ddd
0477:                // if a cr/lf follows a backslash, ignore the cr/lf
0478:                int parencount = 1;
0479:                StringBuffer sb = new StringBuffer();
0480:
0481:                while (parencount > 0) {
0482:                    c = buf.get() & 0xFF;
0483:                    // process unescaped parenthesis
0484:                    if (c == '(') {
0485:                        parencount++;
0486:                    } else if (c == ')') {
0487:                        parencount--;
0488:                        if (parencount == 0) {
0489:                            c = -1;
0490:                            break;
0491:                        }
0492:                    } else if (c == '\\') {
0493:                        // time to do some work
0494:                        c = buf.get() & 0xFF;
0495:                        if (c == 'r') {
0496:                            c = '\r';
0497:                        } else if (c == 'n') {
0498:                            c = '\n';
0499:                        } else if (c == 't') {
0500:                            c = '\t';
0501:                        } else if (c == 'b') {
0502:                            c = '\b';
0503:                        } else if (c == 'f') {
0504:                            c = '\f';
0505:                        }
0506:                        if (c == '\r') {
0507:                            // check for following \n
0508:                            c = buf.get() & 0xFF;
0509:                            if (c != '\n') {
0510:                                buf.position(buf.position() - 1);
0511:                            }
0512:                            c = -1;
0513:                        } else if (c == '\n') {
0514:                            c = -1;
0515:                        } else if (c >= '0' && c <= '9') {
0516:                            int count = 0;
0517:                            int val = 0;
0518:                            while (c >= '0' && c <= '9' && count < 3) {
0519:                                val = val * 8 + c - '0';
0520:                                c = buf.get() & 0xFF;
0521:                                count++;
0522:                            }
0523:                            buf.position(buf.position() - 1);
0524:                            c = val;
0525:                        }
0526:                    }
0527:                    if (c >= 0) {
0528:                        sb.append((char) c);
0529:                    }
0530:                }
0531:                return new PDFObject(this , PDFObject.STRING, unicode(sb
0532:                        .toString()));
0533:            }
0534:
0535:            /**
0536:             * Read a line of text.  This follows the semantics of readLine() in
0537:             * DataInput -- it reads character by character until a '/n' is
0538:             * encountered.  If a '/r' is encountered, it is discarded.
0539:             */
0540:            private String readLine() {
0541:                StringBuffer sb = new StringBuffer();
0542:
0543:                while (buf.remaining() > 0) {
0544:                    char c = (char) buf.get();
0545:
0546:                    if (c == '\r') {
0547:                        if (buf.remaining() > 0) {
0548:                            char n = (char) buf.get(buf.position());
0549:                            if (n == '\n') {
0550:                                buf.get();
0551:                            }
0552:                        }
0553:                        break;
0554:                    } else if (c == '\n') {
0555:                        break;
0556:                    }
0557:
0558:                    sb.append(c);
0559:                }
0560:
0561:                return sb.toString();
0562:            }
0563:
0564:            /**
0565:             * read an [ array ].  The initial [ has already been read.  PDFObjects
0566:             * are read until ].
0567:             */
0568:            private PDFObject readArray() throws IOException {
0569:                // we've already read the [.  Now read objects until ]
0570:                ArrayList ary = new ArrayList();
0571:                PDFObject obj;
0572:                while ((obj = readObject()) != null) {
0573:                    ary.add(obj);
0574:                }
0575:                if (buf.get() != ']') {
0576:                    throw new PDFParseException("Array should end with ']'");
0577:                }
0578:                PDFObject[] objlist = new PDFObject[ary.size()];
0579:                for (int i = 0; i < objlist.length; i++) {
0580:                    objlist[i] = (PDFObject) ary.get(i);
0581:                }
0582:                return new PDFObject(this , PDFObject.ARRAY, objlist);
0583:            }
0584:
0585:            /**
0586:             * read a /name.  The / has already been read.
0587:             */
0588:            private PDFObject readName() throws IOException {
0589:                // we've already read the / that begins the name.
0590:                // all we have to check for is #hh hex notations.
0591:                StringBuffer sb = new StringBuffer();
0592:                int c;
0593:                while (!isDelimiter(c = buf.get())) {
0594:                    if (c == '#') {
0595:                        int hex = readHexPair();
0596:                        if (hex >= 0) {
0597:                            c = hex;
0598:                        } else {
0599:                            throw new PDFParseException("Bad #hex in /Name");
0600:                        }
0601:                    }
0602:                    sb.append((char) c);
0603:                }
0604:                buf.position(buf.position() - 1);
0605:                return new PDFObject(this , PDFObject.NAME, sb.toString());
0606:            }
0607:
0608:            /**
0609:             * read a number.  The initial digit or . or - is passed in as the
0610:             * argument.
0611:             */
0612:            private PDFObject readNumber(char start) throws IOException {
0613:                // we've read the first digit (it's passed in as the argument)
0614:                boolean neg = start == '-';
0615:                boolean sawdot = start == '.';
0616:                double dotmult = sawdot ? 0.1 : 1;
0617:                double value = (start >= '0' && start <= '9') ? start - '0' : 0;
0618:                while (true) {
0619:                    int c = buf.get();
0620:                    if (c == '.') {
0621:                        if (sawdot) {
0622:                            throw new PDFParseException(
0623:                                    "Can't have two '.' in a number");
0624:                        }
0625:                        sawdot = true;
0626:                        dotmult = 0.1;
0627:                    } else if (c >= '0' && c <= '9') {
0628:                        int val = c - '0';
0629:                        if (sawdot) {
0630:                            value += val * dotmult;
0631:                            dotmult *= 0.1;
0632:                        } else {
0633:                            value = value * 10 + val;
0634:                        }
0635:                    } else {
0636:                        buf.position(buf.position() - 1);
0637:                        break;
0638:                    }
0639:                }
0640:                if (neg) {
0641:                    value = -value;
0642:                }
0643:                return new PDFObject(this , PDFObject.NUMBER, new Double(value));
0644:            }
0645:
0646:            /**
0647:             * read a bare keyword.  The initial character is passed in as the
0648:             * argument.
0649:             */
0650:            private PDFObject readKeyword(char start) throws IOException {
0651:                // we've read the first character (it's passed in as the argument)
0652:                StringBuffer sb = new StringBuffer(String.valueOf(start));
0653:                int c;
0654:                while (!isDelimiter(c = buf.get())) {
0655:                    sb.append((char) c);
0656:                }
0657:                buf.position(buf.position() - 1);
0658:                return new PDFObject(this , PDFObject.KEYWORD, sb.toString());
0659:            }
0660:
0661:            /**
0662:             * read an entire PDFObject.  The intro line, which looks something
0663:             * like "4 0 obj" has already been read.
0664:             */
0665:            private PDFObject readObjectDescription() throws IOException {
0666:                // we've already read the 4 0 obj bit.  Next thing up is the object.
0667:                // object descriptions end with the keyword endobj
0668:                long debugpos = buf.position();
0669:                PDFObject obj = readObject();
0670:                // see if it's a dictionary.  If so, this could be a stream.
0671:                PDFObject endkey = readObject();
0672:                if (endkey.getType() != PDFObject.KEYWORD) {
0673:                    throw new PDFParseException("Expected 'stream' or 'endobj'");
0674:                }
0675:                if (obj.getType() == PDFObject.DICTIONARY
0676:                        && endkey.getStringValue().equals("stream")) {
0677:                    // skip until we see \n
0678:                    readLine();
0679:                    ByteBuffer data = readStream(obj);
0680:                    if (data == null) {
0681:                        data = ByteBuffer.allocate(0);
0682:                    }
0683:                    obj.setStream(data);
0684:                    endkey = readObject();
0685:                    //	    if (endkey.getType()!=PDFObject.KEYWORD) {
0686:                    //                System.out.println("WARNING! Object at "+debugpos+" didn't end with 'endobj'");
0687:                    //throw new PDFParseException("Object must end with 'endobj'");
0688:                    //	    }
0689:                }
0690:                // at this point, obj is the object, keyword should be "endobj"
0691:                String endcheck = endkey.getStringValue();
0692:                if (endcheck == null || !endcheck.equals("endobj")) {
0693:                    System.out.println("WARNING: object at " + debugpos
0694:                            + " didn't end with 'endobj'");
0695:                    //throw new PDFParseException("Object musst end with 'endobj'");
0696:                }
0697:                return obj;
0698:            }
0699:
0700:            /**
0701:             * read the stream portion of a PDFObject.  Calls decodeStream to
0702:             * un-filter the stream as necessary.
0703:             * 
0704:             * @param dict the dictionary associated with this stream.
0705:             * @return a ByteBuffer with the encoded stream data
0706:             */
0707:            private ByteBuffer readStream(PDFObject dict) throws IOException {
0708:                // pointer is at the start of a stream.  read the stream and
0709:                // decode, based on the entries in the dictionary
0710:                PDFObject lengthObj = dict.getDictRef("Length");
0711:                int length = -1;
0712:                if (lengthObj != null) {
0713:                    length = lengthObj.getIntValue();
0714:                }
0715:                if (length < 0) {
0716:                    throw new PDFParseException("Unknown length for stream");
0717:                }
0718:
0719:                // slice the data
0720:                int start = buf.position();
0721:                ByteBuffer streamBuf = buf.slice();
0722:                streamBuf.limit(length);
0723:
0724:                // move the current position to the end of the data
0725:                buf.position(buf.position() + length);
0726:                int ending = buf.position();
0727:
0728:                if (!nextItemIs("endstream")) {
0729:                    System.out.println("read " + length + " chars from "
0730:                            + start + " to " + ending);
0731:                    throw new PDFParseException("Stream ended inappropriately");
0732:                }
0733:
0734:                return streamBuf;
0735:                // now decode stream
0736:                // return PDFDecoder.decodeStream(dict, streamBuf);
0737:            }
0738:
0739:            /** 
0740:             * read the cross reference table from a PDF file.  When this method
0741:             * is called, the file pointer must point to the start of the word
0742:             * "xref" in the file.  Reads the xref table and the trailer dictionary.
0743:             * If dictionary has a /Prev entry, move file pointer
0744:             * and read new trailer
0745:             */
0746:            private void readTrailer() throws IOException {
0747:                // the table of xrefs
0748:                objIdx = new PDFXref[50];
0749:
0750:                // read a bunch of nester trailer tables
0751:                while (true) {
0752:                    // make sure we are looking at an xref table
0753:                    if (!nextItemIs("xref")) {
0754:                        throw new PDFParseException(
0755:                                "Expected 'xref' at start of table");
0756:                    }
0757:
0758:                    // read a bunch of linked tabled
0759:                    while (true) {
0760:                        // read until the word "trailer"
0761:                        PDFObject obj = readObject();
0762:                        if (obj.getType() == PDFObject.KEYWORD
0763:                                && obj.getStringValue().equals("trailer")) {
0764:                            break;
0765:                        }
0766:
0767:                        // read the starting position of the reference
0768:                        if (obj.getType() != PDFObject.NUMBER) {
0769:                            throw new PDFParseException(
0770:                                    "Expected number for first xref entry");
0771:                        }
0772:                        int refstart = obj.getIntValue();
0773:
0774:                        // read the size of the reference table
0775:                        obj = readObject();
0776:                        if (obj.getType() != PDFObject.NUMBER) {
0777:                            throw new PDFParseException(
0778:                                    "Expected number for length of xref table");
0779:                        }
0780:                        int reflen = obj.getIntValue();
0781:
0782:                        // skip a line
0783:                        readLine();
0784:
0785:                        // extend the objIdx table, if necessary
0786:                        if (refstart + reflen >= objIdx.length) {
0787:                            PDFXref nobjIdx[] = new PDFXref[refstart + reflen];
0788:                            System.arraycopy(objIdx, 0, nobjIdx, 0,
0789:                                    objIdx.length);
0790:                            objIdx = nobjIdx;
0791:                        }
0792:
0793:                        // read reference lines
0794:                        for (int refID = refstart; refID < refstart + reflen; refID++) {
0795:                            // each reference line is 20 bytes long
0796:                            byte[] refline = new byte[20];
0797:                            buf.get(refline);
0798:
0799:                            // ignore this line if the object ID is already defined
0800:                            if (objIdx[refID] != null) {
0801:                                continue;
0802:                            }
0803:
0804:                            // see if it's an active object
0805:                            if (refline[17] == 'n') {
0806:                                objIdx[refID] = new PDFXref(refline);
0807:                            } else {
0808:                                objIdx[refID] = new PDFXref(null);
0809:                            }
0810:                        }
0811:                    }
0812:
0813:                    // at this point, the "trailer" word (not EOL) has been read.
0814:                    PDFObject trailerdict = readObject();
0815:                    if (trailerdict.getType() != PDFObject.DICTIONARY) {
0816:                        throw new IOException(
0817:                                "Expected dictionary after \"trailer\"");
0818:                    }
0819:
0820:                    // read the root object location
0821:                    if (root == null) {
0822:                        root = trailerdict.getDictRef("Root");
0823:                    }
0824:
0825:                    // read the encryption information
0826:                    if (encrypt == null) {
0827:                        encrypt = trailerdict.getDictRef("Encrypt");
0828:                    }
0829:
0830:                    // read the location of the previous xref table
0831:                    PDFObject prevloc = trailerdict.getDictRef("Prev");
0832:                    if (prevloc != null) {
0833:                        buf.position(prevloc.getIntValue());
0834:                    } else {
0835:                        break;
0836:                    }
0837:                }
0838:
0839:                // make sure we found a root
0840:                if (root == null) {
0841:                    throw new PDFParseException(
0842:                            "No /Root key found in trailer dictionary");
0843:                }
0844:
0845:                // check what permissions are relevant
0846:                if (encrypt != null) {
0847:                    PDFObject permissions = encrypt.getDictRef("P");
0848:                    if (permissions != null) {
0849:                        int perms = permissions.getIntValue();
0850:                        if ((perms & 4) == 0) {
0851:                            printable = false;
0852:                        }
0853:                        if ((perms & 16) == 0) {
0854:                            saveable = false;
0855:                        }
0856:                    }
0857:                }
0858:
0859:                // dereference the root object
0860:                root.dereference();
0861:            }
0862:
0863:            /**
0864:             * build the PDFFile reference table.  Nothing in the PDFFile actually
0865:             * gets parsed, despite the name of this function.  Things only get
0866:             * read and parsed when they're needed.
0867:             */
0868:            private void parseFile() throws IOException {
0869:                // start at the begining of the file
0870:                buf.rewind();
0871:
0872:                // back up about 32 characters from the end of the file to find
0873:                // startxref\n
0874:                byte[] scan = new byte[32];
0875:                int scanPos = buf.remaining() - scan.length;
0876:                int loc = 0;
0877:
0878:                while (scanPos >= 0) {
0879:                    buf.position(scanPos);
0880:                    buf.get(scan);
0881:
0882:                    // find startxref in scan
0883:                    String scans = new String(scan);
0884:                    loc = scans.indexOf("startxref");
0885:                    if (loc > 0) {
0886:                        if (scanPos + loc + scan.length <= buf.limit()) {
0887:                            scanPos = scanPos + loc;
0888:                            loc = 0;
0889:                        }
0890:
0891:                        break;
0892:                    }
0893:                    scanPos -= scan.length - 10;
0894:                }
0895:
0896:                if (scanPos < 0) {
0897:                    throw new IOException("This may not be a PDF File");
0898:                }
0899:
0900:                buf.position(scanPos);
0901:                buf.get(scan);
0902:                String scans = new String(scan);
0903:
0904:                loc += 10; // skip over "startxref" and first EOL char
0905:                if (scans.charAt(loc) < 32) {
0906:                    loc++;
0907:                } // skip over possible 2nd EOL char
0908:                // read number
0909:                int numstart = loc;
0910:                while (loc < scans.length() && scans.charAt(loc) >= '0'
0911:                        && scans.charAt(loc) <= '9') {
0912:                    loc++;
0913:                }
0914:                int xrefpos = Integer.parseInt(scans.substring(numstart, loc));
0915:                buf.position(xrefpos);
0916:
0917:                readTrailer();
0918:            }
0919:
0920:            /**
0921:             * Gets the outline tree as a tree of OutlineNode, which is a subclass
0922:             * of DefaultMutableTreeNode.  If there is no outline tree, this method
0923:             * returns null.
0924:             */
0925:            public OutlineNode getOutline() throws IOException {
0926:                // find the outlines entry in the root object
0927:                PDFObject oroot = root.getDictRef("Outlines");
0928:                OutlineNode work = null;
0929:                OutlineNode outline = null;
0930:                if (oroot != null) {
0931:                    // find the first child of the outline root
0932:                    PDFObject scan = oroot.getDictRef("First");
0933:                    outline = work = new OutlineNode("<top>");
0934:
0935:                    // scan each sibling in turn
0936:                    while (scan != null) {
0937:                        // add the new node with it's name
0938:                        String title = scan.getDictRef("Title")
0939:                                .getStringValue();
0940:                        OutlineNode build = new OutlineNode(title);
0941:                        work.add(build);
0942:
0943:                        // find the action
0944:                        PDFAction action = null;
0945:
0946:                        PDFObject actionObj = scan.getDictRef("A");
0947:                        if (actionObj != null) {
0948:                            action = PDFAction.getAction(actionObj, getRoot());
0949:                        } else {
0950:                            // try to create an action from a destination
0951:                            PDFObject destObj = scan.getDictRef("Dest");
0952:                            if (destObj != null) {
0953:                                try {
0954:                                    PDFDestination dest = PDFDestination
0955:                                            .getDestination(destObj, getRoot());
0956:
0957:                                    action = new GoToAction(dest);
0958:                                } catch (IOException ioe) {
0959:                                    // oh well
0960:                                }
0961:                            }
0962:                        }
0963:
0964:                        // did we find an action?  If so, add it
0965:                        if (action != null) {
0966:                            build.setAction(action);
0967:                        }
0968:
0969:                        // find the first child of this node    
0970:                        PDFObject kid = scan.getDictRef("First");
0971:                        if (kid != null) {
0972:                            work = build;
0973:                            scan = kid;
0974:                        } else {
0975:                            // no child.  Process the next sibling
0976:                            PDFObject next = scan.getDictRef("Next");
0977:                            while (next == null) {
0978:                                scan = scan.getDictRef("Parent");
0979:                                next = scan.getDictRef("Next");
0980:                                work = (OutlineNode) work.getParent();
0981:                                if (work == null) {
0982:                                    break;
0983:                                }
0984:                            }
0985:                            scan = next;
0986:                        }
0987:                    }
0988:                }
0989:
0990:                return outline;
0991:            }
0992:
0993:            /**
0994:             * Gets the page number (starting from 1) of the page represented by
0995:             * a particular PDFObject.  The PDFObject must be a Page dictionary or
0996:             * a destination description (or an action).
0997:             * @return a number between 1 and the number of pages indicating the
0998:             * page number, or 0 if the PDFObject is not in the page tree.
0999:             */
1000:            public int getPageNumber(PDFObject page) throws IOException {
1001:                if (page.getType() == PDFObject.ARRAY) {
1002:                    page = page.getAt(0);
1003:                }
1004:
1005:                // now we've got a page.  Make sure.
1006:                PDFObject typeObj = page.getDictRef("Type");
1007:                if (typeObj == null || !typeObj.getStringValue().equals("Page")) {
1008:                    return 0;
1009:                }
1010:
1011:                int count = 0;
1012:                while (true) {
1013:                    PDFObject parent = page.getDictRef("Parent");
1014:                    if (parent == null) {
1015:                        break;
1016:                    }
1017:                    PDFObject kids[] = parent.getDictRef("Kids").getArray();
1018:                    for (int i = 0; i < kids.length; i++) {
1019:                        if (kids[i].equals(page)) {
1020:                            break;
1021:                        } else {
1022:                            PDFObject kcount = kids[i].getDictRef("Count");
1023:                            if (kcount != null) {
1024:                                count += kcount.getIntValue();
1025:                            } else {
1026:                                count += 1;
1027:                            }
1028:                        }
1029:                    }
1030:                    page = parent;
1031:                }
1032:                return count;
1033:            }
1034:
1035:            /**
1036:             * Get the page commands for a given page in a separate thread.
1037:             *
1038:             * @param pagenum the number of the page to get commands for
1039:             */
1040:            public PDFPage getPage(int pagenum) {
1041:                return getPage(pagenum, false);
1042:            }
1043:
1044:            /** 
1045:             * Get the page commands for a given page.
1046:             *
1047:             * @param pagenum the number of the page to get commands for
1048:             * @param wait if true, do not exit until the page is complete.
1049:             */
1050:            public PDFPage getPage(int pagenum, boolean wait) {
1051:                Integer key = new Integer(pagenum);
1052:                HashMap resources = null;
1053:                PDFObject pageObj = null;
1054:                boolean needread = false;
1055:
1056:                PDFPage page = cache.getPage(key);
1057:                PDFParser parser = cache.getPageParser(key);
1058:                if (page == null) {
1059:                    try {
1060:                        // hunt down the page!
1061:                        resources = new HashMap();
1062:
1063:                        PDFObject topPagesObj = root.getDictRef("Pages");
1064:                        pageObj = findPage(topPagesObj, 0, pagenum, resources);
1065:
1066:                        if (pageObj == null) {
1067:                            return null;
1068:                        }
1069:
1070:                        page = createPage(pagenum, pageObj);
1071:
1072:                        byte[] stream = getContents(pageObj);
1073:                        parser = new PDFParser(page, stream, resources);
1074:
1075:                        cache.addPage(key, page, parser);
1076:                    } catch (IOException ioe) {
1077:                        System.out.println("GetPage inner loop:");
1078:                        ioe.printStackTrace();
1079:                        return null;
1080:                    }
1081:                }
1082:
1083:                if (parser != null && !parser.isFinished()) {
1084:                    parser.go(wait);
1085:                }
1086:
1087:                return page;
1088:            }
1089:
1090:            /**
1091:             * Stop the rendering of a particular image on this page
1092:             */
1093:            public void stop(int pageNum) {
1094:                PDFParser parser = cache.getPageParser(new Integer(pageNum));
1095:                if (parser != null) {
1096:                    // stop it
1097:                    parser.stop();
1098:                }
1099:            }
1100:
1101:            /**
1102:             * get the stream representing the content of a particular page. 
1103:             *
1104:             * @param pageObj the page object to get the contents of
1105:             * @return a concatenation of any content streams for the requested
1106:             * page.
1107:             */
1108:            private byte[] getContents(PDFObject pageObj) throws IOException {
1109:                // concatenate all the streams
1110:                PDFObject contentsObj = pageObj.getDictRef("Contents");
1111:                if (contentsObj == null) {
1112:                    throw new IOException("No page contents!");
1113:                }
1114:
1115:                PDFObject contents[] = contentsObj.getArray();
1116:
1117:                // see if we have only one stream (the easy case)
1118:                if (contents.length == 1) {
1119:                    return contents[0].getStream();
1120:                }
1121:
1122:                // first get the total length of all the streams
1123:                int len = 0;
1124:                for (int i = 0; i < contents.length; i++) {
1125:                    byte[] data = contents[i].getStream();
1126:                    if (data == null) {
1127:                        throw new PDFParseException("No stream on content " + i
1128:                                + ": " + contents[i]);
1129:                    }
1130:                    len += data.length;
1131:                }
1132:
1133:                // now assemble them all into one object
1134:                byte[] stream = new byte[len];
1135:                len = 0;
1136:                for (int i = 0; i < contents.length; i++) {
1137:                    byte data[] = contents[i].getStream();
1138:                    System.arraycopy(data, 0, stream, len, data.length);
1139:                    len += data.length;
1140:                }
1141:
1142:                return stream;
1143:            }
1144:
1145:            /**
1146:             * Create a PDF Page object by finding the relevant inherited
1147:             * properties
1148:             *
1149:             * @param pageObj the PDF object for the page to be created
1150:             */
1151:            private PDFPage createPage(int pagenum, PDFObject pageObj)
1152:                    throws IOException {
1153:                int rotation = 0;
1154:                Rectangle2D mediabox = null; // second choice, if no crop
1155:                Rectangle2D cropbox = null; // first choice
1156:
1157:                PDFObject mediaboxObj = getInheritedValue(pageObj, "MediaBox");
1158:                if (mediaboxObj != null) {
1159:                    mediabox = parseRect(mediaboxObj);
1160:                }
1161:
1162:                PDFObject cropboxObj = getInheritedValue(pageObj, "CropBox");
1163:                if (cropboxObj != null) {
1164:                    cropbox = parseRect(cropboxObj);
1165:                }
1166:
1167:                PDFObject rotateObj = getInheritedValue(pageObj, "Rotate");
1168:                if (rotateObj != null) {
1169:                    rotation = rotateObj.getIntValue();
1170:                }
1171:
1172:                Rectangle2D bbox = ((cropbox == null) ? mediabox : cropbox);
1173:
1174:                return new PDFPage(pagenum, bbox, rotation, cache);
1175:            }
1176:
1177:            /**
1178:             * Get the PDFObject representing the content of a particular page. Note
1179:             * that the number of the page need not have anything to do with the
1180:             * label on that page.  If there are two blank pages, and then roman
1181:             * numerals for the page number, then passing in 6 will get page (iv).
1182:             *
1183:             * @param pagedict the top of the pages tree
1184:             * @param start the page number of the first page in this dictionary
1185:             * @param getPage the number of the page to find; NOT the page's label.
1186:             * @param resources a HashMap that will be filled with any resource
1187:             *                  definitions encountered on the search for the page
1188:             */
1189:            private PDFObject findPage(PDFObject pagedict, int start,
1190:                    int getPage, Map resources) throws IOException {
1191:                PDFObject rsrcObj = pagedict.getDictRef("Resources");
1192:                if (rsrcObj != null) {
1193:                    resources.putAll(rsrcObj.getDictionary());
1194:                }
1195:
1196:                PDFObject typeObj = pagedict.getDictRef("Type");
1197:                if (typeObj != null && typeObj.getStringValue().equals("Page")) {
1198:                    // we found our page!
1199:                    return pagedict;
1200:                }
1201:
1202:                // find the first child for which (start + count) > getPage
1203:                PDFObject kidsObj = pagedict.getDictRef("Kids");
1204:                if (kidsObj != null) {
1205:                    PDFObject[] kids = kidsObj.getArray();
1206:                    for (int i = 0; i < kids.length; i++) {
1207:                        int count = 1;
1208:                        // BUG: some PDFs (T1Format.pdf) don't have the Type tag.
1209:                        // use the Count tag to indicate a Pages dictionary instead.
1210:                        PDFObject countItem = kids[i].getDictRef("Count");
1211:                        //                if (kids[i].getDictRef("Type").getStringValue().equals("Pages")) {
1212:                        if (countItem != null) {
1213:                            count = countItem.getIntValue();
1214:                        }
1215:
1216:                        if (start + count >= getPage) {
1217:                            return findPage(kids[i], start, getPage, resources);
1218:                        }
1219:
1220:                        start += count;
1221:                    }
1222:                }
1223:
1224:                return null;
1225:            }
1226:
1227:            /**
1228:             * Find a property value in a page that may be inherited.  If the value
1229:             * is not defined in the page itself, follow the page's "parent" links
1230:             * until the value is found or the top of the tree is reached.
1231:             *
1232:             * @param pageObj the object representing the page
1233:             * @param propName the name of the property we are looking for
1234:             */
1235:            private PDFObject getInheritedValue(PDFObject pageObj,
1236:                    String propName) throws IOException {
1237:                // see if we have the property
1238:                PDFObject propObj = pageObj.getDictRef(propName);
1239:                if (propObj != null) {
1240:                    return propObj;
1241:                }
1242:
1243:                // recursively see if any of our parent have it
1244:                PDFObject parentObj = pageObj.getDictRef("Parent");
1245:                if (parentObj != null) {
1246:                    return getInheritedValue(parentObj, propName);
1247:                }
1248:
1249:                // no luck
1250:                return null;
1251:            }
1252:
1253:            /**
1254:             * get a Rectangle2D.Float representation for a PDFObject that is an
1255:             * array of four Numbers.
1256:             * @param obj a PDFObject that represents an Array of exactly four
1257:             * Numbers.
1258:             */
1259:            public Rectangle2D.Float parseRect(PDFObject obj)
1260:                    throws IOException {
1261:                if (obj.getType() == PDFObject.ARRAY) {
1262:                    PDFObject bounds[] = obj.getArray();
1263:                    if (bounds.length == 4) {
1264:                        return new Rectangle2D.Float(bounds[0].getFloatValue(),
1265:                                bounds[1].getFloatValue(), bounds[2]
1266:                                        .getFloatValue()
1267:                                        - bounds[0].getFloatValue(), bounds[3]
1268:                                        .getFloatValue()
1269:                                        - bounds[1].getFloatValue());
1270:                    } else {
1271:                        throw new PDFParseException(
1272:                                "Rectangle definition didn't have 4 elements");
1273:                    }
1274:                } else {
1275:                    throw new PDFParseException(
1276:                            "Rectangle definition not an array");
1277:                }
1278:            }
1279:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.