Source Code Cross Referenced for PDFParser.java in » PDF » jPod » de » intarsys » pdf » parser » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » PDF » jPod » de.intarsys.pdf.parser 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Copyright (c) 2007, intarsys consulting GmbH
0003:         *
0004:         * Redistribution and use in source and binary forms, with or without
0005:         * modification, are permitted provided that the following conditions are met:
0006:         *
0007:         * - Redistributions of source code must retain the above copyright notice,
0008:         *   this list of conditions and the following disclaimer.
0009:         *
0010:         * - Redistributions in binary form must reproduce the above copyright notice,
0011:         *   this list of conditions and the following disclaimer in the documentation
0012:         *   and/or other materials provided with the distribution.
0013:         *
0014:         * - Neither the name of intarsys nor the names of its contributors may be used
0015:         *   to endorse or promote products derived from this software without specific
0016:         *   prior written permission.
0017:         *
0018:         * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0019:         * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0020:         * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0021:         * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0022:         * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0023:         * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0024:         * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0025:         * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0026:         * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0027:         * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
0028:         * POSSIBILITY OF SUCH DAMAGE.
0029:         */
0030:        package de.intarsys.pdf.parser;
0031:
0032:        import java.io.IOException;
0033:        import java.util.Arrays;
0034:        import java.util.List;
0035:
0036:        import de.intarsys.pdf.cos.COSArray;
0037:        import de.intarsys.pdf.cos.COSDictionary;
0038:        import de.intarsys.pdf.cos.COSDocumentElement;
0039:        import de.intarsys.pdf.cos.COSFalse;
0040:        import de.intarsys.pdf.cos.COSFixed;
0041:        import de.intarsys.pdf.cos.COSIndirectObject;
0042:        import de.intarsys.pdf.cos.COSInteger;
0043:        import de.intarsys.pdf.cos.COSName;
0044:        import de.intarsys.pdf.cos.COSNull;
0045:        import de.intarsys.pdf.cos.COSNumber;
0046:        import de.intarsys.pdf.cos.COSObject;
0047:        import de.intarsys.pdf.cos.COSObjectKey;
0048:        import de.intarsys.pdf.cos.COSStream;
0049:        import de.intarsys.pdf.cos.COSString;
0050:        import de.intarsys.pdf.cos.COSTrue;
0051:        import de.intarsys.pdf.crypt.COSSecurityException;
0052:        import de.intarsys.pdf.crypt.ISystemSecurityHandler;
0053:        import de.intarsys.pdf.st.STDocType;
0054:        import de.intarsys.tools.hex.HexTools;
0055:        import de.intarsys.tools.randomaccess.IRandomAccess;
0056:        import de.intarsys.tools.randomaccess.RandomAccessByteArray;
0057:        import de.intarsys.tools.stream.FastByteArrayOutputStream;
0058:        import de.intarsys.tools.string.StringTools;
0059:
0060:        /**
0061:         * An abstract superclass for our two flavours of PDF Parsers.
0062:         */
0063:        public abstract class PDFParser {
0064:            public static char CHAR_CR = '\r'; // carriage return; compared against in isEOL and readEOL
0065:            // NOTE(review): these five character constants are not final, so they can be reassigned at runtime.
0066:            public static char CHAR_LF = '\n'; // line feed; compared against in isEOL and readEOL
0067:
0068:            public static char CHAR_HT = '\t'; // horizontal tab
0069:
0070:            public static char CHAR_BS = '\b'; // backspace
0071:
0072:            public static char CHAR_FF = '\f'; // form feed (value 12)
0073:            // Keyword byte sequences; getBytes() without an argument encodes them in the platform default charset.
0074:            public static final byte[] TOKEN_PDFHEADER = "%PDF".getBytes(); //$NON-NLS-1$
0075:            // parseHeader accepts either of the two header tokens as the start of a document.
0076:            public static final byte[] TOKEN_FDFHEADER = "%FDF".getBytes(); //$NON-NLS-1$
0077:
0078:            public static final byte[] TOKEN_EOF = "%%EOF".getBytes(); //$NON-NLS-1$
0079:
0080:            public static final byte[] TOKEN_obj = "obj".getBytes(); //$NON-NLS-1$
0081:
0082:            public static final byte[] TOKEN_endobj = "endobj".getBytes(); //$NON-NLS-1$
0083:            // parseElement maps the tokens false, true and null to COSFalse, COSTrue and COSNull.
0084:            public static final byte[] TOKEN_false = "false".getBytes(); //$NON-NLS-1$
0085:
0086:            public static final byte[] TOKEN_true = "true".getBytes(); //$NON-NLS-1$
0087:
0088:            public static final byte[] TOKEN_null = "null".getBytes(); //$NON-NLS-1$
0089:
0090:            public static final byte[] TOKEN_startxref = "startxref".getBytes(); //$NON-NLS-1$
0091:
0092:            public static final byte[] TOKEN_trailer = "trailer".getBytes(); //$NON-NLS-1$
0093:
0094:            public static final byte[] TOKEN_xref = "xref".getBytes(); //$NON-NLS-1$
0095:
0096:            public static final byte[] TOKEN_stream = "stream".getBytes(); //$NON-NLS-1$
0097:            // "stream" without its first byte; presumably matched after the 's' was already consumed -- confirm.
0098:            public static final byte[] TOKEN_s_tream = "tream".getBytes(); //$NON-NLS-1$
0099:
0100:            public static final byte[] TOKEN_endstream = "endstream".getBytes(); //$NON-NLS-1$
0101:            // "endstream" without its first byte; presumably matched after the 'e' was already consumed -- confirm.
0102:            public static final byte[] TOKEN_ndstream = "ndstream".getBytes(); //$NON-NLS-1$
0103:            // parseElement returns this very array for an "R" token and parseObject compares it by identity.
0104:            public static final byte[] TOKEN_R = "R".getBytes(); //$NON-NLS-1$
0105:            // Identifier strings for parser warnings; not referenced in this part of the file -- presumably they tag COSLoadWarning instances, confirm.
0106:            public static final String C_WARN_UNEVENHEX = "616a"; //$NON-NLS-1$
0107:
0108:            public static final String C_WARN_ILLEGALHEX = "616b"; //$NON-NLS-1$
0109:
0110:            public static final String C_WARN_STRINGTOLONG = "ImplLimitString"; //$NON-NLS-1$
0111:
0112:            public static final String C_WARN_NAMETOLONG = "ImplLimitName"; //$NON-NLS-1$
0113:
0114:            public static final String C_WARN_ARRAYSIZE = "ImplLimitArray"; //$NON-NLS-1$
0115:
0116:            public static final String C_WARN_SINGLESPACE = "614a"; //$NON-NLS-1$
0117:
0118:            public static final String C_WARN_SINGLEEOL = "614b"; //$NON-NLS-1$
0119:
0120:            public static final String C_WARN_STREAMEOL = "617a"; //$NON-NLS-1$
0121:
0122:            public static final String C_WARN_ENDSTREAMEOL = "617b"; //$NON-NLS-1$
0123:
0124:            public static final String C_WARN_ENDSTREAMCORRUPT = "617c"; //$NON-NLS-1$
0125:
0126:            public static final String C_WARN_STREAMEXTERNAL = "617d"; //$NON-NLS-1$
0127:
0128:            public static final String C_WARN_STREAMLENGTH = "617e"; //$NON-NLS-1$
0129:
0130:            public static final String C_WARN_SINGLESPACE_OBJ = "618a"; //$NON-NLS-1$
0131:
0132:            public static final String C_WARN_SINGLEEOL_OBJ = "618b"; //$NON-NLS-1$
0133:
0134:            public static final String C_WARN_ENDOBJ_MISSING = "618c"; //$NON-NLS-1$
0135:            // Text fragments naming whitespace and comment findings; their use is not visible in this part of the file.
0136:            protected static final String C_TOKEN_ADDWSB = "additional whitespace before"; //$NON-NLS-1$
0137:
0138:            protected static final String C_TOKEN_ADDWSA = "additional whitespace after"; //$NON-NLS-1$
0139:
0140:            protected static final String C_TOKEN_COMMENT = "comment"; //$NON-NLS-1$
0141:
0142:            protected static final String C_TOKEN_NOWSA = "no whitespace after"; //$NON-NLS-1$
0143:            // Lookup table holding one CHARCLASS_* value for each of the 256 byte values; filled by the static initializer.
0144:            protected static final byte[] characterClass = new byte[256];
0145:            // Class of every byte value that the static initializer does not assign to one of the classes below.
0146:            protected static final byte CHARCLASS_ANY = 0;
0147:            // ( ) < > [ ] { } / %
0148:            protected static final byte CHARCLASS_DELIMITER = 1;
0149:            // space, tab, CR, LF, form feed (12) and NUL (0)
0150:            protected static final byte CHARCLASS_WHITESPACE = 2;
0151:            // a-z, A-Z and the two quote characters ' and "
0152:            protected static final byte CHARCLASS_TOKEN = 3;
0153:            // 0-9
0154:            protected static final byte CHARCLASS_DIGIT = 4;
0155:            // . - +
0156:            protected static final byte CHARCLASS_NUMBERSPECIAL = 5;
0157:
0158:            static {
0159:                for (int i = 0; i < 256; i++) {
0160:                    characterClass[i] = CHARCLASS_ANY;
0161:                }
0162:                // delimiters
0163:                characterClass['('] = CHARCLASS_DELIMITER;
0164:                characterClass[')'] = CHARCLASS_DELIMITER;
0165:                characterClass['<'] = CHARCLASS_DELIMITER;
0166:                characterClass['>'] = CHARCLASS_DELIMITER;
0167:                characterClass['['] = CHARCLASS_DELIMITER;
0168:                characterClass[']'] = CHARCLASS_DELIMITER;
0169:                characterClass['{'] = CHARCLASS_DELIMITER;
0170:                characterClass['}'] = CHARCLASS_DELIMITER;
0171:                characterClass['/'] = CHARCLASS_DELIMITER;
0172:                characterClass['%'] = CHARCLASS_DELIMITER;
0173:
0174:                // whitespace
0175:                characterClass[' '] = CHARCLASS_WHITESPACE;
0176:                characterClass['\t'] = CHARCLASS_WHITESPACE;
0177:                characterClass['\r'] = CHARCLASS_WHITESPACE;
0178:                characterClass['\n'] = CHARCLASS_WHITESPACE;
0179:                characterClass[12] = CHARCLASS_WHITESPACE;
0180:                characterClass[0] = CHARCLASS_WHITESPACE;
0181:
0182:                // digits
0183:                characterClass['0'] = CHARCLASS_DIGIT;
0184:                characterClass['1'] = CHARCLASS_DIGIT;
0185:                characterClass['2'] = CHARCLASS_DIGIT;
0186:                characterClass['3'] = CHARCLASS_DIGIT;
0187:                characterClass['4'] = CHARCLASS_DIGIT;
0188:                characterClass['5'] = CHARCLASS_DIGIT;
0189:                characterClass['6'] = CHARCLASS_DIGIT;
0190:                characterClass['7'] = CHARCLASS_DIGIT;
0191:                characterClass['8'] = CHARCLASS_DIGIT;
0192:                characterClass['9'] = CHARCLASS_DIGIT;
0193:
0194:                // number special
0195:                characterClass['.'] = CHARCLASS_NUMBERSPECIAL;
0196:                characterClass['-'] = CHARCLASS_NUMBERSPECIAL;
0197:                characterClass['+'] = CHARCLASS_NUMBERSPECIAL;
0198:
0199:                // alpha
0200:                for (int i = 'a'; i <= 'z'; i++) {
0201:                    characterClass[i] = CHARCLASS_TOKEN;
0202:                }
0203:                for (int i = 'A'; i <= 'Z'; i++) {
0204:                    characterClass[i] = CHARCLASS_TOKEN;
0205:                }
0206:
0207:                // contentstream allowed token characters
0208:                characterClass['\''] = CHARCLASS_TOKEN;
0209:                characterClass['"'] = CHARCLASS_TOKEN;
0210:            }
0211:
0212:            /**
0213:             * Test whether a byte value is one of the PDF delimiter characters.
0214:             * 
0215:             * <p>
0216:             * The answer is a lookup in the static character class table, so i
0217:             * must be a valid index into that 256 entry table.
0218:             * </p>
0219:             * 
0220:             * @param i
0221:             *            the byte value to classify
0222:             * @return <code>true</code> if i is classified as a delimiter
0223:             */
0224:            public static final boolean isDelimiter(int i) {
0225:                return CHARCLASS_DELIMITER == characterClass[i];
0226:            }
0227:
0228:            /**
0229:             * Test whether a byte value is a decimal digit.
0230:             * 
0231:             * @param i
0232:             *            the byte value to classify; it indexes the character class
0233:             *            table and must lie within its 256 entries
0234:             * @return <code>true</code> if i is classified as a digit
0235:             */
0236:            public static final boolean isDigit(int i) {
0237:                return CHARCLASS_DIGIT == characterClass[i];
0238:            }
0239:
0240:            /**
0241:             * Test whether a byte value terminates a line. Besides carriage return and
0242:             * line feed, the value 12 (form feed) is accepted as well.
0243:             * 
0244:             * @param i the byte value to test
0245:             * @return <code>true</code> if i is treated as a line terminator
0246:             */
0247:            public static final boolean isEOL(int i) {
0248:                final boolean crOrLf = (i == CHAR_CR) || (i == CHAR_LF);
0249:                return crOrLf || (i == 12);
0250:            }
0251:
0252:            /**
0253:             * Test whether a byte value may begin a number token, i.e. whether it is a
0254:             * digit or one of the number special characters '.', '-' and '+'.
0255:             * 
0256:             * @param i
0257:             *            the byte value to classify
0258:             * @return <code>true</code> if a number token may start with i
0259:             */
0260:            public static final boolean isNumberStart(int i) {
0261:                final byte charClass = characterClass[i];
0262:                return (charClass == CHARCLASS_NUMBERSPECIAL)
0263:                        || (charClass == CHARCLASS_DIGIT);
0264:            }
0265:
0266:            /**
0267:             * Test whether a byte value is an octal digit, i.e. lies within '0'..'7'.
0268:             * 
0269:             * @param i
0270:             *            the byte value to test
0271:             * @return <code>true</code> if i is an octal digit
0272:             */
0273:            public static final boolean isOctalDigit(int i) {
0274:                final boolean outsideRange = (i < '0') || (i > '7');
0275:                return !outsideRange;
0276:            }
0277:
0278:            /**
0279:             * Test whether a byte value may begin a keyword token. The ASCII letters
0280:             * and the two quote characters ' and " carry the token character class.
0281:             * 
0282:             * @param i
0283:             *            the byte value to classify
0284:             * @return <code>true</code> if i is classified as a token character
0285:             */
0286:            public static final boolean isTokenStart(int i) {
0287:                return CHARCLASS_TOKEN == characterClass[i];
0288:            }
0289:
0290:            /**
0291:             * Test whether a byte value is a whitespace character.
0292:             * 
0293:             * <p>
0294:             * See pdf spec "white space characters". The table marks space, tab,
0295:             * carriage return, line feed, form feed (12) and NUL (0) as whitespace.
0296:             * </p>
0297:             * 
0298:             * @param i
0299:             *            the byte value to classify
0300:             * @return <code>true</code> if i is classified as whitespace
0301:             */
0302:            public static final boolean isWhitespace(int i) {
0303:                return CHARCLASS_WHITESPACE == characterClass[i];
0304:            }
0305:
0306:            /**
0307:             * Parse a single COS object from its serialized byte representation. The
0308:             * bytes are wrapped in a RandomAccessByteArray and read by a fresh
0309:             * COSDocumentParser that is created with a <code>null</code> argument.
0310:             * 
0311:             * @param data
0312:             *            a byte array containing COS encoded objects
0313:             * @return the parsed element, cast to COSObject
0314:             * @throws IOException
0315:             * @throws COSLoadException
0316:             */
0317:            public static COSObject toCOSObject(byte[] data)
0318:                    throws IOException, COSLoadException {
0319:                COSDocumentParser parser = new COSDocumentParser(null);
0320:                RandomAccessByteArray source = new RandomAccessByteArray(data);
0321:                return (COSObject) parser.parseElement(source);
0322:            }
0323:
0324:            /** Fifo buffer of up to three parsed objects, used as lookahead to detect PDF references. */
0325:            private COSObject[] lookahead = new COSObject[] { null, null, null };
0326:
0327:            /**
0328:             * The number of elements currently in the lookahead buffer.
0329:             */
0330:            private int lookaheadCount = 0;
0331:            // Not referenced in the visible part of this class.
0332:            private ISystemSecurityHandler securityHandler;
0333:
0334:            /** A flag indicating we should flush the lookahead; lookaheadPop clears it once the buffer is empty. */
0335:            private boolean flushLookahead = false;
0336:            // Not referenced in the visible part of this class; presumably a reusable scratch buffer -- confirm.
0337:            private FastByteArrayOutputStream localStream = new FastByteArrayOutputStream();
0338:
0339:            /** Optional handler receiving errors and warnings (eg PDFA compliance checks); may be null. */
0340:            private IPDFParserExceptionHandler exceptionHandler;
0341:            // Returned by getObjectKey(); where it is assigned is not visible in this part of the file.
0342:            private COSObjectKey objectKey;
0343:            // Not referenced in the visible part of this class.
0344:            protected boolean check = false;
0345:            /** Create the indirect object; parseObject calls this as soon as it has read the "R" token of a reference. */
0346:            protected abstract COSIndirectObject createObjectReference()
0347:                    throws IOException, COSLoadException;
0348:            /** @return the handler that receives parser errors and warnings, or <code>null</code> if none is set */
0349:            public IPDFParserExceptionHandler getExceptionHandler() {
0350:                return this.exceptionHandler;
0351:            }
0352:            /** @return the object key currently held by this parser */
0353:            protected COSObjectKey getObjectKey() {
0354:                return this.objectKey;
0355:            }
0356:
0357:            /**
0358:             * Report a load error. If an exception handler is set the error is passed
0359:             * to it, otherwise the error itself is thrown.
0360:             * 
0361:             * @param error the error to report
0362:             * @throws COSLoadException the error itself, when no handler is set
0363:             */
0364:            public void handleError(COSLoadError error) throws COSLoadException {
0365:                if (exceptionHandler == null) {
0366:                    throw error;
0367:                }
0368:                exceptionHandler.error(error);
0369:            }
0370:
0371:            /**
0372:             * Report a load warning. If an exception handler is set the warning is
0373:             * passed to it, otherwise the warning is dropped silently.
0374:             * 
0375:             * @param warning the warning to report
0376:             * @throws COSLoadException
0377:             */
0378:            public void handleWarning(COSLoadWarning warning)
0379:                    throws COSLoadException {
0380:                if (exceptionHandler == null) {
0381:                    return; // it is just a warning...
0382:                }
0383:                exceptionHandler.warning(warning);
0384:            }
0385:
0386:            /**
0387:             * in order to read references we need a two object lookahead for the
0388:             * integer numbers this method pops the first object from the fifo
0389:             * structure.
0390:             * 
0391:             * @return The topmost {@link COSObject}in the object lookahead buffer.
0392:             */
0393:            protected COSObject lookaheadPop() {
0394:                COSObject result = lookahead[0];
0395:                lookahead[0] = lookahead[1];
0396:                lookahead[1] = lookahead[2];
0397:                lookahead[2] = null;
0398:                lookaheadCount--;
0399:                if (lookaheadCount <= 0) {
0400:                    // everything flushed now
0401:                    lookaheadCount = 0;
0402:                    this .flushLookahead = false;
0403:                }
0404:                return result;
0405:            }
0406:
0407:            /**
0408:             * in order to read references we need a two object lookahead for the
0409:             * integer numbers this method pushes an object in the fifo structure.
0410:             * 
0411:             * @param obj
0412:             *            The {@link COSObject}to push in the buffer.
0413:             */
0414:            protected void lookaheadPush(COSObject obj) {
0415:                lookahead[lookaheadCount++] = obj;
0416:            }
0417:
0418:            /**
0419:             * Skip the remainder of a comment.
0420:             * 
0421:             * <p>
0422:             * see PDF Reference v1.4, chapter 3.1.2 comments: Comment ::= "%" anyChar
0423:             * EOL. Bytes are consumed up to and including the first end of line byte,
0424:             * or up to the end of the input.
0425:             * </p>
0426:             * 
0427:             * @param input the input to read from
0428:             * @throws IOException
0429:             */
0430:            protected void parseComment(IRandomAccess input) throws IOException {
0431:                int current = input.read();
0432:                while ((current != -1) && !isEOL(current)) {
0433:                    current = input.read();
0434:                }
0435:            }
0436:
0437:            /**
0438:             * parse the basic elements from the current stream position.
0439:             * 
0440:             * <p>
0441:             * see PDF Reference v1.4, chapter 3.2 Objects
0442:             * </p>
0443:             * 
0444:             * <p>
0445:             * COSObject ::= COSToken | COSBoolean | COSString | COSNumber | COSName |
0446:             * COSNull | COSArray | COSDictionary | COSStream
0447:             * </p>
0448:             * 
0449:             * @return the object parsed
0450:             * 
0451:             * @throws IOException
0452:             * @throws COSLoadException
0453:             */
0454:            public Object parseElement(IRandomAccess input) throws IOException,
0455:                    COSLoadException {
0456:                int next;
0457:                do {
0458:                    next = input.read();
0459:                    if (next == -1) {
0460:                        return null;
0461:                    }
0462:
0463:                    // we have found a non-whitespace character
0464:                    if (isNumberStart(next)) {
0465:                        return parseOnObjectNumber(input, next);
0466:                    }
0467:                    if (next == '(') {
0468:                        return parseOnObjectString(input);
0469:                    }
0470:                    if (isTokenStart(next)) {
0471:                        byte[] token = readTokenElement(input, next);
0472:                        if (token.length == 1) {
0473:                            if (token[0] == TOKEN_R[0]) {
0474:                                return TOKEN_R;
0475:                            }
0476:                        } else if (token.length == 4) {
0477:                            if ((token[0] == TOKEN_true[0])
0478:                                    && (token[1] == TOKEN_true[1])
0479:                                    && (token[2] == TOKEN_true[2])
0480:                                    && (token[3] == TOKEN_true[3])) {
0481:                                return COSTrue.create();
0482:                            }
0483:                            if ((token[0] == TOKEN_null[0])
0484:                                    && (token[1] == TOKEN_null[1])
0485:                                    && (token[2] == TOKEN_null[2])
0486:                                    && (token[3] == TOKEN_null[3])) {
0487:                                return COSNull.create();
0488:                            }
0489:                        } else if (token.length == 5) {
0490:                            if ((token[0] == TOKEN_false[0])
0491:                                    && (token[1] == TOKEN_false[1])
0492:                                    && (token[2] == TOKEN_false[2])
0493:                                    && (token[3] == TOKEN_false[3])
0494:                                    && (token[4] == TOKEN_false[4])) {
0495:                                return COSFalse.create();
0496:                            }
0497:                        }
0498:                        return token;
0499:                    }
0500:                    if (next == '/') {
0501:                        return parseOnObjectName(input);
0502:                    }
0503:
0504:                    // performance shortcut for simple space
0505:                    if ((next == ' ') || isWhitespace(next)) {
0506:                        continue;
0507:                    }
0508:                    if (next == '%') {
0509:                        parseComment(input);
0510:                        continue;
0511:                    }
0512:
0513:                    // before we start parsing a container we must flush lookahead
0514:                    if (lookaheadCount > 0) {
0515:                        input.seekBy(-1);
0516:                        return null;
0517:                    }
0518:                    if (next == '<') {
0519:                        return parseOnObjectStreamOrDictionaryOrHexString(input);
0520:                    }
0521:                    if (next == '[') {
0522:                        return parseOnObjectArray(input);
0523:                    }
0524:                    // unread, i do not understand...
0525:                    // return null if char unexpected, if this is an error depends on
0526:                    // context
0527:                    input.seekBy(-1);
0528:                    return null;
0529:                } while (true);
0530:            }
0531:
0532:            /**
0533:             * Consume whitespace and check that nothing but a single EOL sequence
0534:             * (CR LF or LF) was found. NOTE(review): a first byte that is no
0535:             * whitespace at all is pushed back and also yields <code>true</code>.
0536:             * @param input the input to read from
0537:             * @return <code>false</code> if the input ends at once or right after a CR, or if further whitespace was skipped via readSpaces; <code>true</code> otherwise
0538:             * @throws IOException
0539:             */
0540:            protected boolean readEOL(IRandomAccess input) throws IOException {
0541:                int next = input.read();
0542:                if (next == -1) { // end of input before anything was read
0543:                    return false;
0544:                }
0545:                if (next == CHAR_CR) {
0546:                    next = input.read();
0547:                    if (next == -1) { // NOTE(review): CR + end of input is false, LF + end of input below is true -- confirm intended
0548:                        return false;
0549:                    } else if (next == CHAR_LF) {
0550:                        next = input.read();
0551:                        if (next == -1) { // CR LF was the last thing in the input
0552:                            return true;
0553:                        } else if (isWhitespace(next)) { // CR LF followed by more whitespace
0554:                            readSpaces(input); // defined elsewhere; presumably skips the remaining whitespace
0555:                            return false;
0556:                        } else {
0557:                            input.seekBy(-1); // un-read the non-whitespace byte after CR LF
0558:                            return true;
0559:                        }
0560:                    } // NOTE(review): CR + any other byte (even whitespace) falls through to the end of the method
0561:                } else if (next == CHAR_LF) {
0562:                    next = input.read();
0563:                    if (next == -1) { // LF was the last byte of the input
0564:                        return true;
0565:                    } else if (isWhitespace(next)) { // LF followed by more whitespace
0566:                        readSpaces(input);
0567:                        return false;
0568:                    } else {
0569:                        input.seekBy(-1); // un-read the non-whitespace byte after LF
0570:                        return true;
0571:                    }
0572:                } else if (isWhitespace(next)) { // first byte is whitespace other than CR or LF
0573:                    readSpaces(input);
0574:                    return false;
0575:                }
0576:                input.seekBy(-1); // reached for a non-whitespace first byte or for CR + non-LF byte: un-read that byte
0577:                return true;
0578:            }
0579:
0580:            /**
0581:             * pdf header see PDF Reference v1.4, chapter 3.4.1 Header COSHEader ::=
0582:             * "%PDF-" version.
0583:             * 
0584:             * @throws IOException
0585:             * @throws COSLoadException
0586:             */
0587:            public STDocType parseHeader(IRandomAccess input)
0588:                    throws IOException, COSLoadException {
0589:                int next;
0590:                while (true) {
0591:                    next = input.read();
0592:                    if (next == -1) {
0593:                        break;
0594:                    }
0595:
0596:                    // performance shortcut for simple space
0597:                    if ((next == ' ') || isWhitespace(next)) {
0598:                        continue;
0599:                    }
0600:                    break;
0601:                }
0602:                byte[] token = new byte[4];
0603:                token[0] = (byte) next;
0604:                input.read(token, 1, 3);
0605:                STDocType docType = new STDocType();
0606:                if (Arrays.equals(token, TOKEN_PDFHEADER)) {
0607:                    docType.setTypeName("PDF");
0608:                } else if (Arrays.equals(token, TOKEN_FDFHEADER)) {
0609:                    docType.setTypeName("FDF");
0610:                } else {
0611:                    input.seekBy(-token.length);
0612:                    COSLoadError e = new COSLoadError(
0613:                            "file format error. document must start with %PDF or %FDF");
0614:                    handleError(e);
0615:                }
0616:                input.read();
0617:                byte[] version = readToken(input);
0618:                if (version == null) {
0619:                    COSLoadError e = new COSLoadError(
0620:                            "file format error. no pdf/fdf version info found");
0621:                    handleError(e);
0622:                } else {
0623:                    docType.setVersion(StringTools.toString(version));
0624:                }
0625:                return docType;
0626:            }
0627:
0628:            /**
0629:             * Parse a valid COS object for use in document context from the current
0630:             * stream position.
0631:             * 
0632:             * <p>
0633:             * see PDF Reference v1.4, chapter 3.2 Objects
0634:             * </p>
0635:             * 
0636:             * <p>
0637:             * this implementation is a little more complicated, as we hava a two object
0638:             * lookahead to detect references.
0639:             * 
0640:             * <code>
0641:             * COSObject ::=   COSReference |
0642:             *                 COSBoolean |
0643:             *                 COSString |
0644:             *                 COSNumber |
0645:             *                 COSName |
0646:             *                 COSNull |
0647:             *                 COSArray |
0648:             *                 COSDictionary |
0649:             *                 COSStream
0650:             *
0651:             * </code>
0652:             * 
0653:             * </p>
0654:             * 
0655:             * @return the object parsed
0656:             * 
0657:             * @throws IOException
0658:             * @throws COSLoadException
0659:             */
0660:            protected COSDocumentElement parseObject(IRandomAccess input)
0661:                    throws IOException, COSLoadException {
0662:                if (flushLookahead) {
0663:                    return lookaheadPop();
0664:                }
0665:
0666:                // parse another element
0667:                Object parsedElement = parseElement(input);
0668:                if (parsedElement == null) {
0669:                    flushLookahead = true;
0670:                    return lookaheadPop();
0671:                }
0672:
0673:                // try to detect reference "R"
0674:                COSObject resultObject;
0675:                if (parsedElement instanceof  byte[]) {
0676:                    if (TOKEN_R == parsedElement) {
0677:                        // reference detected, clean up lookahed and return
0678:                        return createObjectReference();
0679:                    }
0680:                    // we have found a token that has to be re-read in another context
0681:                    // take care of consumed whitespace!
0682:                    input.seekBy(-1);
0683:                    int next = input.read();
0684:
0685:                    // performance shortcut for simple space
0686:                    if ((next == ' ') || isWhitespace(next)) {
0687:                        input.seekBy(-1);
0688:                    }
0689:                    input.seekBy(-((byte[]) parsedElement).length);
0690:                    this .flushLookahead = true;
0691:                    return lookaheadPop();
0692:                }
0693:                resultObject = (COSObject) parsedElement;
0694:
0695:                // build up lookahead stack
0696:                if (resultObject instanceof  COSNumber) {
0697:                    lookaheadPush(resultObject);
0698:                    // return one object if lookahead larger than 2
0699:                    if (lookaheadCount > 2) {
0700:                        return lookaheadPop();
0701:                    }
0702:
0703:                    // enter parse recursive
0704:                    return parseObject(input);
0705:                }
0706:
0707:                // shortcut to avoid building entry in lookahead
0708:                if (lookaheadCount > 0) {
0709:                    lookaheadPush(resultObject);
0710:                    this .flushLookahead = true;
0711:                    return lookaheadPop();
0712:                }
0713:                return resultObject;
0714:            }
0715:
0716:            protected COSObject parseObjectDictionary(IRandomAccess input)
0717:                    throws IOException, COSLoadException {
0718:                int next;
0719:                next = input.read();
0720:                if (next != '<') {
0721:                    input.seekBy(-1);
0722:                    COSLoadError e = new COSLoadError("'<' expected");
0723:                    handleError(e);
0724:                }
0725:                next = input.read();
0726:                if (next != '<') {
0727:                    input.seekBy(-1);
0728:                    COSLoadError e = new COSLoadError("'<' expected");
0729:                    handleError(e);
0730:                }
0731:                return parseOnObjectDictionary(input);
0732:            }
0733:
0734:            /**
0735:             * parse a COS array from the current stream position. see PDF Reference
0736:             * v1.4, chapter 3.2.5 Array objects COSArray ::= "[" (COSObject) "]"
0737:             * 
0738:             * @return the array parsed
0739:             * @throws IOException
0740:             * 
0741:             * @throws IOException
0742:             */
0743:            protected COSObject parseOnObjectArray(IRandomAccess input)
0744:                    throws COSLoadException, IOException {
0745:                int next;
0746:                COSArray result = COSArray.create();
0747:                while (true) {
0748:                    COSDocumentElement element = parseObject(input);
0749:                    if (element == null) {
0750:                        next = input.read();
0751:                        if (next == -1) {
0752:                            COSLoadError e = new COSLoadError(
0753:                                    "file format error. unexpected end of array");
0754:                            handleError(e);
0755:                        }
0756:                        if (next != ']') {
0757:                            byte[] badElement = readTokenElement(input, next);
0758:                            if (check) {
0759:                                COSLoadWarning pwarn = new COSLoadWarning(
0760:                                        "bad array element ("
0761:                                                + new String(badElement) + ")");
0762:                                pwarn.setHint(result);
0763:                                handleWarning(pwarn);
0764:                            }
0765:                            continue;
0766:                        }
0767:                        break;
0768:                    }
0769:                    result.basicAddSilent(element);
0770:                }
0771:                if (check && (result.size() > 8191)) {
0772:                    COSLoadWarning pwarn = new COSLoadWarning(C_WARN_ARRAYSIZE);
0773:                    pwarn.setHint(result);
0774:                    handleWarning(pwarn);
0775:                }
0776:                return result;
0777:            }
0778:
0779:            /**
0780:             * parse a COS dictionary from the current stream position. see PDF
0781:             * Reference v1.4, chapter 3.2.6 Dictionary objects
0782:             * 
0783:             * <code>
0784:             * COSDictionary ::= &quot;&lt;&lt;&quot; (COSName COSObject)* &quot;&gt;&gt;&quot;
0785:             * </code>
0786:             * 
0787:             * @return the dictionary parsed
0788:             * 
0789:             * @throws IOException
0790:             * @throws COSLoadException
0791:             */
0792:            protected COSObject parseOnObjectDictionary(IRandomAccess input)
0793:                    throws IOException, COSLoadException {
0794:                int next;
0795:                COSDictionary dict = COSDictionary.create();
0796:                try {
0797:                    while (true) {
0798:                        COSDocumentElement keyObject = parseObject(input);
0799:                        if (keyObject == null) {
0800:                            break;
0801:                        }
0802:                        COSName dictKey = (COSName) keyObject;
0803:                        COSDocumentElement value = parseObject(input);
0804:                        if (value == null) {
0805:                            COSLoadError e = new COSLoadError(
0806:                                    "missing value for key " + keyObject);
0807:                            handleError(e);
0808:                        }
0809:                        dict.basicPutSilent(dictKey, value);
0810:                    }
0811:                } catch (ClassCastException ex) {
0812:                    COSLoadError e = new COSLoadError("name expected");
0813:                    handleError(e);
0814:                }
0815:                next = input.read();
0816:                if (next != '>') {
0817:                    COSLoadError e = new COSLoadError("unexpected character ("
0818:                            + (char) next + ")");
0819:                    handleError(e);
0820:                }
0821:                next = input.read();
0822:                if (next != '>') {
0823:                    COSLoadError e = new COSLoadError("unexpected character ("
0824:                            + (char) next + ")");
0825:                    handleError(e);
0826:                }
0827:                return dict;
0828:            }
0829:
0830:            /**
0831:             * parse a COS string encoded in hex from the current stream position. see
0832:             * PDF Reference v1.4, chapter 3.2.3 String objects
0833:             * 
0834:             * <code>
0835:             * COSString ::= COSString | COSHexString
0836:             * COSHexString ::= &quot;&lt;&quot; (hexChar)* &quot;&gt;&quot;
0837:             * </code>
0838:             * 
0839:             * @return the string parsed
0840:             * 
0841:             * @throws IOException
0842:             * @throws COSLoadException
0843:             */
0844:            protected COSObject parseOnObjectHexString(IRandomAccess input,
0845:                    int next) throws IOException, COSLoadException {
0846:                localStream.reset();
0847:                boolean secondDigit = false;
0848:                int digitValue = 0;
0849:                int charValue = 0;
0850:                while (true) {
0851:                    digitValue = HexTools.hexDigitToInt((char) next);
0852:                    if (digitValue == -1) {
0853:                        if (next == -1) {
0854:                            break;
0855:                        }
0856:                        if (next == '>') {
0857:                            break;
0858:                        }
0859:                        if (!isWhitespace(next)) {
0860:                            IOException ioe = new IOException("<" + next
0861:                                    + "> '" + (char) next
0862:                                    + "' not a valid hex char");
0863:
0864:                            // a warning for PDF/A related checks will be triggered
0865:                            // exception is handled right on track
0866:                            COSLoadWarning pwarn = new COSLoadWarning(
0867:                                    C_WARN_ILLEGALHEX);
0868:                            pwarn.setHint(new Long(input.getOffset()));
0869:                            handleWarning(pwarn);
0870:                            throw ioe;
0871:                        }
0872:                    } else {
0873:                        if (secondDigit) {
0874:                            charValue = (charValue << 4) + digitValue;
0875:                            localStream.write(charValue);
0876:                            secondDigit = false;
0877:                        } else {
0878:                            secondDigit = true;
0879:                            charValue = digitValue;
0880:                        }
0881:                    }
0882:                    next = input.read();
0883:                }
0884:                if (secondDigit) {
0885:                    // this is a warning for uneven numbers on hex codes
0886:                    if (check) {
0887:                        COSLoadWarning pwarn = new COSLoadWarning(
0888:                                C_WARN_UNEVENHEX);
0889:                        pwarn.setHint(new Long(input.getOffset()));
0890:                        handleWarning(pwarn);
0891:                    }
0892:                    // assume trailing "0"
0893:                    charValue = charValue << 4;
0894:                    localStream.write(charValue);
0895:                }
0896:
0897:                COSString result;
0898:                if ((securityHandler == null) || (objectKey == null)) {
0899:                    result = COSString.createHex(localStream.toByteArray());
0900:                } else {
0901:                    try {
0902:                        byte[] decrypted = securityHandler.decryptString(
0903:                                objectKey, localStream.toByteArray());
0904:                        result = COSString.createHex(decrypted);
0905:                    } catch (COSSecurityException e) {
0906:                        throw new COSLoadError(e);
0907:                    }
0908:                }
0909:                if (check && (result.stringValue().length() > 65535)) {
0910:                    COSLoadWarning pwarn = new COSLoadWarning(
0911:                            C_WARN_STRINGTOLONG);
0912:                    pwarn.setHint(result);
0913:                    handleWarning(pwarn);
0914:                }
0915:                return result;
0916:            }
0917:
0918:            /**
0919:             * parse a COS name from the current stream position. see PDF Reference
0920:             * v1.4, chapter 3.2.4 Name Objects COSName ::= "/" nameChars
0921:             * 
0922:             * @return the name parsed
0923:             * 
0924:             * @throws IOException
0925:             * @throws COSLoadException
0926:             */
0927:            protected COSObject parseOnObjectName(IRandomAccess input)
0928:                    throws IOException, COSLoadException {
0929:                int next;
0930:                localStream.reset();
0931:                do {
0932:                    next = input.read();
0933:                    if (next == -1) {
0934:                        break;
0935:                    }
0936:
0937:                    // performance shortcut for simple space
0938:                    if ((next == ' ') || isWhitespace(next)) {
0939:                        break;
0940:                    }
0941:                    if (isDelimiter(next)) {
0942:                        input.seekBy(-1);
0943:                        break;
0944:                    }
0945:                    if (next == '#') {
0946:                        next = input.read();
0947:
0948:                        int digit1 = HexTools.hexDigitToInt((char) next);
0949:                        if (digit1 == -1) {
0950:                            COSLoadError e = new COSLoadError("<" + next
0951:                                    + "> not a valid hex char");
0952:                            handleError(e);
0953:                        }
0954:                        next = input.read();
0955:
0956:                        int digit2 = HexTools.hexDigitToInt((char) next);
0957:                        if (digit2 == -1) {
0958:                            COSLoadError e = new COSLoadError("<" + next
0959:                                    + "> not a valid hex char");
0960:                            handleError(e);
0961:                        }
0962:                        localStream.write((digit1 << 4) + digit2);
0963:                    } else {
0964:                        localStream.write(next);
0965:                    }
0966:                } while (true);
0967:                byte[] bytes = localStream.toByteArray();
0968:                COSName result = COSName.create(bytes);
0969:                if (check && (result.stringValue().length() > 127)) {
0970:                    COSLoadWarning pwarn = new COSLoadWarning(C_WARN_NAMETOLONG);
0971:                    pwarn.setHint(result);
0972:                    handleWarning(pwarn);
0973:                }
0974:                return result;
0975:            }
0976:
0977:            /**
0978:             * parse a COS number from the current stream position. see PDF Reference
0979:             * v1.4, chapter 3.2.2 Numeric objects COSNumber ::= COSFixed | COSInteger
0980:             * COSFixed ::= (+ | -)? (digit) "." (digit) COSInteger ::= (+ | -)? (digit)
0981:             * 
0982:             * @return the number parsed
0983:             * 
0984:             * @throws IOException
0985:             * @throws COSLoadException
0986:             */
0987:            protected COSObject parseOnObjectNumber(IRandomAccess input,
0988:                    int next) throws IOException, COSLoadException {
0989:                boolean isFixed = false;
0990:                localStream.reset();
0991:                isFixed = next == '.';
0992:                localStream.write((byte) next);
0993:                do {
0994:                    next = input.read();
0995:                    if (next == -1) {
0996:                        break;
0997:                    } else if (isDigit(next)) {
0998:                        localStream.write((byte) next);
0999:                    } else if (next == '.') {
1000:                        isFixed = true;
1001:                        localStream.write((byte) next);
1002:                    } else if ((next == ' ') || isWhitespace(next)) {
1003:                        break;
1004:                    } else {
1005:                        input.seekBy(-1);
1006:                        break;
1007:                    }
1008:                } while (true);
1009:                if (isFixed) {
1010:                    COSFixed fixed = COSFixed.create(localStream.getBytes(), 0,
1011:                            localStream.size());
1012:                    return fixed;
1013:                }
1014:                return COSInteger.create(localStream.getBytes(), 0, localStream
1015:                        .size());
1016:            }
1017:
1018:            /**
1019:             * parse a COS stream from the current stream position. see PDF Reference
1020:             * v1.4, chapter 3.2.7 Stream objects COSStream ::= COSDictionary "stream"
1021:             * bytes "endstream"
1022:             * 
1023:             * @param dict
1024:             *            The object that should be filled with the dictionary entries.
1025:             * 
1026:             * @return The stream parsed.
1027:             * 
1028:             * @throws IOException
1029:             * @throws COSLoadException
1030:             */
1031:            protected COSObject parseOnObjectStream(IRandomAccess input,
1032:                    COSDictionary dict) throws IOException, COSLoadException {
1033:                COSStream stream = COSStream.create(dict);
1034:
1035:                byte[] token = new byte[5];
1036:                // read "tream", "s" already consumed
1037:                input.read(token);
1038:                if (!Arrays.equals(token, TOKEN_s_tream)) {
1039:                    input.seekBy(-token.length - 1);
1040:                    COSLoadError e = new COSLoadError(
1041:                            "file format error. 'stream' expected");
1042:                    handleError(e);
1043:                }
1044:
1045:                // allow for at max two separator chars after "stream"
1046:                int next;
1047:                next = input.read();
1048:                if (next == CHAR_CR) {
1049:                    next = input.read();
1050:                }
1051:                if (next != CHAR_LF) {
1052:                    // ?? its legal to have NO separator
1053:                    // ?? there are testdocuments that provide only a single CR
1054:                    if (check) {
1055:                        COSLoadWarning pwarn = new COSLoadWarning(
1056:                                C_WARN_STREAMEOL);
1057:                        pwarn.setHint(new Long(input.getOffset()));
1058:                        handleWarning(pwarn);
1059:                    }
1060:                    input.seekBy(-1);
1061:                }
1062:
1063:                long offset = input.getOffset();
1064:                int length = -1;
1065:                COSNumber cosLength = dict.get(COSStream.DK_Length).asInteger();
1066:                if (cosLength == null) {
1067:                    // warning for pdfa
1068:                    if (check) {
1069:                        COSLoadWarning pwarn = new COSLoadWarning(
1070:                                C_WARN_STREAMLENGTH);
1071:                        pwarn.setHint(new Long(input.getOffset()));
1072:                        handleWarning(pwarn);
1073:                    }
1074:                } else {
1075:                    length = cosLength.intValue();
1076:                }
1077:                input.seek(offset);
1078:
1079:                byte[] bytes = null;
1080:
1081:                if (length < 0) {
1082:                    bytes = readStream(input);
1083:                } else {
1084:                    bytes = new byte[length];
1085:                    int count = input.read(bytes);
1086:                    if (count < length) {
1087:                        if (check) {
1088:                            // get additional warning for pdfa
1089:                            COSLoadWarning pwarn = new COSLoadWarning(
1090:                                    C_WARN_STREAMLENGTH);
1091:                            pwarn.setHint(new Long(input.getOffset()));
1092:                            handleWarning(pwarn);
1093:                        }
1094:                        COSLoadError e = new COSLoadError(
1095:                                "unexpected end of stream");
1096:                        handleError(e);
1097:                    }
1098:                }
1099:
1100:                if (check) {
1101:                    // pdfa compliance check
1102:                    if (!readEOL(input)) {
1103:                        COSLoadWarning pwarn = new COSLoadWarning(
1104:                                C_WARN_ENDSTREAMEOL);
1105:                        pwarn.setHint(new Long(input.getOffset()));
1106:                        handleWarning(pwarn);
1107:                    }
1108:                } else {
1109:                    // be lazy with pdf spec and accept any whitespace before
1110:                    // 'endstream'
1111:                    readSpaces(input);
1112:                }
1113:
1114:                // read "endstream"
1115:                token = new byte[9];
1116:                input.read(token);
1117:                if (!Arrays.equals(token, TOKEN_endstream)) {
1118:                    input.seekBy(-token.length - 1);
1119:                    // a warning for PDF/A related checks will be triggered
1120:                    COSLoadWarning pwarn = new COSLoadWarning(
1121:                            C_WARN_ENDSTREAMCORRUPT);
1122:                    pwarn.setHint(new Long(input.getOffset()));
1123:                    handleWarning(pwarn);
1124:
1125:                    if (length > 0) {
1126:                        // retry from the beginning with undeterminate length
1127:                        input.seek(offset);
1128:                        bytes = readStream(input);
1129:                        // read "endstream"
1130:                        token = new byte[9];
1131:                        input.read(token);
1132:                        if (!Arrays.equals(token, TOKEN_endstream)) {
1133:                            COSLoadError e = new COSLoadError(
1134:                                    "file format error. 'endstream' expected");
1135:                            handleError(e);
1136:                        }
1137:                        // fix length
1138:                        length = bytes.length;
1139:                        dict
1140:                                .put(COSStream.DK_Length, COSInteger
1141:                                        .create(length));
1142:                    } else {
1143:                        COSLoadError e = new COSLoadError(
1144:                                "file format error. 'endstream' expected");
1145:                        handleError(e);
1146:                    }
1147:
1148:                }
1149:                if ((securityHandler == null) || (objectKey == null)) {
1150:                    stream.basicSetEncodedBytes(bytes);
1151:                } else {
1152:                    try {
1153:                        byte[] decrypted = securityHandler.decryptStream(
1154:                                objectKey, dict, bytes);
1155:                        stream.basicSetEncodedBytes(decrypted);
1156:                    } catch (COSSecurityException e) {
1157:                        throw new COSLoadError(e);
1158:                    }
1159:                }
1160:                return stream;
1161:            }
1162:
1163:            /**
1164:             * parse a COS stream or dictionary from the current stream position.
1165:             * COSStreamOrDict ::= COSStream | COSDict
1166:             * 
1167:             * @return the object parsed
1168:             * 
1169:             * @throws IOException
1170:             * @throws COSLoadException
1171:             */
1172:            protected COSObject parseOnObjectStreamOrDictionary(
1173:                    IRandomAccess input) throws IOException, COSLoadException {
1174:                COSObject dict = parseOnObjectDictionary(input);
1175:                int next;
1176:                while (true) {
1177:                    next = input.read();
1178:                    if (next == -1) {
1179:                        return dict;
1180:                    }
1181:
1182:                    // performance shortcut for simple space
1183:                    if ((next == ' ') || isWhitespace(next)) {
1184:                        continue;
1185:                    }
1186:                    break;
1187:                }
1188:                if (next == 's') {
1189:                    return parseOnObjectStream(input, (COSDictionary) dict);
1190:                }
1191:                input.seekBy(-1);
1192:                return dict;
1193:            }
1194:
1195:            /**
1196:             * parse a COS stream or dictionary or hex string from the current stream
1197:             * position. COSStreamOrDictOrHex ::= COSStream | COSDict | COSHexString
1198:             * 
1199:             * @return the object parsed
1200:             * 
1201:             * @throws IOException
1202:             * @throws COSLoadException
1203:             */
1204:            protected COSObject parseOnObjectStreamOrDictionaryOrHexString(
1205:                    IRandomAccess input) throws IOException, COSLoadException {
1206:                int next;
1207:                next = input.read();
1208:                if (next == '<') {
1209:                    return parseOnObjectStreamOrDictionary(input);
1210:                }
1211:                return parseOnObjectHexString(input, next);
1212:            }
1213:
1214:            /**
1215:             * parse a COS string from the current stream position. see PDF Reference
1216:             * v1.4, chapter 3.2.3. String objects COSString ::= "(" stringData ")"
1217:             * 
1218:             * @return the string parsed
1219:             * 
1220:             * @throws IOException
1221:             * @throws COSLoadException
1222:             */
1223:            protected COSObject parseOnObjectString(IRandomAccess input)
1224:                    throws IOException, COSLoadException {
1225:                int next;
1226:                int paraCount = 0;
1227:                localStream.reset();
1228:                while (true) {
1229:                    next = input.read();
1230:                    if (next == '\\') {
1231:                        int c = readEscape(input);
1232:                        if (c != -1) {
1233:                            localStream.write(c);
1234:                        }
1235:                    } else if (next == ')') {
1236:                        if (paraCount > 0) {
1237:                            paraCount--;
1238:                            localStream.write(next);
1239:                        } else {
1240:                            break;
1241:                        }
1242:                    } else if (next == CHAR_CR) {
1243:                        // eol is always \n in a string
1244:                        next = input.read();
1245:                        if (next != CHAR_LF) {
1246:                            input.seekBy(-1);
1247:                        }
1248:                        localStream.write(CHAR_LF);
1249:                    } else if (next == '(') {
1250:                        paraCount++;
1251:                        localStream.write(next);
1252:                    } else if (next == -1) {
1253:                        COSLoadError e = new COSLoadError(
1254:                                "file format error. unexpected end of string");
1255:                        handleError(e);
1256:                    } else {
1257:                        localStream.write(next);
1258:                    }
1259:                }
1260:                COSString result;
1261:                if ((securityHandler == null) || (objectKey == null)) {
1262:                    result = COSString.create(localStream.toByteArray());
1263:                } else {
1264:                    try {
1265:                        byte[] decrypted = securityHandler.decryptString(
1266:                                objectKey, localStream.toByteArray());
1267:                        result = COSString.create(decrypted);
1268:                    } catch (COSSecurityException e) {
1269:                        throw new COSLoadError(e);
1270:                    }
1271:                }
1272:                if (check && (result.stringValue().length() > 65535)) {
1273:                    COSLoadWarning pwarn = new COSLoadWarning(
1274:                            C_WARN_STRINGTOLONG);
1275:                    pwarn.setHint(result);
1276:                    handleWarning(pwarn);
1277:                }
1278:                return result;
1279:            }
1280:
1281:            /**
1282:             * read an esacped char from the stream.
1283:             * 
1284:             * @return the character corresponding to the escape code
1285:             * 
1286:             * @throws IOException
1287:             */
1288:            protected int readEscape(IRandomAccess input) throws IOException {
1289:                int next = 0;
1290:                next = input.read();
1291:                if (next == -1) {
1292:                    return -1;
1293:                }
1294:                if (isOctalDigit(next)) {
1295:                    input.seekBy(-1);
1296:                    return readOctalChar(input);
1297:                }
1298:                if (next == CHAR_LF) {
1299:                    return -1;
1300:                }
1301:                if (next == CHAR_CR) {
1302:                    next = input.read();
1303:                    if (next != CHAR_LF) {
1304:                        input.seekBy(-1);
1305:                    }
1306:                    return -1;
1307:                }
1308:                if (next == 'n') {
1309:                    return CHAR_LF;
1310:                }
1311:                if (next == 'r') {
1312:                    return CHAR_CR;
1313:                }
1314:                if (next == 't') {
1315:                    return CHAR_HT;
1316:                }
1317:                if (next == 'b') {
1318:                    return CHAR_BS;
1319:                }
1320:                if (next == 'f') {
1321:                    return CHAR_FF;
1322:                }
1323:                return next;
1324:            }
1325:
1326:            /**
1327:             * reads the next integer on input. consumes one trailing space if
1328:             * consumeSpaceAfter is set to true. Consumes leading spaces and comments.
1329:             * 
1330:             * @param input
1331:             * @param consumeSpaceAfter
1332:             * @return The integer read.
1333:             * @throws IOException
1334:             */
1335:            public int readInteger(IRandomAccess input,
1336:                    boolean consumeSpaceAfter) throws IOException {
1337:                int result = 0;
1338:                int next;
1339:                while (true) {
1340:                    next = input.read();
1341:                    if (next == -1) {
1342:                        return result;
1343:                    } else if ((next == ' ') || isWhitespace(next)) {
1344:                        continue;
1345:                    } else if (next == '%') {
1346:                        parseComment(input);
1347:                    } else {
1348:                        break;
1349:                    }
1350:                }
1351:                while (true) {
1352:                    if (isDigit(next)) {
1353:                        result = ((result * 10) + next) - '0';
1354:                    } else {
1355:                        input.seekBy(-1);
1356:                        break;
1357:                    }
1358:                    next = input.read();
1359:                    if (next == -1) {
1360:                        break;
1361:                    } else if ((next == ' ') || isWhitespace(next)) {
1362:                        if (!consumeSpaceAfter) {
1363:                            input.seekBy(-1);
1364:                        }
1365:                        break;
1366:                    }
1367:                }
1368:                return result;
1369:            }
1370:
1371:            /**
1372:             * read an octal character from the stream.
1373:             * 
1374:             * @return the integer value of the character read or -1
1375:             * 
1376:             * @throws IOException
1377:             */
1378:            protected int readOctalChar(IRandomAccess input) throws IOException {
1379:                int result = -1;
1380:                int c = 0;
1381:
1382:                c = input.read();
1383:                if ((c != -1) && isOctalDigit(c)) {
1384:                    result = c - '0';
1385:                    c = input.read();
1386:                    if (isOctalDigit(c)) {
1387:                        result = ((result * 8) + c) - '0';
1388:                        c = input.read();
1389:                        if (isOctalDigit(c)) {
1390:                            result = ((result * 8) + c) - '0';
1391:                        } else {
1392:                            input.seekBy(-1);
1393:                        }
1394:                    } else {
1395:                        input.seekBy(-1);
1396:                    }
1397:                } else {
1398:                    input.seekBy(-1);
1399:                }
1400:                return result;
1401:            }
1402:
1403:            /**
1404:             * read all characters until EOF or non space char appears. the first non
1405:             * space char is pushed back so the next char read is the first non space
1406:             * char.
1407:             * 
1408:             * @throws IOException
1409:             */
1410:            public void readSpaces(IRandomAccess input) throws IOException {
1411:                int next = 0;
1412:                while (true) {
1413:                    next = input.read();
1414:                    if (next == -1) {
1415:                        break;
1416:                    }
1417:                    // performance shortcut for simple space
1418:                    if ((next == ' ') || isWhitespace(next)) {
1419:                        continue;
1420:                    }
1421:                    input.seekBy(-1);
1422:                    break;
1423:                }
1424:            }
1425:
1426:            /**
1427:             * Read all characters up to "endstream" and assume them belonging to the
1428:             * stream.
1429:             * <p>
1430:             * ATTENTION this is a heuristic approach as the tag "endstream" may be part
1431:             * of the stream data!
1432:             * 
1433:             * @return All characters up to "endstream"
1434:             * 
1435:             * @throws IOException
1436:             */
1437:            protected byte[] readStream(IRandomAccess input) throws IOException {
1438:                byte[] token = new byte[8];
1439:                localStream.reset();
1440:                int next;
1441:                while (true) {
1442:                    next = input.read();
1443:                    if (next == 'e') {
1444:                        input.read(token);
1445:                        if (Arrays.equals(token, TOKEN_ndstream)) {
1446:                            input.seekBy(-TOKEN_endstream.length);
1447:                            return localStream.toByteArray();
1448:                        }
1449:                        input.seekBy(-token.length);
1450:                    } else if (next == -1) {
1451:                        break;
1452:                    }
1453:                    localStream.write(next);
1454:                }
1455:                if (localStream.size() == 0) {
1456:                    return null;
1457:                }
1458:                return localStream.toByteArray();
1459:            }
1460:
1461:            /**
1462:             * read a single token.
1463:             * 
1464:             * @return the array of characters belonging to the token
1465:             * 
1466:             * @throws IOException
1467:             */
1468:            public byte[] readToken(IRandomAccess input) throws IOException {
1469:                //
1470:                int next;
1471:                while (true) {
1472:                    next = input.read();
1473:                    if (next == -1) {
1474:                        return null;
1475:                    } else if ((next == ' ') || isWhitespace(next)) {
1476:                        continue;
1477:                    } else if (next == '%') {
1478:                        parseComment(input);
1479:                    } else {
1480:                        break;
1481:                    }
1482:                }
1483:                return readTokenElement(input, next);
1484:            }
1485:
1486:            /**
1487:             * derive of readToken, populates the messages list with non-fatal error
1488:             * messages
1489:             * 
1490:             * @param input
1491:             * @param messages
1492:             * @return token bytes
1493:             * @throws IOException
1494:             */
1495:            public byte[] readToken(IRandomAccess input, List messages)
1496:                    throws IOException {
1497:                int next;
1498:                int countWS = 0;
1499:                while (true) {
1500:                    next = input.read();
1501:                    if (next == -1) {
1502:                        return null;
1503:                    } else if ((next == ' ') || isWhitespace(next)) {
1504:                        countWS++;
1505:                        if (countWS > 1) {
1506:                            messages.add(C_TOKEN_ADDWSB);
1507:                        }
1508:                        continue;
1509:                    } else if (next == '%') {
1510:                        messages.add(C_TOKEN_COMMENT);
1511:                        parseComment(input);
1512:                    } else {
1513:                        break;
1514:                    }
1515:                }
1516:                return readTokenElement(input, next, messages);
1517:            }
1518:
1519:            protected byte[] readTokenElement(IRandomAccess input, int next)
1520:                    throws IOException {
1521:                localStream.reset();
1522:                //
1523:                localStream.write(next);
1524:                do {
1525:                    next = input.read();
1526:                    if (next == -1) {
1527:                        break;
1528:                    } else if ((next == ' ') || isWhitespace(next)) { // performance
1529:                        // shortcut
1530:                        break;
1531:                    } else if (isDelimiter(next)) {
1532:                        input.seekBy(-1);
1533:                        break;
1534:                    }
1535:                    localStream.write(next);
1536:                } while (true);
1537:                return localStream.toByteArray();
1538:            }
1539:
1540:            /**
1541:             * derive of readToken, populates the messages list with non-fatal error
1542:             * messages
1543:             * 
1544:             * @param input
1545:             * @param next
1546:             * @param messages
1547:             * @return token bytes
1548:             * @throws IOException
1549:             */
1550:            protected byte[] readTokenElement(IRandomAccess input, int next,
1551:                    List messages) throws IOException {
1552:                localStream.reset();
1553:                localStream.write(next);
1554:                do {
1555:                    next = input.read();
1556:                    if (next == -1) {
1557:                        break;
1558:                    } else if ((next == ' ') || isWhitespace(next)) { // performance
1559:                        // shortcut
1560:                        next = input.read();
1561:                        if ((next == ' ') || isWhitespace(next)) { // performance
1562:                            // shortcut
1563:                            messages.add(C_TOKEN_ADDWSA);
1564:                        }
1565:                        input.seekBy(-1);
1566:                        break;
1567:                    } else if (isDelimiter(next)) {
1568:                        messages.add(C_TOKEN_NOWSA);
1569:                        input.seekBy(-1);
1570:                        break;
1571:                    }
1572:                    localStream.write(next);
1573:                } while (true);
1574:                return localStream.toByteArray();
1575:            }
1576:
1577:            public void setExceptionHandler(
1578:                    IPDFParserExceptionHandler exceptionHandler) {
1579:                this .exceptionHandler = exceptionHandler;
1580:                check = exceptionHandler != null;
1581:            }
1582:
1583:            protected void setObjectKey(COSObjectKey objectKey) {
1584:                this .objectKey = objectKey;
1585:            }
1586:
1587:            protected ISystemSecurityHandler getSecurityHandler() {
1588:                return securityHandler;
1589:            }
1590:
1591:            protected void setSecurityHandler(
1592:                    ISystemSecurityHandler securityHandler) {
1593:                this.securityHandler = securityHandler;
1594:            }
1595:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.