0001: /*
0002: * Copyright (c) 2007, intarsys consulting GmbH
0003: *
0004: * Redistribution and use in source and binary forms, with or without
0005: * modification, are permitted provided that the following conditions are met:
0006: *
0007: * - Redistributions of source code must retain the above copyright notice,
0008: * this list of conditions and the following disclaimer.
0009: *
0010: * - Redistributions in binary form must reproduce the above copyright notice,
0011: * this list of conditions and the following disclaimer in the documentation
0012: * and/or other materials provided with the distribution.
0013: *
0014: * - Neither the name of intarsys nor the names of its contributors may be used
0015: * to endorse or promote products derived from this software without specific
0016: * prior written permission.
0017: *
0018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0021: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0022: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0024: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0025: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0026: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0027: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
0028: * POSSIBILITY OF SUCH DAMAGE.
0029: */
0030: package de.intarsys.pdf.parser;
0031:
0032: import java.io.IOException;
0033: import java.util.Arrays;
0034: import java.util.List;
0035:
0036: import de.intarsys.pdf.cos.COSArray;
0037: import de.intarsys.pdf.cos.COSDictionary;
0038: import de.intarsys.pdf.cos.COSDocumentElement;
0039: import de.intarsys.pdf.cos.COSFalse;
0040: import de.intarsys.pdf.cos.COSFixed;
0041: import de.intarsys.pdf.cos.COSIndirectObject;
0042: import de.intarsys.pdf.cos.COSInteger;
0043: import de.intarsys.pdf.cos.COSName;
0044: import de.intarsys.pdf.cos.COSNull;
0045: import de.intarsys.pdf.cos.COSNumber;
0046: import de.intarsys.pdf.cos.COSObject;
0047: import de.intarsys.pdf.cos.COSObjectKey;
0048: import de.intarsys.pdf.cos.COSStream;
0049: import de.intarsys.pdf.cos.COSString;
0050: import de.intarsys.pdf.cos.COSTrue;
0051: import de.intarsys.pdf.crypt.COSSecurityException;
0052: import de.intarsys.pdf.crypt.ISystemSecurityHandler;
0053: import de.intarsys.pdf.st.STDocType;
0054: import de.intarsys.tools.hex.HexTools;
0055: import de.intarsys.tools.randomaccess.IRandomAccess;
0056: import de.intarsys.tools.randomaccess.RandomAccessByteArray;
0057: import de.intarsys.tools.stream.FastByteArrayOutputStream;
0058: import de.intarsys.tools.string.StringTools;
0059:
0060: /**
0061: * An abstract superclass for our two flavours of PDF Parsers.
0062: */
0063: public abstract class PDFParser {
// Individual control characters used while scanning (isEOL compares against
// CHAR_CR and CHAR_LF).
// NOTE(review): these fields are public and not final, so any caller can
// reassign them - confirm whether that is intended.
0064: public static char CHAR_CR = '\r';
0065:
0066: public static char CHAR_LF = '\n';
0067:
0068: public static char CHAR_HT = '\t';
0069:
0070: public static char CHAR_BS = '\b';
0071:
0072: public static char CHAR_FF = '\f';
0073:
// Keyword byte sequences compared against raw input bytes.
// NOTE(review): String.getBytes() without an argument encodes with the platform
// default charset; all keywords are plain ASCII, so this only matters on
// platforms whose default charset is not ASCII compatible.
//
// File header markers checked by parseHeader.
0074: public static final byte[] TOKEN_PDFHEADER = "%PDF".getBytes(); //$NON-NLS-1$
0075:
0076: public static final byte[] TOKEN_FDFHEADER = "%FDF".getBytes(); //$NON-NLS-1$
0077:
0078: public static final byte[] TOKEN_EOF = "%%EOF".getBytes(); //$NON-NLS-1$
0079:
0080: public static final byte[] TOKEN_obj = "obj".getBytes(); //$NON-NLS-1$
0081:
0082: public static final byte[] TOKEN_endobj = "endobj".getBytes(); //$NON-NLS-1$
0083:
// Literal keywords recognized by parseElement.
0084: public static final byte[] TOKEN_false = "false".getBytes(); //$NON-NLS-1$
0085:
0086: public static final byte[] TOKEN_true = "true".getBytes(); //$NON-NLS-1$
0087:
0088: public static final byte[] TOKEN_null = "null".getBytes(); //$NON-NLS-1$
0089:
0090: public static final byte[] TOKEN_startxref = "startxref".getBytes(); //$NON-NLS-1$
0091:
0092: public static final byte[] TOKEN_trailer = "trailer".getBytes(); //$NON-NLS-1$
0093:
0094: public static final byte[] TOKEN_xref = "xref".getBytes(); //$NON-NLS-1$
0095:
0096: public static final byte[] TOKEN_stream = "stream".getBytes(); //$NON-NLS-1$
0097:
// "stream" without its first letter; parseOnObjectStream compares against this
// after the leading 's' has already been consumed.
0098: public static final byte[] TOKEN_s_tream = "tream".getBytes(); //$NON-NLS-1$
0099:
0100: public static final byte[] TOKEN_endstream = "endstream".getBytes(); //$NON-NLS-1$
0101:
// "endstream" without its first letter - presumably used the same way as
// TOKEN_s_tream; not referenced in the visible part of the file.
0102: public static final byte[] TOKEN_ndstream = "ndstream".getBytes(); //$NON-NLS-1$
0103:
// The reference marker "R". parseElement returns this very array instance and
// parseObject detects it by identity comparison.
0104: public static final byte[] TOKEN_R = "R".getBytes(); //$NON-NLS-1$
0105:
// Codes handed to the COSLoadWarning constructor when a warning is reported
// through handleWarning.
0106: public static final String C_WARN_UNEVENHEX = "616a"; //$NON-NLS-1$
0107:
0108: public static final String C_WARN_ILLEGALHEX = "616b"; //$NON-NLS-1$
0109:
0110: public static final String C_WARN_STRINGTOLONG = "ImplLimitString"; //$NON-NLS-1$
0111:
0112: public static final String C_WARN_NAMETOLONG = "ImplLimitName"; //$NON-NLS-1$
0113:
0114: public static final String C_WARN_ARRAYSIZE = "ImplLimitArray"; //$NON-NLS-1$
0115:
0116: public static final String C_WARN_SINGLESPACE = "614a"; //$NON-NLS-1$
0117:
0118: public static final String C_WARN_SINGLEEOL = "614b"; //$NON-NLS-1$
0119:
0120: public static final String C_WARN_STREAMEOL = "617a"; //$NON-NLS-1$
0121:
0122: public static final String C_WARN_ENDSTREAMEOL = "617b"; //$NON-NLS-1$
0123:
0124: public static final String C_WARN_ENDSTREAMCORRUPT = "617c"; //$NON-NLS-1$
0125:
0126: public static final String C_WARN_STREAMEXTERNAL = "617d"; //$NON-NLS-1$
0127:
0128: public static final String C_WARN_STREAMLENGTH = "617e"; //$NON-NLS-1$
0129:
0130: public static final String C_WARN_SINGLESPACE_OBJ = "618a"; //$NON-NLS-1$
0131:
0132: public static final String C_WARN_SINGLEEOL_OBJ = "618b"; //$NON-NLS-1$
0133:
0134: public static final String C_WARN_ENDOBJ_MISSING = "618c"; //$NON-NLS-1$
0135:
// Text fragments - presumably used by subclasses when composing warning
// details; not referenced in the visible part of the file.
0136: protected static final String C_TOKEN_ADDWSB = "additional whitespace before"; //$NON-NLS-1$
0137:
0138: protected static final String C_TOKEN_ADDWSA = "additional whitespace after"; //$NON-NLS-1$
0139:
0140: protected static final String C_TOKEN_COMMENT = "comment"; //$NON-NLS-1$
0141:
0142: protected static final String C_TOKEN_NOWSA = "no whitespace after"; //$NON-NLS-1$
0143:
// Lookup table from byte value (0..255) to one of the CHARCLASS_* codes below.
// It is populated once by the static initializer and read by the is...()
// classification helpers.
0144: protected static final byte[] characterClass = new byte[256];
0145:
// Character class codes stored in characterClass.
0146: protected static final byte CHARCLASS_ANY = 0;
0147:
0148: protected static final byte CHARCLASS_DELIMITER = 1;
0149:
0150: protected static final byte CHARCLASS_WHITESPACE = 2;
0151:
0152: protected static final byte CHARCLASS_TOKEN = 3;
0153:
0154: protected static final byte CHARCLASS_DIGIT = 4;
0155:
0156: protected static final byte CHARCLASS_NUMBERSPECIAL = 5;
0157:
0158: static {
0159: for (int i = 0; i < 256; i++) {
0160: characterClass[i] = CHARCLASS_ANY;
0161: }
0162: // delimiters
0163: characterClass['('] = CHARCLASS_DELIMITER;
0164: characterClass[')'] = CHARCLASS_DELIMITER;
0165: characterClass['<'] = CHARCLASS_DELIMITER;
0166: characterClass['>'] = CHARCLASS_DELIMITER;
0167: characterClass['['] = CHARCLASS_DELIMITER;
0168: characterClass[']'] = CHARCLASS_DELIMITER;
0169: characterClass['{'] = CHARCLASS_DELIMITER;
0170: characterClass['}'] = CHARCLASS_DELIMITER;
0171: characterClass['/'] = CHARCLASS_DELIMITER;
0172: characterClass['%'] = CHARCLASS_DELIMITER;
0173:
0174: // whitespace
0175: characterClass[' '] = CHARCLASS_WHITESPACE;
0176: characterClass['\t'] = CHARCLASS_WHITESPACE;
0177: characterClass['\r'] = CHARCLASS_WHITESPACE;
0178: characterClass['\n'] = CHARCLASS_WHITESPACE;
0179: characterClass[12] = CHARCLASS_WHITESPACE;
0180: characterClass[0] = CHARCLASS_WHITESPACE;
0181:
0182: // digits
0183: characterClass['0'] = CHARCLASS_DIGIT;
0184: characterClass['1'] = CHARCLASS_DIGIT;
0185: characterClass['2'] = CHARCLASS_DIGIT;
0186: characterClass['3'] = CHARCLASS_DIGIT;
0187: characterClass['4'] = CHARCLASS_DIGIT;
0188: characterClass['5'] = CHARCLASS_DIGIT;
0189: characterClass['6'] = CHARCLASS_DIGIT;
0190: characterClass['7'] = CHARCLASS_DIGIT;
0191: characterClass['8'] = CHARCLASS_DIGIT;
0192: characterClass['9'] = CHARCLASS_DIGIT;
0193:
0194: // number special
0195: characterClass['.'] = CHARCLASS_NUMBERSPECIAL;
0196: characterClass['-'] = CHARCLASS_NUMBERSPECIAL;
0197: characterClass['+'] = CHARCLASS_NUMBERSPECIAL;
0198:
0199: // alpha
0200: for (int i = 'a'; i <= 'z'; i++) {
0201: characterClass[i] = CHARCLASS_TOKEN;
0202: }
0203: for (int i = 'A'; i <= 'Z'; i++) {
0204: characterClass[i] = CHARCLASS_TOKEN;
0205: }
0206:
0207: // contentstream allowed token characters
0208: characterClass['\''] = CHARCLASS_TOKEN;
0209: characterClass['"'] = CHARCLASS_TOKEN;
0210: }
0211:
0212: /**
0213: * evaluate to true if i is a PDF Delimiter char.
0214: *
0215: * <p>
0216: * See pdf spec delimiter characters.
0217: * </p>
0218: *
0219: * @param i
0220: * i a byte representation
0221: *
0222: * @return true if i is a PDF delimiter char
0223: */
0224: public static final boolean isDelimiter(int i) {
0225: return characterClass[i] == CHARCLASS_DELIMITER;
0226: }
0227:
0228: /**
0229: * evaluate to true if i is a valid digit.
0230: *
0231: * @param i
0232: * i a byte representation
0233: *
0234: * @return true if i is a valid digit
0235: */
0236: public static final boolean isDigit(int i) {
0237: return characterClass[i] == CHARCLASS_DIGIT;
0238: }
0239:
0240: /**
0241: * evaluate to true if i is a valid line terminator.
0242: *
0243: * @param i
0244: * i a byte representation
0245: *
0246: * @return true if i is a valid line terminator
0247: */
0248: public static final boolean isEOL(int i) {
0249: return (i == CHAR_CR) || (i == CHAR_LF) || (i == 12);
0250: }
0251:
0252: /**
0253: * evaluate to true if i is a valid first char for a number token.
0254: *
0255: * @param i
0256: * i a byte representation
0257: *
0258: * @return true if i is a valid first char for a number token
0259: */
0260: public static final boolean isNumberStart(int i) {
0261: int cc = characterClass[i];
0262: return (cc == CHARCLASS_DIGIT)
0263: || (cc == CHARCLASS_NUMBERSPECIAL);
0264: }
0265:
0266: /**
0267: * evaluate to true if i is a valid octal digit.
0268: *
0269: * @param i
0270: * i a byte representation
0271: *
0272: * @return true if i is a valid octal digit
0273: */
0274: public static final boolean isOctalDigit(int i) {
0275: return ((i >= '0') && (i <= '7'));
0276: }
0277:
0278: /**
0279: * evaluate to true if i is a valid string token start.
0280: *
0281: * @param i
0282: * i a byte representation
0283: *
0284: * @return true if i is a valid string token start
0285: */
0286: public static final boolean isTokenStart(int i) {
0287: return characterClass[i] == CHARCLASS_TOKEN;
0288: }
0289:
0290: /**
0291: * evaluate to true if i is a valid whitespace.
0292: *
0293: * <p>
0294: * See pdf spec "white space characters"
0295: * </p>
0296: *
0297: * @param i
0298: * i a byte representation
0299: *
0300: * @return true if i is a valid whitespace
0301: */
0302: public static final boolean isWhitespace(int i) {
0303: return characterClass[i] == CHARCLASS_WHITESPACE;
0304: }
0305:
0306: /**
0307: * parse the given byte array to a valid COSObject.
0308: *
0309: * @param data
0310: * a byte array containing COS encoded objects
0311: *
0312: * @return a COSObject
0313: *
0314: * @throws IOException
0315: * @throws COSLoadException
0316: */
0317: public static COSObject toCOSObject(byte[] data)
0318: throws IOException, COSLoadException {
0319: COSDocumentParser docParser = new COSDocumentParser(null);
0320: return (COSObject) docParser
0321: .parseElement(new RandomAccessByteArray(data));
0322: }
0323:
0324: /** A list for object lookahead (needed with PDF references) */
// Fixed three-slot FIFO: lookaheadPush appends at index lookaheadCount,
// lookaheadPop removes index 0 and shifts the rest down.
0325: private COSObject[] lookahead = new COSObject[] { null, null, null };
0326:
0327: /**
0328: * The number of elements currently in the lookahead buffer.
0329: */
0330: private int lookaheadCount = 0;
0331:
// When this handler and objectKey are both non-null, parseOnObjectHexString
// passes the raw string bytes through securityHandler.decryptString.
0332: private ISystemSecurityHandler securityHandler;
0333:
0334: /** A flag indicating we should flush the lookahead */
// While set, parseObject answers buffered objects instead of reading input;
// lookaheadPop clears it once the buffer is empty.
0335: private boolean flushLookahead = false;
0336:
// Scratch buffer shared by the hex string, name and number parsers; each of
// them calls reset() before collecting bytes.
0337: private FastByteArrayOutputStream localStream = new FastByteArrayOutputStream();
0338:
0339: /** an exception handler for handling messages eg PDFA compliance checks * */
// When null, handleError throws the error and handleWarning drops the warning.
0340: private IPDFParserExceptionHandler exceptionHandler;
0341:
// Key handed to the security handler when decrypting strings; exposed to
// subclasses via getObjectKey().
0342: private COSObjectKey objectKey;
0343:
// When true, the parse methods report additional COSLoadWarning instances
// (implementation limits, uneven hex strings, stream EOL conventions, ...).
0344: protected boolean check = false;
0345:
/**
 * Create the indirect object reference for a just parsed "R" token.
 *
 * <p>
 * parseObject calls this when parseElement has answered {@link #TOKEN_R}.
 * NOTE(review): the implementation presumably takes object and generation
 * number from the lookahead buffer and clears it - confirm in the
 * subclasses.
 * </p>
 *
 * @return the reference object
 *
 * @throws IOException
 * @throws COSLoadException
 */
0346: protected abstract COSIndirectObject createObjectReference()
0347: throws IOException, COSLoadException;
0348:
0349: public IPDFParserExceptionHandler getExceptionHandler() {
0350: return exceptionHandler;
0351: }
0352:
0353: protected COSObjectKey getObjectKey() {
0354: return objectKey;
0355: }
0356:
0357: /**
0358: * Handle an error if an exceptionHandler is set.
0359: *
0360: * @param error
0361: * @throws COSLoadException
0362: */
0363: public void handleError(COSLoadError error) throws COSLoadException {
0364: if (exceptionHandler != null) {
0365: exceptionHandler.error(error);
0366: } else {
0367: throw error;
0368: }
0369: }
0370:
0371: /**
0372: * Handle a warning if an exceptionHandler is set.
0373: *
0374: * @param warning
0375: * @throws COSLoadException
0376: */
0377: public void handleWarning(COSLoadWarning warning)
0378: throws COSLoadException {
0379: if (exceptionHandler != null) {
0380: exceptionHandler.warning(warning);
0381: } else {
0382: // it is just a warning...
0383: }
0384: }
0385:
0386: /**
0387: * in order to read references we need a two object lookahead for the
0388: * integer numbers this method pops the first object from the fifo
0389: * structure.
0390: *
0391: * @return The topmost {@link COSObject}in the object lookahead buffer.
0392: */
0393: protected COSObject lookaheadPop() {
0394: COSObject result = lookahead[0];
0395: lookahead[0] = lookahead[1];
0396: lookahead[1] = lookahead[2];
0397: lookahead[2] = null;
0398: lookaheadCount--;
0399: if (lookaheadCount <= 0) {
0400: // everything flushed now
0401: lookaheadCount = 0;
0402: this .flushLookahead = false;
0403: }
0404: return result;
0405: }
0406:
0407: /**
0408: * in order to read references we need a two object lookahead for the
0409: * integer numbers this method pushes an object in the fifo structure.
0410: *
0411: * @param obj
0412: * The {@link COSObject}to push in the buffer.
0413: */
0414: protected void lookaheadPush(COSObject obj) {
0415: lookahead[lookaheadCount++] = obj;
0416: }
0417:
0418: /**
0419: * comment see PDF Reference v1.4, chapter 3.1.2 comments Comment ::= "%"
0420: * anyChar EOL read until end of line.
0421: *
0422: * @throws IOException
0423: */
0424: protected void parseComment(IRandomAccess input) throws IOException {
0425: int next;
0426: while (true) {
0427: next = input.read();
0428: if (next == -1) {
0429: break;
0430: }
0431: if (isEOL(next)) {
0432: break;
0433: }
0434: }
0435: }
0436:
0437: /**
0438: * parse the basic elements from the current stream position.
0439: *
0440: * <p>
0441: * see PDF Reference v1.4, chapter 3.2 Objects
0442: * </p>
0443: *
0444: * <p>
0445: * COSObject ::= COSToken | COSBoolean | COSString | COSNumber | COSName |
0446: * COSNull | COSArray | COSDictionary | COSStream
0447: * </p>
0448: *
0449: * @return the object parsed
0450: *
0451: * @throws IOException
0452: * @throws COSLoadException
0453: */
0454: public Object parseElement(IRandomAccess input) throws IOException,
0455: COSLoadException {
0456: int next;
0457: do {
0458: next = input.read();
0459: if (next == -1) {
0460: return null;
0461: }
0462:
0463: // we have found a non-whitespace character
0464: if (isNumberStart(next)) {
0465: return parseOnObjectNumber(input, next);
0466: }
0467: if (next == '(') {
0468: return parseOnObjectString(input);
0469: }
0470: if (isTokenStart(next)) {
0471: byte[] token = readTokenElement(input, next);
0472: if (token.length == 1) {
0473: if (token[0] == TOKEN_R[0]) {
0474: return TOKEN_R;
0475: }
0476: } else if (token.length == 4) {
0477: if ((token[0] == TOKEN_true[0])
0478: && (token[1] == TOKEN_true[1])
0479: && (token[2] == TOKEN_true[2])
0480: && (token[3] == TOKEN_true[3])) {
0481: return COSTrue.create();
0482: }
0483: if ((token[0] == TOKEN_null[0])
0484: && (token[1] == TOKEN_null[1])
0485: && (token[2] == TOKEN_null[2])
0486: && (token[3] == TOKEN_null[3])) {
0487: return COSNull.create();
0488: }
0489: } else if (token.length == 5) {
0490: if ((token[0] == TOKEN_false[0])
0491: && (token[1] == TOKEN_false[1])
0492: && (token[2] == TOKEN_false[2])
0493: && (token[3] == TOKEN_false[3])
0494: && (token[4] == TOKEN_false[4])) {
0495: return COSFalse.create();
0496: }
0497: }
0498: return token;
0499: }
0500: if (next == '/') {
0501: return parseOnObjectName(input);
0502: }
0503:
0504: // performance shortcut for simple space
0505: if ((next == ' ') || isWhitespace(next)) {
0506: continue;
0507: }
0508: if (next == '%') {
0509: parseComment(input);
0510: continue;
0511: }
0512:
0513: // before we start parsing a container we must flush lookahead
0514: if (lookaheadCount > 0) {
0515: input.seekBy(-1);
0516: return null;
0517: }
0518: if (next == '<') {
0519: return parseOnObjectStreamOrDictionaryOrHexString(input);
0520: }
0521: if (next == '[') {
0522: return parseOnObjectArray(input);
0523: }
0524: // unread, i do not understand...
0525: // return null if char unexpected, if this is an error depends on
0526: // context
0527: input.seekBy(-1);
0528: return null;
0529: } while (true);
0530: }
0531:
0532: /**
0533: * Consume whitespace. check if exactly a EOL sequence with no other
0534: * whitespace around is available.
0535: *
0536: * @param input
0537: * @return <code>true</code> if EOL was found.
0538: * @throws IOException
0539: */
0540: protected boolean readEOL(IRandomAccess input) throws IOException {
0541: int next = input.read();
0542: if (next == -1) {
0543: return false;
0544: }
0545: if (next == CHAR_CR) {
0546: next = input.read();
0547: if (next == -1) {
0548: return false;
0549: } else if (next == CHAR_LF) {
0550: next = input.read();
0551: if (next == -1) {
0552: return true;
0553: } else if (isWhitespace(next)) {
0554: readSpaces(input);
0555: return false;
0556: } else {
0557: input.seekBy(-1);
0558: return true;
0559: }
0560: }
0561: } else if (next == CHAR_LF) {
0562: next = input.read();
0563: if (next == -1) {
0564: return true;
0565: } else if (isWhitespace(next)) {
0566: readSpaces(input);
0567: return false;
0568: } else {
0569: input.seekBy(-1);
0570: return true;
0571: }
0572: } else if (isWhitespace(next)) {
0573: readSpaces(input);
0574: return false;
0575: }
0576: input.seekBy(-1);
0577: return true;
0578: }
0579:
0580: /**
0581: * pdf header see PDF Reference v1.4, chapter 3.4.1 Header COSHEader ::=
0582: * "%PDF-" version.
0583: *
0584: * @throws IOException
0585: * @throws COSLoadException
0586: */
0587: public STDocType parseHeader(IRandomAccess input)
0588: throws IOException, COSLoadException {
0589: int next;
0590: while (true) {
0591: next = input.read();
0592: if (next == -1) {
0593: break;
0594: }
0595:
0596: // performance shortcut for simple space
0597: if ((next == ' ') || isWhitespace(next)) {
0598: continue;
0599: }
0600: break;
0601: }
0602: byte[] token = new byte[4];
0603: token[0] = (byte) next;
0604: input.read(token, 1, 3);
0605: STDocType docType = new STDocType();
0606: if (Arrays.equals(token, TOKEN_PDFHEADER)) {
0607: docType.setTypeName("PDF");
0608: } else if (Arrays.equals(token, TOKEN_FDFHEADER)) {
0609: docType.setTypeName("FDF");
0610: } else {
0611: input.seekBy(-token.length);
0612: COSLoadError e = new COSLoadError(
0613: "file format error. document must start with %PDF or %FDF");
0614: handleError(e);
0615: }
0616: input.read();
0617: byte[] version = readToken(input);
0618: if (version == null) {
0619: COSLoadError e = new COSLoadError(
0620: "file format error. no pdf/fdf version info found");
0621: handleError(e);
0622: } else {
0623: docType.setVersion(StringTools.toString(version));
0624: }
0625: return docType;
0626: }
0627:
0628: /**
0629: * Parse a valid COS object for use in document context from the current
0630: * stream position.
0631: *
0632: * <p>
0633: * see PDF Reference v1.4, chapter 3.2 Objects
0634: * </p>
0635: *
0636: * <p>
0637: * this implementation is a little more complicated, as we hava a two object
0638: * lookahead to detect references.
0639: *
0640: * <code>
0641: * COSObject ::= COSReference |
0642: * COSBoolean |
0643: * COSString |
0644: * COSNumber |
0645: * COSName |
0646: * COSNull |
0647: * COSArray |
0648: * COSDictionary |
0649: * COSStream
0650: *
0651: * </code>
0652: *
0653: * </p>
0654: *
0655: * @return the object parsed
0656: *
0657: * @throws IOException
0658: * @throws COSLoadException
0659: */
0660: protected COSDocumentElement parseObject(IRandomAccess input)
0661: throws IOException, COSLoadException {
0662: if (flushLookahead) {
0663: return lookaheadPop();
0664: }
0665:
0666: // parse another element
0667: Object parsedElement = parseElement(input);
0668: if (parsedElement == null) {
0669: flushLookahead = true;
0670: return lookaheadPop();
0671: }
0672:
0673: // try to detect reference "R"
0674: COSObject resultObject;
0675: if (parsedElement instanceof byte[]) {
0676: if (TOKEN_R == parsedElement) {
0677: // reference detected, clean up lookahed and return
0678: return createObjectReference();
0679: }
0680: // we have found a token that has to be re-read in another context
0681: // take care of consumed whitespace!
0682: input.seekBy(-1);
0683: int next = input.read();
0684:
0685: // performance shortcut for simple space
0686: if ((next == ' ') || isWhitespace(next)) {
0687: input.seekBy(-1);
0688: }
0689: input.seekBy(-((byte[]) parsedElement).length);
0690: this .flushLookahead = true;
0691: return lookaheadPop();
0692: }
0693: resultObject = (COSObject) parsedElement;
0694:
0695: // build up lookahead stack
0696: if (resultObject instanceof COSNumber) {
0697: lookaheadPush(resultObject);
0698: // return one object if lookahead larger than 2
0699: if (lookaheadCount > 2) {
0700: return lookaheadPop();
0701: }
0702:
0703: // enter parse recursive
0704: return parseObject(input);
0705: }
0706:
0707: // shortcut to avoid building entry in lookahead
0708: if (lookaheadCount > 0) {
0709: lookaheadPush(resultObject);
0710: this .flushLookahead = true;
0711: return lookaheadPop();
0712: }
0713: return resultObject;
0714: }
0715:
0716: protected COSObject parseObjectDictionary(IRandomAccess input)
0717: throws IOException, COSLoadException {
0718: int next;
0719: next = input.read();
0720: if (next != '<') {
0721: input.seekBy(-1);
0722: COSLoadError e = new COSLoadError("'<' expected");
0723: handleError(e);
0724: }
0725: next = input.read();
0726: if (next != '<') {
0727: input.seekBy(-1);
0728: COSLoadError e = new COSLoadError("'<' expected");
0729: handleError(e);
0730: }
0731: return parseOnObjectDictionary(input);
0732: }
0733:
0734: /**
0735: * parse a COS array from the current stream position. see PDF Reference
0736: * v1.4, chapter 3.2.5 Array objects COSArray ::= "[" (COSObject) "]"
0737: *
0738: * @return the array parsed
0739: * @throws IOException
0740: *
0741: * @throws IOException
0742: */
0743: protected COSObject parseOnObjectArray(IRandomAccess input)
0744: throws COSLoadException, IOException {
0745: int next;
0746: COSArray result = COSArray.create();
0747: while (true) {
0748: COSDocumentElement element = parseObject(input);
0749: if (element == null) {
0750: next = input.read();
0751: if (next == -1) {
0752: COSLoadError e = new COSLoadError(
0753: "file format error. unexpected end of array");
0754: handleError(e);
0755: }
0756: if (next != ']') {
0757: byte[] badElement = readTokenElement(input, next);
0758: if (check) {
0759: COSLoadWarning pwarn = new COSLoadWarning(
0760: "bad array element ("
0761: + new String(badElement) + ")");
0762: pwarn.setHint(result);
0763: handleWarning(pwarn);
0764: }
0765: continue;
0766: }
0767: break;
0768: }
0769: result.basicAddSilent(element);
0770: }
0771: if (check && (result.size() > 8191)) {
0772: COSLoadWarning pwarn = new COSLoadWarning(C_WARN_ARRAYSIZE);
0773: pwarn.setHint(result);
0774: handleWarning(pwarn);
0775: }
0776: return result;
0777: }
0778:
0779: /**
0780: * parse a COS dictionary from the current stream position. see PDF
0781: * Reference v1.4, chapter 3.2.6 Dictionary objects
0782: *
0783: * <code>
0784: * COSDictionary ::= "<<" (COSName COSObject)* ">>"
0785: * </code>
0786: *
0787: * @return the dictionary parsed
0788: *
0789: * @throws IOException
0790: * @throws COSLoadException
0791: */
0792: protected COSObject parseOnObjectDictionary(IRandomAccess input)
0793: throws IOException, COSLoadException {
0794: int next;
0795: COSDictionary dict = COSDictionary.create();
0796: try {
0797: while (true) {
0798: COSDocumentElement keyObject = parseObject(input);
0799: if (keyObject == null) {
0800: break;
0801: }
0802: COSName dictKey = (COSName) keyObject;
0803: COSDocumentElement value = parseObject(input);
0804: if (value == null) {
0805: COSLoadError e = new COSLoadError(
0806: "missing value for key " + keyObject);
0807: handleError(e);
0808: }
0809: dict.basicPutSilent(dictKey, value);
0810: }
0811: } catch (ClassCastException ex) {
0812: COSLoadError e = new COSLoadError("name expected");
0813: handleError(e);
0814: }
0815: next = input.read();
0816: if (next != '>') {
0817: COSLoadError e = new COSLoadError("unexpected character ("
0818: + (char) next + ")");
0819: handleError(e);
0820: }
0821: next = input.read();
0822: if (next != '>') {
0823: COSLoadError e = new COSLoadError("unexpected character ("
0824: + (char) next + ")");
0825: handleError(e);
0826: }
0827: return dict;
0828: }
0829:
0830: /**
0831: * parse a COS string encoded in hex from the current stream position. see
0832: * PDF Reference v1.4, chapter 3.2.3 String objects
0833: *
0834: * <code>
0835: * COSString ::= COSString | COSHexString
0836: * COSHexString ::= "<" (hexChar)* ">"
0837: * </code>
0838: *
0839: * @return the string parsed
0840: *
0841: * @throws IOException
0842: * @throws COSLoadException
0843: */
0844: protected COSObject parseOnObjectHexString(IRandomAccess input,
0845: int next) throws IOException, COSLoadException {
0846: localStream.reset();
0847: boolean secondDigit = false;
0848: int digitValue = 0;
0849: int charValue = 0;
0850: while (true) {
0851: digitValue = HexTools.hexDigitToInt((char) next);
0852: if (digitValue == -1) {
0853: if (next == -1) {
0854: break;
0855: }
0856: if (next == '>') {
0857: break;
0858: }
0859: if (!isWhitespace(next)) {
0860: IOException ioe = new IOException("<" + next
0861: + "> '" + (char) next
0862: + "' not a valid hex char");
0863:
0864: // a warning for PDF/A related checks will be triggered
0865: // exception is handled right on track
0866: COSLoadWarning pwarn = new COSLoadWarning(
0867: C_WARN_ILLEGALHEX);
0868: pwarn.setHint(new Long(input.getOffset()));
0869: handleWarning(pwarn);
0870: throw ioe;
0871: }
0872: } else {
0873: if (secondDigit) {
0874: charValue = (charValue << 4) + digitValue;
0875: localStream.write(charValue);
0876: secondDigit = false;
0877: } else {
0878: secondDigit = true;
0879: charValue = digitValue;
0880: }
0881: }
0882: next = input.read();
0883: }
0884: if (secondDigit) {
0885: // this is a warning for uneven numbers on hex codes
0886: if (check) {
0887: COSLoadWarning pwarn = new COSLoadWarning(
0888: C_WARN_UNEVENHEX);
0889: pwarn.setHint(new Long(input.getOffset()));
0890: handleWarning(pwarn);
0891: }
0892: // assume trailing "0"
0893: charValue = charValue << 4;
0894: localStream.write(charValue);
0895: }
0896:
0897: COSString result;
0898: if ((securityHandler == null) || (objectKey == null)) {
0899: result = COSString.createHex(localStream.toByteArray());
0900: } else {
0901: try {
0902: byte[] decrypted = securityHandler.decryptString(
0903: objectKey, localStream.toByteArray());
0904: result = COSString.createHex(decrypted);
0905: } catch (COSSecurityException e) {
0906: throw new COSLoadError(e);
0907: }
0908: }
0909: if (check && (result.stringValue().length() > 65535)) {
0910: COSLoadWarning pwarn = new COSLoadWarning(
0911: C_WARN_STRINGTOLONG);
0912: pwarn.setHint(result);
0913: handleWarning(pwarn);
0914: }
0915: return result;
0916: }
0917:
0918: /**
0919: * parse a COS name from the current stream position. see PDF Reference
0920: * v1.4, chapter 3.2.4 Name Objects COSName ::= "/" nameChars
0921: *
0922: * @return the name parsed
0923: *
0924: * @throws IOException
0925: * @throws COSLoadException
0926: */
0927: protected COSObject parseOnObjectName(IRandomAccess input)
0928: throws IOException, COSLoadException {
0929: int next;
0930: localStream.reset();
0931: do {
0932: next = input.read();
0933: if (next == -1) {
0934: break;
0935: }
0936:
0937: // performance shortcut for simple space
0938: if ((next == ' ') || isWhitespace(next)) {
0939: break;
0940: }
0941: if (isDelimiter(next)) {
0942: input.seekBy(-1);
0943: break;
0944: }
0945: if (next == '#') {
0946: next = input.read();
0947:
0948: int digit1 = HexTools.hexDigitToInt((char) next);
0949: if (digit1 == -1) {
0950: COSLoadError e = new COSLoadError("<" + next
0951: + "> not a valid hex char");
0952: handleError(e);
0953: }
0954: next = input.read();
0955:
0956: int digit2 = HexTools.hexDigitToInt((char) next);
0957: if (digit2 == -1) {
0958: COSLoadError e = new COSLoadError("<" + next
0959: + "> not a valid hex char");
0960: handleError(e);
0961: }
0962: localStream.write((digit1 << 4) + digit2);
0963: } else {
0964: localStream.write(next);
0965: }
0966: } while (true);
0967: byte[] bytes = localStream.toByteArray();
0968: COSName result = COSName.create(bytes);
0969: if (check && (result.stringValue().length() > 127)) {
0970: COSLoadWarning pwarn = new COSLoadWarning(C_WARN_NAMETOLONG);
0971: pwarn.setHint(result);
0972: handleWarning(pwarn);
0973: }
0974: return result;
0975: }
0976:
0977: /**
0978: * parse a COS number from the current stream position. see PDF Reference
0979: * v1.4, chapter 3.2.2 Numeric objects COSNumber ::= COSFixed | COSInteger
0980: * COSFixed ::= (+ | -)? (digit) "." (digit) COSInteger ::= (+ | -)? (digit)
0981: *
0982: * @return the number parsed
0983: *
0984: * @throws IOException
0985: * @throws COSLoadException
0986: */
0987: protected COSObject parseOnObjectNumber(IRandomAccess input,
0988: int next) throws IOException, COSLoadException {
0989: boolean isFixed = false;
0990: localStream.reset();
0991: isFixed = next == '.';
0992: localStream.write((byte) next);
0993: do {
0994: next = input.read();
0995: if (next == -1) {
0996: break;
0997: } else if (isDigit(next)) {
0998: localStream.write((byte) next);
0999: } else if (next == '.') {
1000: isFixed = true;
1001: localStream.write((byte) next);
1002: } else if ((next == ' ') || isWhitespace(next)) {
1003: break;
1004: } else {
1005: input.seekBy(-1);
1006: break;
1007: }
1008: } while (true);
1009: if (isFixed) {
1010: COSFixed fixed = COSFixed.create(localStream.getBytes(), 0,
1011: localStream.size());
1012: return fixed;
1013: }
1014: return COSInteger.create(localStream.getBytes(), 0, localStream
1015: .size());
1016: }
1017:
1018: /**
1019: * parse a COS stream from the current stream position. see PDF Reference
1020: * v1.4, chapter 3.2.7 Stream objects COSStream ::= COSDictionary "stream"
1021: * bytes "endstream"
1022: *
1023: * @param dict
1024: * The object that should be filled with the dictionary entries.
1025: *
1026: * @return The stream parsed.
1027: *
1028: * @throws IOException
1029: * @throws COSLoadException
1030: */
1031: protected COSObject parseOnObjectStream(IRandomAccess input,
1032: COSDictionary dict) throws IOException, COSLoadException {
1033: COSStream stream = COSStream.create(dict);
1034:
1035: byte[] token = new byte[5];
1036: // read "tream", "s" already consumed
1037: input.read(token);
1038: if (!Arrays.equals(token, TOKEN_s_tream)) {
1039: input.seekBy(-token.length - 1);
1040: COSLoadError e = new COSLoadError(
1041: "file format error. 'stream' expected");
1042: handleError(e);
1043: }
1044:
1045: // allow for at max two separator chars after "stream"
1046: int next;
1047: next = input.read();
1048: if (next == CHAR_CR) {
1049: next = input.read();
1050: }
1051: if (next != CHAR_LF) {
1052: // ?? its legal to have NO separator
1053: // ?? there are testdocuments that provide only a single CR
1054: if (check) {
1055: COSLoadWarning pwarn = new COSLoadWarning(
1056: C_WARN_STREAMEOL);
1057: pwarn.setHint(new Long(input.getOffset()));
1058: handleWarning(pwarn);
1059: }
1060: input.seekBy(-1);
1061: }
1062:
1063: long offset = input.getOffset();
1064: int length = -1;
1065: COSNumber cosLength = dict.get(COSStream.DK_Length).asInteger();
1066: if (cosLength == null) {
1067: // warning for pdfa
1068: if (check) {
1069: COSLoadWarning pwarn = new COSLoadWarning(
1070: C_WARN_STREAMLENGTH);
1071: pwarn.setHint(new Long(input.getOffset()));
1072: handleWarning(pwarn);
1073: }
1074: } else {
1075: length = cosLength.intValue();
1076: }
1077: input.seek(offset);
1078:
1079: byte[] bytes = null;
1080:
1081: if (length < 0) {
1082: bytes = readStream(input);
1083: } else {
1084: bytes = new byte[length];
1085: int count = input.read(bytes);
1086: if (count < length) {
1087: if (check) {
1088: // get additional warning for pdfa
1089: COSLoadWarning pwarn = new COSLoadWarning(
1090: C_WARN_STREAMLENGTH);
1091: pwarn.setHint(new Long(input.getOffset()));
1092: handleWarning(pwarn);
1093: }
1094: COSLoadError e = new COSLoadError(
1095: "unexpected end of stream");
1096: handleError(e);
1097: }
1098: }
1099:
1100: if (check) {
1101: // pdfa compliance check
1102: if (!readEOL(input)) {
1103: COSLoadWarning pwarn = new COSLoadWarning(
1104: C_WARN_ENDSTREAMEOL);
1105: pwarn.setHint(new Long(input.getOffset()));
1106: handleWarning(pwarn);
1107: }
1108: } else {
1109: // be lazy with pdf spec and accept any whitespace before
1110: // 'endstream'
1111: readSpaces(input);
1112: }
1113:
1114: // read "endstream"
1115: token = new byte[9];
1116: input.read(token);
1117: if (!Arrays.equals(token, TOKEN_endstream)) {
1118: input.seekBy(-token.length - 1);
1119: // a warning for PDF/A related checks will be triggered
1120: COSLoadWarning pwarn = new COSLoadWarning(
1121: C_WARN_ENDSTREAMCORRUPT);
1122: pwarn.setHint(new Long(input.getOffset()));
1123: handleWarning(pwarn);
1124:
1125: if (length > 0) {
1126: // retry from the beginning with undeterminate length
1127: input.seek(offset);
1128: bytes = readStream(input);
1129: // read "endstream"
1130: token = new byte[9];
1131: input.read(token);
1132: if (!Arrays.equals(token, TOKEN_endstream)) {
1133: COSLoadError e = new COSLoadError(
1134: "file format error. 'endstream' expected");
1135: handleError(e);
1136: }
1137: // fix length
1138: length = bytes.length;
1139: dict
1140: .put(COSStream.DK_Length, COSInteger
1141: .create(length));
1142: } else {
1143: COSLoadError e = new COSLoadError(
1144: "file format error. 'endstream' expected");
1145: handleError(e);
1146: }
1147:
1148: }
1149: if ((securityHandler == null) || (objectKey == null)) {
1150: stream.basicSetEncodedBytes(bytes);
1151: } else {
1152: try {
1153: byte[] decrypted = securityHandler.decryptStream(
1154: objectKey, dict, bytes);
1155: stream.basicSetEncodedBytes(decrypted);
1156: } catch (COSSecurityException e) {
1157: throw new COSLoadError(e);
1158: }
1159: }
1160: return stream;
1161: }
1162:
1163: /**
1164: * parse a COS stream or dictionary from the current stream position.
1165: * COSStreamOrDict ::= COSStream | COSDict
1166: *
1167: * @return the object parsed
1168: *
1169: * @throws IOException
1170: * @throws COSLoadException
1171: */
1172: protected COSObject parseOnObjectStreamOrDictionary(
1173: IRandomAccess input) throws IOException, COSLoadException {
1174: COSObject dict = parseOnObjectDictionary(input);
1175: int next;
1176: while (true) {
1177: next = input.read();
1178: if (next == -1) {
1179: return dict;
1180: }
1181:
1182: // performance shortcut for simple space
1183: if ((next == ' ') || isWhitespace(next)) {
1184: continue;
1185: }
1186: break;
1187: }
1188: if (next == 's') {
1189: return parseOnObjectStream(input, (COSDictionary) dict);
1190: }
1191: input.seekBy(-1);
1192: return dict;
1193: }
1194:
1195: /**
1196: * parse a COS stream or dictionary or hex string from the current stream
1197: * position. COSStreamOrDictOrHex ::= COSStream | COSDict | COSHexString
1198: *
1199: * @return the object parsed
1200: *
1201: * @throws IOException
1202: * @throws COSLoadException
1203: */
1204: protected COSObject parseOnObjectStreamOrDictionaryOrHexString(
1205: IRandomAccess input) throws IOException, COSLoadException {
1206: int next;
1207: next = input.read();
1208: if (next == '<') {
1209: return parseOnObjectStreamOrDictionary(input);
1210: }
1211: return parseOnObjectHexString(input, next);
1212: }
1213:
1214: /**
1215: * parse a COS string from the current stream position. see PDF Reference
1216: * v1.4, chapter 3.2.3. String objects COSString ::= "(" stringData ")"
1217: *
1218: * @return the string parsed
1219: *
1220: * @throws IOException
1221: * @throws COSLoadException
1222: */
1223: protected COSObject parseOnObjectString(IRandomAccess input)
1224: throws IOException, COSLoadException {
1225: int next;
1226: int paraCount = 0;
1227: localStream.reset();
1228: while (true) {
1229: next = input.read();
1230: if (next == '\\') {
1231: int c = readEscape(input);
1232: if (c != -1) {
1233: localStream.write(c);
1234: }
1235: } else if (next == ')') {
1236: if (paraCount > 0) {
1237: paraCount--;
1238: localStream.write(next);
1239: } else {
1240: break;
1241: }
1242: } else if (next == CHAR_CR) {
1243: // eol is always \n in a string
1244: next = input.read();
1245: if (next != CHAR_LF) {
1246: input.seekBy(-1);
1247: }
1248: localStream.write(CHAR_LF);
1249: } else if (next == '(') {
1250: paraCount++;
1251: localStream.write(next);
1252: } else if (next == -1) {
1253: COSLoadError e = new COSLoadError(
1254: "file format error. unexpected end of string");
1255: handleError(e);
1256: } else {
1257: localStream.write(next);
1258: }
1259: }
1260: COSString result;
1261: if ((securityHandler == null) || (objectKey == null)) {
1262: result = COSString.create(localStream.toByteArray());
1263: } else {
1264: try {
1265: byte[] decrypted = securityHandler.decryptString(
1266: objectKey, localStream.toByteArray());
1267: result = COSString.create(decrypted);
1268: } catch (COSSecurityException e) {
1269: throw new COSLoadError(e);
1270: }
1271: }
1272: if (check && (result.stringValue().length() > 65535)) {
1273: COSLoadWarning pwarn = new COSLoadWarning(
1274: C_WARN_STRINGTOLONG);
1275: pwarn.setHint(result);
1276: handleWarning(pwarn);
1277: }
1278: return result;
1279: }
1280:
1281: /**
1282: * read an esacped char from the stream.
1283: *
1284: * @return the character corresponding to the escape code
1285: *
1286: * @throws IOException
1287: */
1288: protected int readEscape(IRandomAccess input) throws IOException {
1289: int next = 0;
1290: next = input.read();
1291: if (next == -1) {
1292: return -1;
1293: }
1294: if (isOctalDigit(next)) {
1295: input.seekBy(-1);
1296: return readOctalChar(input);
1297: }
1298: if (next == CHAR_LF) {
1299: return -1;
1300: }
1301: if (next == CHAR_CR) {
1302: next = input.read();
1303: if (next != CHAR_LF) {
1304: input.seekBy(-1);
1305: }
1306: return -1;
1307: }
1308: if (next == 'n') {
1309: return CHAR_LF;
1310: }
1311: if (next == 'r') {
1312: return CHAR_CR;
1313: }
1314: if (next == 't') {
1315: return CHAR_HT;
1316: }
1317: if (next == 'b') {
1318: return CHAR_BS;
1319: }
1320: if (next == 'f') {
1321: return CHAR_FF;
1322: }
1323: return next;
1324: }
1325:
1326: /**
1327: * reads the next integer on input. consumes one trailing space if
1328: * consumeSpaceAfter is set to true. Consumes leading spaces and comments.
1329: *
1330: * @param input
1331: * @param consumeSpaceAfter
1332: * @return The integer read.
1333: * @throws IOException
1334: */
1335: public int readInteger(IRandomAccess input,
1336: boolean consumeSpaceAfter) throws IOException {
1337: int result = 0;
1338: int next;
1339: while (true) {
1340: next = input.read();
1341: if (next == -1) {
1342: return result;
1343: } else if ((next == ' ') || isWhitespace(next)) {
1344: continue;
1345: } else if (next == '%') {
1346: parseComment(input);
1347: } else {
1348: break;
1349: }
1350: }
1351: while (true) {
1352: if (isDigit(next)) {
1353: result = ((result * 10) + next) - '0';
1354: } else {
1355: input.seekBy(-1);
1356: break;
1357: }
1358: next = input.read();
1359: if (next == -1) {
1360: break;
1361: } else if ((next == ' ') || isWhitespace(next)) {
1362: if (!consumeSpaceAfter) {
1363: input.seekBy(-1);
1364: }
1365: break;
1366: }
1367: }
1368: return result;
1369: }
1370:
1371: /**
1372: * read an octal character from the stream.
1373: *
1374: * @return the integer value of the character read or -1
1375: *
1376: * @throws IOException
1377: */
1378: protected int readOctalChar(IRandomAccess input) throws IOException {
1379: int result = -1;
1380: int c = 0;
1381:
1382: c = input.read();
1383: if ((c != -1) && isOctalDigit(c)) {
1384: result = c - '0';
1385: c = input.read();
1386: if (isOctalDigit(c)) {
1387: result = ((result * 8) + c) - '0';
1388: c = input.read();
1389: if (isOctalDigit(c)) {
1390: result = ((result * 8) + c) - '0';
1391: } else {
1392: input.seekBy(-1);
1393: }
1394: } else {
1395: input.seekBy(-1);
1396: }
1397: } else {
1398: input.seekBy(-1);
1399: }
1400: return result;
1401: }
1402:
1403: /**
1404: * read all characters until EOF or non space char appears. the first non
1405: * space char is pushed back so the next char read is the first non space
1406: * char.
1407: *
1408: * @throws IOException
1409: */
1410: public void readSpaces(IRandomAccess input) throws IOException {
1411: int next = 0;
1412: while (true) {
1413: next = input.read();
1414: if (next == -1) {
1415: break;
1416: }
1417: // performance shortcut for simple space
1418: if ((next == ' ') || isWhitespace(next)) {
1419: continue;
1420: }
1421: input.seekBy(-1);
1422: break;
1423: }
1424: }
1425:
1426: /**
1427: * Read all characters up to "endstream" and assume them belonging to the
1428: * stream.
1429: * <p>
1430: * ATTENTION this is a heuristic approach as the tag "endstream" may be part
1431: * of the stream data!
1432: *
1433: * @return All characters up to "endstream"
1434: *
1435: * @throws IOException
1436: */
1437: protected byte[] readStream(IRandomAccess input) throws IOException {
1438: byte[] token = new byte[8];
1439: localStream.reset();
1440: int next;
1441: while (true) {
1442: next = input.read();
1443: if (next == 'e') {
1444: input.read(token);
1445: if (Arrays.equals(token, TOKEN_ndstream)) {
1446: input.seekBy(-TOKEN_endstream.length);
1447: return localStream.toByteArray();
1448: }
1449: input.seekBy(-token.length);
1450: } else if (next == -1) {
1451: break;
1452: }
1453: localStream.write(next);
1454: }
1455: if (localStream.size() == 0) {
1456: return null;
1457: }
1458: return localStream.toByteArray();
1459: }
1460:
1461: /**
1462: * read a single token.
1463: *
1464: * @return the array of characters belonging to the token
1465: *
1466: * @throws IOException
1467: */
1468: public byte[] readToken(IRandomAccess input) throws IOException {
1469: //
1470: int next;
1471: while (true) {
1472: next = input.read();
1473: if (next == -1) {
1474: return null;
1475: } else if ((next == ' ') || isWhitespace(next)) {
1476: continue;
1477: } else if (next == '%') {
1478: parseComment(input);
1479: } else {
1480: break;
1481: }
1482: }
1483: return readTokenElement(input, next);
1484: }
1485:
1486: /**
1487: * derive of readToken, populates the messages list with non-fatal error
1488: * messages
1489: *
1490: * @param input
1491: * @param messages
1492: * @return token bytes
1493: * @throws IOException
1494: */
1495: public byte[] readToken(IRandomAccess input, List messages)
1496: throws IOException {
1497: int next;
1498: int countWS = 0;
1499: while (true) {
1500: next = input.read();
1501: if (next == -1) {
1502: return null;
1503: } else if ((next == ' ') || isWhitespace(next)) {
1504: countWS++;
1505: if (countWS > 1) {
1506: messages.add(C_TOKEN_ADDWSB);
1507: }
1508: continue;
1509: } else if (next == '%') {
1510: messages.add(C_TOKEN_COMMENT);
1511: parseComment(input);
1512: } else {
1513: break;
1514: }
1515: }
1516: return readTokenElement(input, next, messages);
1517: }
1518:
1519: protected byte[] readTokenElement(IRandomAccess input, int next)
1520: throws IOException {
1521: localStream.reset();
1522: //
1523: localStream.write(next);
1524: do {
1525: next = input.read();
1526: if (next == -1) {
1527: break;
1528: } else if ((next == ' ') || isWhitespace(next)) { // performance
1529: // shortcut
1530: break;
1531: } else if (isDelimiter(next)) {
1532: input.seekBy(-1);
1533: break;
1534: }
1535: localStream.write(next);
1536: } while (true);
1537: return localStream.toByteArray();
1538: }
1539:
1540: /**
1541: * derive of readToken, populates the messages list with non-fatal error
1542: * messages
1543: *
1544: * @param input
1545: * @param next
1546: * @param messages
1547: * @return token bytes
1548: * @throws IOException
1549: */
1550: protected byte[] readTokenElement(IRandomAccess input, int next,
1551: List messages) throws IOException {
1552: localStream.reset();
1553: localStream.write(next);
1554: do {
1555: next = input.read();
1556: if (next == -1) {
1557: break;
1558: } else if ((next == ' ') || isWhitespace(next)) { // performance
1559: // shortcut
1560: next = input.read();
1561: if ((next == ' ') || isWhitespace(next)) { // performance
1562: // shortcut
1563: messages.add(C_TOKEN_ADDWSA);
1564: }
1565: input.seekBy(-1);
1566: break;
1567: } else if (isDelimiter(next)) {
1568: messages.add(C_TOKEN_NOWSA);
1569: input.seekBy(-1);
1570: break;
1571: }
1572: localStream.write(next);
1573: } while (true);
1574: return localStream.toByteArray();
1575: }
1576:
1577: public void setExceptionHandler(
1578: IPDFParserExceptionHandler exceptionHandler) {
1579: this .exceptionHandler = exceptionHandler;
1580: check = exceptionHandler != null;
1581: }
1582:
1583: protected void setObjectKey(COSObjectKey objectKey) {
1584: this .objectKey = objectKey;
1585: }
1586:
1587: protected ISystemSecurityHandler getSecurityHandler() {
1588: return securityHandler;
1589: }
1590:
1591: protected void setSecurityHandler(
1592: ISystemSecurityHandler securityHandler) {
1593: this.securityHandler = securityHandler;
1594: }
1595: }
|