001: /*
002: Copyright © 2006,2007 Stefano Chizzolini. http://clown.stefanochizzolini.it
003:
004: Contributors:
005: * Stefano Chizzolini (original code developer, http://www.stefanochizzolini.it)
006: * Haakan Aakerberg (bugfix contributor):
007: - [FIX:0.0.4:1]
008: - [FIX:0.0.4:4]
009:
010: This file should be part of the source code distribution of "PDF Clown library"
011: (the Program): see the accompanying README files for more info.
012:
013: This Program is free software; you can redistribute it and/or modify it under
014: the terms of the GNU General Public License as published by the Free Software
015: Foundation; either version 2 of the License, or (at your option) any later version.
016:
017: This Program is distributed in the hope that it will be useful, but WITHOUT ANY
018: WARRANTY, either expressed or implied; without even the implied warranty of
019: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the License for more details.
020:
021: You should have received a copy of the GNU General Public License along with this
022: Program (see README files); if not, go to the GNU website (http://www.gnu.org/).
023:
024: Redistribution and use, with or without modification, are permitted provided that such
025: redistributions retain the above copyright notice, license and disclaimer, along with
026: this list of conditions.
027: */
028:
029: package it.stefanochizzolini.clown.tokens;
030:
031: import it.stefanochizzolini.clown.bytes.Buffer;
032: import it.stefanochizzolini.clown.bytes.IInputStream;
033: import it.stefanochizzolini.clown.files.File;
034: import it.stefanochizzolini.clown.objects.PdfArray;
035: import it.stefanochizzolini.clown.objects.PdfBoolean;
036: import it.stefanochizzolini.clown.objects.PdfDataObject;
037: import it.stefanochizzolini.clown.objects.PdfDate;
038: import it.stefanochizzolini.clown.objects.PdfDictionary;
039: import it.stefanochizzolini.clown.objects.PdfDirectObject;
040: import it.stefanochizzolini.clown.objects.PdfHex;
041: import it.stefanochizzolini.clown.objects.PdfInteger;
042: import it.stefanochizzolini.clown.objects.PdfLiteral;
043: import it.stefanochizzolini.clown.objects.PdfName;
044: import it.stefanochizzolini.clown.objects.PdfNull;
045: import it.stefanochizzolini.clown.objects.PdfReal;
046: import it.stefanochizzolini.clown.objects.PdfReference;
047: import it.stefanochizzolini.clown.objects.PdfStream;
048: import java.io.EOFException;
049: import java.util.Date;
050:
051: /**
052: Token parser.
053: <h3>Contract</h3>
054: <ul>
055: <li>Preconditions:
056: <ol>
057: <li>(none).</li>
058: </ol>
059: </li>
060: <li>Postconditions:
061: <ol>
062: <li>(none).</li>
063: </ol>
064: </li>
065: <li>Invariants:
066: <ol>
067: <li>Stream data IS kept untouched.</li>
068: </ol>
069: </li>
070: <li>Side-effects:
071: <ol>
072: <li>(none).</li>
073: </ol>
074: </li>
075: </ul>
076: */
077: public class Parser {
078: // <class>
079: // <classes>
public class Reference {
  // <class>
  // <fields>
  /** Object number of the referenced indirect object. */
  private final int objectNumber;
  /** Generation number of the referenced indirect object. */
  private final int generationNumber;

  // </fields>

  // <constructors>
  /**
  Creates the token-level representation of an indirect reference
  (the <code>int int 'R'</code> pattern recognized by the parser).
  @param objectNumber Object number (first integer of the pattern).
  @param generationNumber Generation number (second integer of the pattern).
  */
  private Reference(int objectNumber, int generationNumber) {
    this.generationNumber = generationNumber;
    this.objectNumber = objectNumber;
  }

  // </constructors>

  // <interface>
  // <public>
  /**
  Gets the object number.
  @return The object number given at construction.
  */
  public int getObjectNumber() {
    return this.objectNumber;
  }

  /**
  Gets the generation number.
  @return The generation number given at construction.
  */
  public int getGenerationNumber() {
    return this.generationNumber;
  }
  // </public>
  // </interface>
  // </class>
}
109:
110: // </classes>
111:
112: // <static>
113: // <fields>
/** Signature the file header must start with; checked by retrieveVersion(), which reads the version digits right after it. */
private static final String PdfHeader = "%PDF-";
115:
116: // </fields>
117:
118: // <interface>
119: // <protected>
120: /**
121: Evaluates whether a character is a delimiter [PDF:1.6:3.1.1].
122: */
123: protected static boolean isDelimiter(int c) {
124: return (c == '(' || c == ')' || c == '<' || c == '>'
125: || c == '[' || c == ']' || c == '/' || c == '%');
126: }
127:
128: /**
129: Evaluates whether a character is an EOL marker [PDF:1.6:3.1.1].
130: */
131: protected static boolean isEOL(int c) {
132: return (c == 12 || c == 15);
133: }
134:
135: /**
136: Evaluates whether a character is a white-space [PDF:1.6:3.1.1].
137: */
138: protected static boolean isWhitespace(int c) {
139: return (c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32);
140: }
141:
142: // </protected>
143: // </interface>
144: // </static>
145:
146: // <dynamic>
147: // <fields>
/** File handed to the PdfReference objects built by parsePdfObject(). */
private File file;
/** Byte stream the tokens are read from. */
private IInputStream stream;
/** Value of the current token, as last set by moveNext() (null for token types carrying no value). */
private Object token;
/** Type of the current token, as last set by moveNext(). */
private TokenTypeEnum tokenType;

/** Set while moveNext() is looking ahead for an indirect reference; while set, digits are parsed as plain numbers. */
private boolean multipleTokenParsing;
154:
155: // </fields>
156:
157: // <constructors>
/**
Creates a parser reading tokens from the given stream.
@param stream Byte stream to tokenize.
@param file File handed to the indirect references created while parsing.
*/
Parser(IInputStream stream, File file) {
  this.file = file;
  this.stream = stream;
}
162:
163: // </constructors>
164:
165: // <interface>
166: // <public>
167: public long getLength() {
168: return stream.getLength();
169: }
170:
171: public long getPosition() {
172: return stream.getPosition();
173: }
174:
175: public IInputStream getStream() {
176: return stream;
177: }
178:
179: /**
180: Gets the currently-parsed token.
181: @return The current token.
182: */
183: public Object getToken() {
184: return token;
185: }
186:
187: /**
188: Gets the currently-parsed token type.
189: @return The current token type.
190: */
191: public TokenTypeEnum getTokenType() {
192: return tokenType;
193: }
194:
195: public int hashCode() {
196: return stream.hashCode();
197: }
198:
199: /**
200: @param offset Number of tokens to be skipped before reaching the intended one.
201: */
202: public boolean moveNext(int offset) throws FileFormatException {
203: for (int index = 0; index < offset; index++) {
204: if (!moveNext())
205: return false;
206: }
207:
208: return true;
209: }
210:
211: /**
212: Parse the next token [PDF:1.6:3.1].
213: <h3>Contract</h3>
214: <ul>
215: <li>Preconditions:
216: <ol>
217: <li>To properly parse the current token, the pointer MUST be just before its starting (leading whitespaces are ignored).</li>
218: </ol>
219: </li>
220: <li>Postconditions:
221: <ol>
222: <li id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</li>
223: </ol>
224: </li>
225: <li>Invariants:
226: <ol>
227: <li>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</li>
228: </ol>
229: </li>
230: <li>Side-effects:
231: <ol>
232: <li>See <a href="#moveNext_contract_post[0]">Postconditions</a>.</li>
233: </ol>
234: </li>
235: </ul>
236: @return Whether a new token was found.
237: */
238: public boolean moveNext() throws FileFormatException {
239: /*
240: NOTE: It'd be interesting to evaluate an alternative regular-expression-based
241: implementation...
242: */
243: StringBuilder buffer = null;
244: token = null;
245: int c = 0;
246:
247: // Skip leading white-space characters [PDF:1.6:3.1.1].
248: try {
249: do {
250: c = stream.readUnsignedByte();
251: } while (isWhitespace(c)); // Keep goin' till there's a white-space character...
252: } catch (EOFException e) {
253: return false;
254: }
255:
256: // Which character is it?
257: switch (c) {
258: case '/': // Name [PDF:1.6:3.2.4].
259: tokenType = TokenTypeEnum.Name;
260:
261: /*
262: NOTE: As name objects are atomic symbols uniquely defined by sequences of characters,
263: the bytes making up the name are never treated as text, so here they are just
264: passed through without unescaping.
265: */
266: buffer = new StringBuilder();
267: try {
268: while (true) {
269: c = stream.readUnsignedByte();
270: if (isDelimiter(c) || isWhitespace(c))
271: break;
272:
273: buffer.append((char) c);
274: }
275: } catch (EOFException e) {
276: throw new FileFormatException(
277: "Unexpected EOF (malformed name object).", e,
278: stream.getPosition());
279: }
280:
281: stream.skip(-1); // Recover the first byte after the current token.
282: break;
283: case '0':
284: case '1':
285: case '2':
286: case '3':
287: case '4':
288: case '5':
289: case '6':
290: case '7':
291: case '8':
292: case '9':
293: case '.':
294: case '-':
295: case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
296: switch (c) {
297: case '.': // Decimal point.
298: tokenType = TokenTypeEnum.Real;
299: break;
300: case '-':
301: case '+': // Signum.
302: tokenType = TokenTypeEnum.Integer; // By default (it may be real).
303: break;
304: default: // Digit.
305: if (multipleTokenParsing) // Plain number (multiple token parsing -- see indirect reference search).
306: {
307: tokenType = TokenTypeEnum.Integer; // By default (it may be real).
308: } else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
309: {
310: /*
311: NOTE: We need to identify this pattern:
312: ref := { int int 'R' }
313: */
314: // Enable multiple token parsing!
315: // NOTE: This state MUST be disabled before returning.
316: multipleTokenParsing = true;
317:
318: // 1. Object number.
319: // Try the possible object number!
320: stream.skip(-1);
321: moveNext();
322: // Isn't it a valid object number?
323: if (tokenType != TokenTypeEnum.Integer) {
324: // Disable multiple token parsing!
325: multipleTokenParsing = false;
326: return true;
327: }
328: // Assign object number!
329: int objectNumber = (Integer) token;
330: // Backup the recovery position!
331: long oldOffset = stream.getPosition();
332:
333: // 2. Generation number.
334: // Try the possible generation number!
335: moveNext();
336: // Isn't it a valid generation number?
337: if (tokenType != TokenTypeEnum.Integer) {
338: // Rollback!
339: stream.seek(oldOffset);
340: token = objectNumber;
341: tokenType = TokenTypeEnum.Integer;
342: // Disable multiple token parsing!
343: multipleTokenParsing = false;
344: return true;
345: }
346: // Assign generation number!
347: int generationNumber = (Integer) token;
348:
349: // 3. Reference keyword.
350: // Try the possible reference keyword!
351: moveNext();
352: // Isn't it a valid reference keyword?
353: if (tokenType != TokenTypeEnum.Reference) {
354: // Rollback!
355: stream.seek(oldOffset);
356: token = objectNumber;
357: tokenType = TokenTypeEnum.Integer;
358: // Disable multiple token parsing!
359: multipleTokenParsing = false;
360: return true;
361: }
362: token = new Reference(objectNumber,
363: generationNumber);
364: // Disable multiple token parsing!
365: multipleTokenParsing = false;
366: return true;
367: }
368: break;
369: }
370:
371: // Building the number...
372: buffer = new StringBuilder();
373: try {
374: do {
375: buffer.append((char) c);
376: c = stream.readUnsignedByte();
377: if (c == '.')
378: tokenType = TokenTypeEnum.Real;
379: else if (c < '0' || c > '9')
380: break;
381: } while (true);
382: } catch (EOFException e) {
383: throw new FileFormatException(
384: "Unexpected EOF (malformed number object).", e,
385: stream.getPosition());
386: }
387:
388: stream.skip(-1); // Recover the first byte after the current token.
389: break;
390: case '[': // Array (begin).
391: tokenType = TokenTypeEnum.ArrayBegin;
392: break;
393: case ']': // Array (end).
394: tokenType = TokenTypeEnum.ArrayEnd;
395: break;
396: case '<': // Dictionary (begin) | Hexadecimal string.
397: try {
398: c = stream.readUnsignedByte();
399: } catch (EOFException e) {
400: throw new FileFormatException(
401: "Unexpected EOF (isolated opening angle-bracket character).",
402: e, stream.getPosition());
403: }
404: // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
405: if (c == '<') {
406: tokenType = TokenTypeEnum.DictionaryBegin;
407: break;
408: }
409:
410: // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
411: tokenType = TokenTypeEnum.Hex;
412:
413: // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
414: buffer = new StringBuilder();
415: try {
416: while (c != '>') // NOT string end.
417: {
418: buffer.append((char) c);
419:
420: c = stream.readUnsignedByte();
421: }
422: } catch (EOFException e) {
423: throw new FileFormatException(
424: "Unexpected EOF (malformed hex string).", e,
425: stream.getPosition());
426: }
427:
428: break;
429: case '>': // Dictionary (end).
430: try {
431: c = stream.readUnsignedByte();
432: } catch (EOFException e) {
433: throw new FileFormatException(
434: "Unexpected EOF (malformed dictionary).", e,
435: stream.getPosition());
436: }
437: if (c != '>')
438: throw new FileFormatException("Malformed dictionary.",
439: stream.getPosition());
440:
441: tokenType = TokenTypeEnum.DictionaryEnd;
442:
443: break;
444: case '%': // Comment [PDF:1.6:3.1.2].
445: tokenType = TokenTypeEnum.Comment;
446: // Skipping comment content...
447: try {
448: do {
449: c = stream.readUnsignedByte();
450: } while (!isEOL(c));
451: } catch (EOFException e) {/* Let it go. */
452: }
453:
454: break;
455: case '(': // Literal string [PDF:1.6:3.2.3].
456: tokenType = TokenTypeEnum.Literal;
457:
458: /*
459: NOTE: As literal objects are textual, their characters are unescaped when deserialized.
460: */
461: buffer = new StringBuilder();
462: int level = 0;
463: try {
464: while (true) {
465: c = stream.readUnsignedByte();
466: if (c == '(')
467: level++;
468: else if (c == ')')
469: level--;
470: else if (c == '\\') {
471: boolean lineBreak = false;
472: c = stream.readUnsignedByte();
473: switch (c) {
474: case 'n':
475: c = '\n';
476: break;
477: case 'r':
478: c = '\r';
479: break;
480: case 't':
481: c = '\t';
482: break;
483: case 'b':
484: c = '\b';
485: break;
486: case 'f':
487: c = '\f';
488: break;
489: case '(':
490: case ')':
491: case '\\':
492: break;
493: case '\r':
494: lineBreak = true;
495: c = stream.readUnsignedByte();
496: if (c != '\n')
497: stream.skip(-1);
498: break;
499: case '\n':
500: lineBreak = true;
501: break;
502: default: {
503: // Is it outside the octal encoding?
504: if (c < '0' || c > '7')
505: break;
506:
507: // Octal.
508: int octal = c - '0';
509: c = stream.readUnsignedByte();
510: // Octal end?
511: if (c < '0' || c > '7') {
512: c = octal;
513: stream.skip(-1);
514: break;
515: }
516: octal = (octal << 3) + c - '0';
517: c = stream.readUnsignedByte();
518: // Octal end?
519: if (c < '0' || c > '7') {
520: c = octal;
521: stream.skip(-1);
522: break;
523: }
524: octal = (octal << 3) + c - '0';
525: c = octal & 0xff;
526: break;
527: }
528: }
529: if (lineBreak)
530: continue;
531: } else if (c == '\r') {
532: c = stream.readUnsignedByte();
533: if (c != '\n') {
534: c = '\n';
535: stream.skip(-1);
536: }
537: }
538: if (level == -1)
539: break;
540:
541: buffer.append((char) c);
542: }
543: } catch (EOFException e) {
544: throw new FileFormatException(
545: "Unexpected EOF (malformed literal string).",
546: e, stream.getPosition());
547: }
548:
549: break;
550: case 'R': // Indirect reference.
551: tokenType = TokenTypeEnum.Reference;
552:
553: break;
554: default: // Keyword object.
555: tokenType = TokenTypeEnum.Keyword;
556:
557: buffer = new StringBuilder();
558: try {
559: do {
560: buffer.append((char) c);
561: c = stream.readUnsignedByte();
562: } while (!isDelimiter(c) && !isWhitespace(c));
563: } catch (EOFException e) {/* Let it go. */
564: }
565: stream.skip(-1); // Recover the first byte after the current token.
566:
567: break;
568: }
569:
570: if (buffer != null) {
571: /*
572: Here we prepare the current token state.
573: */
574: // Which token type?
575: switch (tokenType) {
576: case Keyword:
577: token = buffer.toString();
578: // Late recognition.
579: if (((String) token).equals("false")
580: || ((String) token).equals("true")) // Boolean.
581: {
582: tokenType = TokenTypeEnum.Boolean;
583: token = Boolean.parseBoolean((String) token);
584: } else if (((String) token).equals("null")) // Null.
585: {
586: tokenType = TokenTypeEnum.Null;
587: token = null;
588: }
589: break;
590: case Comment:
591: case Hex:
592: case Name:
593: token = buffer.toString();
594: break;
595: case Literal:
596: token = buffer.toString();
597: // Late recognition.
598: if (((String) token).startsWith("D:")) // Date.
599: {
600: tokenType = TokenTypeEnum.Date;
601: token = PdfDate.toDate((String) token);
602: }
603: break;
604: case Integer:
605: token = Integer.parseInt(buffer.toString());
606: break;
607: case Real:
608: token = Float.parseFloat(buffer.toString());
609: break;
610: }
611: }
612:
613: return true;
614: }
615:
616: /**
617: Parse the current PDF object [PDF:1.6:3.2].
618: <h3>Contract</h3>
619: <ul>
620: <li>Preconditions:
621: <ol>
622: <li>When this method is invoked, the pointer MUST be at the first
623: token of the requested object.</li>
624: </ol>
625: </li>
626: <li>Postconditions:
627: <ol>
628: <li id="parsePdfObject_contract_post[0]">When this method terminates,
629: the pointer IS at the last token of the requested object.</li>
630: </ol>
631: </li>
632: <li>Invariants:
633: <ol>
634: <li>(none).</li>
635: </ol>
636: </li>
637: <li>Side-effects:
638: <ol>
639: <li>See <a href="#parsePdfObject_contract_post[0]">Postconditions</a>.</li>
640: </ol>
641: </li>
642: </ul>
643: */
644: public PdfDataObject parsePdfObject() throws FileFormatException {
645: /*
646: NOTE: Object parsing is intrinsically a sequential operation tied to the stream pointer.
647: Calls bound towards other classes are potentially disruptive for the predictability of
648: the position of the stream pointer, so we are forced to carefully keep track of our
649: current position in order to recover its proper state after any outbound call.
650: */
651:
652: // Which token type?
653: switch (tokenType) {
654: case Integer:
655: return new PdfInteger((Integer) token);
656: case Name:
657: return new PdfName((String) token, true);
658: case Reference:
659: /*
660: NOTE: Curiously, PDF references are the only primitive objects that require
661: a file reference. That's because they deal with indirect objects, which are strongly
662: coupled with the current state of the file: so, PDF references are the fundamental
663: bridge between the token layer and the file layer.
664: */
665: return new PdfReference((Reference) token, file);
666: case Literal:
667: return new PdfLiteral((String) token);
668: case DictionaryBegin:
669: PdfDictionary dictionary = new PdfDictionary();
670: // Populate the dictionary.
671: while (true) {
672: // Key.
673: moveNext();
674: if (tokenType == TokenTypeEnum.DictionaryEnd)
675: break;
676: PdfName key = (PdfName) parsePdfObject();
677:
678: // Value.
679: moveNext();
680: PdfDirectObject value = (PdfDirectObject) parsePdfObject();
681:
682: // Add the current entry to the dictionary!
683: dictionary.put(key, value);
684: }
685:
686: int oldOffset = (int) stream.getPosition();
687: moveNext();
688: // Is this dictionary the header of a stream object [PDF:1.6:3.2.7]?
689: if ((tokenType == TokenTypeEnum.Keyword)
690: && token.equals("stream")) // Stream.
691: {
692: // Keep track of current position!
693: long position = stream.getPosition();
694:
695: // Get the stream length!
696: /*
697: NOTE: Indirect reference resolution is an outbound call (stream pointer hazard!),
698: so we need to recover our current position after it returns.
699: */
700: int length = ((PdfInteger) File.resolve(dictionary
701: .get(PdfName.Length))).getValue();
702:
703: // Come back to current position!
704: stream.seek(position);
705:
706: skipWhitespace();
707:
708: // Copy the stream data to the instance!
709: byte[] data = new byte[length];
710: try {
711: stream.read(data);
712: } catch (EOFException e) {
713: throw new FileFormatException(
714: "Unexpected EOF (malformed stream object).",
715: e, stream.getPosition());
716: }
717:
718: moveNext(); // Postcondition (last token should be 'endstream' keyword).
719:
720: return new PdfStream(dictionary, new Buffer(data));
721: } else // Simple dictionary.
722: {
723: stream.seek(oldOffset); // Restore postcondition (last token should be the dictionary end).
724:
725: return dictionary;
726: }
727: case ArrayBegin:
728: PdfArray array = new PdfArray();
729: // Populate the array.
730: while (true) {
731: // Value.
732: moveNext();
733: if (tokenType == TokenTypeEnum.ArrayEnd)
734: break;
735:
736: // Add the current item to the array!
737: array.add((PdfDirectObject) parsePdfObject());
738: }
739: return array;
740: case Real:
741: return new PdfReal((Float) token);
742: case Boolean:
743: return new PdfBoolean((Boolean) token);
744: case Date:
745: return new PdfDate((Date) token);
746: case Hex:
747: return new PdfHex((String) token);
748: case Null:
749: return PdfNull.Null;
750: default:
751: return null;
752: }
753: }
754:
755: /**
756: Retrieves the PDF version of the file [PDF:1.6:3.4.1].
757: <h3>Contract</h3>
758: <ul>
759: <li>Preconditions:
760: <ol>
761: <li>(none).</li>
762: </ol>
763: </li>
764: <li>Postconditions:
765: <ol>
766: <li>(none).</li>
767: </ol>
768: </li>
769: <li>Invariants:
770: <ol>
771: <li>(none).</li>
772: </ol>
773: </li>
774: <li>Side-effects:
775: <ol>
776: <li>The pointer is released at an undefined location.</li>
777: </ol>
778: </li>
779: </ul>
780: */
781: public String retrieveVersion() throws FileFormatException {
782: stream.seek(0);
783: String header;
784: try {
785: header = stream.readString(10);
786: } catch (EOFException e) {
787: throw new FileFormatException(
788: "Unexpected EOF (malformed version data).", e,
789: stream.getPosition());
790: }
791: if (!header.startsWith(PdfHeader))
792: throw new FileFormatException("PDF header not found.",
793: stream.getPosition());
794:
795: return header.substring(PdfHeader.length(),
796: PdfHeader.length() + 3);
797: }
798:
799: /**
800: Retrieves the starting position of the last xref-table section.
801: @see retrieveXRefOffset(long)
802: */
803: public long retrieveXRefOffset() throws FileFormatException {
804: return retrieveXRefOffset(stream.getLength());
805: }
806:
807: /**
808: Retrieves the starting position of an xref-table section [PDF:1.6:3.4.4].
809: <h3>Contract</h3>
810: <ul>
811: <li>Preconditions:
812: <ol>
813: <li>(none).</li>
814: </ol>
815: </li>
816: <li>Postconditions:
817: <ol>
818: <li>(none).</li>
819: </ol>
820: </li>
821: <li>Invariants:
822: <ol>
823: <li>(none).</li>
824: </ol>
825: </li>
826: <li>Side-effects:
827: <ol>
828: <li>The pointer is released at an undefined location.</li>
829: </ol>
830: </li>
831: </ul>
832: @param offset Position of the EOF marker related to the section intended to be parsed.
833: */
834: public long retrieveXRefOffset(long offset)
835: throws FileFormatException {
836: final int chunkSize = 1024; // [PDF:1.6:H.3.18].
837:
838: // Move back before 'startxref' keyword!
839: long position = offset - chunkSize;
840: if (position < 0) {
841: position = 0;
842: } // [FIX:0.0.4:1] It failed to deal with less-than-1024-byte-long PDF files.
843: stream.seek(position);
844:
845: // Get 'startxref' keyword position!
846: int index;
847: try {
848: index = stream.readString(chunkSize).lastIndexOf(
849: "startxref");
850: } catch (EOFException e) {
851: throw new FileFormatException(
852: "Unexpected EOF (malformed 'startxref' tag).", e,
853: stream.getPosition());
854: }
855: if (index < 0)
856: throw new FileFormatException("PDF startxref not found.",
857: stream.getPosition());
858: // Go past the 'startxref' keyword!
859: stream.seek(position + index);
860: moveNext();
861:
862: // Get the xref offset!
863: moveNext();
864: if (tokenType != TokenTypeEnum.Integer)
865: throw new FileFormatException("PDF startxref malformed.",
866: stream.getPosition());
867:
868: return (Integer) token;
869: }
870:
871: public void seek(long position) {
872: stream.seek(position);
873: }
874:
875: public void skip(long offset) {
876: stream.skip(offset);
877: }
878:
879: /**
880: Moves to the last whitespace after the current position in order to let read
881: the first non-whitespace.
882: */
883: public boolean skipWhitespace() {
884: int b;
885: try {
886: do {
887: b = stream.readUnsignedByte();
888: } while (isWhitespace(b)); // Keep goin' till there's a white-space character...
889: } catch (EOFException e) {
890: return false;
891: }
892: stream.skip(-1); // Recover the last whitespace position.
893:
894: return true;
895: }
896: // </public>
897: // </interface>
898: // </dynamic>
899: // </class>
900: }
|