001: /*
002: Copyright © 2006,2007 Stefano Chizzolini. http://clown.stefanochizzolini.it
003:
004: Contributors:
005: * Stefano Chizzolini (original code developer, http://www.stefanochizzolini.it)
006: * Haakan Aakerberg (bugfix contributor):
007: - [FIX:0.0.4:4]
008:
009: This file should be part of the source code distribution of "PDF Clown library"
010: (the Program): see the accompanying README files for more info.
011:
012: This Program is free software; you can redistribute it and/or modify it under
013: the terms of the GNU General Public License as published by the Free Software
014: Foundation; either version 2 of the License, or (at your option) any later version.
015:
016: This Program is distributed in the hope that it will be useful, but WITHOUT ANY
017: WARRANTY, either expressed or implied; without even the implied warranty of
018: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the License for more details.
019:
020: You should have received a copy of the GNU General Public License along with this
021: Program (see README files); if not, go to the GNU website (http://www.gnu.org/).
022:
023: Redistribution and use, with or without modification, are permitted provided that such
024: redistributions retain the above copyright notice, license and disclaimer, along with
025: this list of conditions.
026: */
027:
028: package it.stefanochizzolini.clown.documents.contents.tokens;
029:
030: import it.stefanochizzolini.clown.bytes.Buffer;
031: import it.stefanochizzolini.clown.bytes.IBuffer;
032: import it.stefanochizzolini.clown.bytes.IInputStream;
033: import it.stefanochizzolini.clown.documents.contents.objects.BeginText;
034: import it.stefanochizzolini.clown.documents.contents.objects.BeginInlineImage;
035: import it.stefanochizzolini.clown.documents.contents.objects.ContentObject;
036: import it.stefanochizzolini.clown.documents.contents.objects.EndText;
037: import it.stefanochizzolini.clown.documents.contents.objects.EndInlineImage;
038: import it.stefanochizzolini.clown.documents.contents.objects.Operation;
039: import it.stefanochizzolini.clown.documents.contents.objects.InlineImage;
040: import it.stefanochizzolini.clown.documents.contents.objects.InlineImageBody;
041: import it.stefanochizzolini.clown.documents.contents.objects.InlineImageHeader;
042: import it.stefanochizzolini.clown.documents.contents.objects.LocalGraphicsState;
043: import it.stefanochizzolini.clown.documents.contents.objects.PaintShadingObject;
044: import it.stefanochizzolini.clown.documents.contents.objects.PaintXObject;
045: import it.stefanochizzolini.clown.documents.contents.objects.RestoreGraphicsState;
046: import it.stefanochizzolini.clown.documents.contents.objects.SaveGraphicsState;
047: import it.stefanochizzolini.clown.documents.contents.objects.ShadingObject;
048: import it.stefanochizzolini.clown.documents.contents.objects.Text;
049: import it.stefanochizzolini.clown.documents.contents.objects.XObject;
050: import it.stefanochizzolini.clown.objects.PdfArray;
051: import it.stefanochizzolini.clown.objects.PdfBoolean;
052: import it.stefanochizzolini.clown.objects.PdfDataObject;
053: import it.stefanochizzolini.clown.objects.PdfDate;
054: import it.stefanochizzolini.clown.objects.PdfDictionary;
055: import it.stefanochizzolini.clown.objects.PdfDirectObject;
056: import it.stefanochizzolini.clown.objects.PdfHex;
057: import it.stefanochizzolini.clown.objects.PdfInteger;
058: import it.stefanochizzolini.clown.objects.PdfLiteral;
059: import it.stefanochizzolini.clown.objects.PdfName;
060: import it.stefanochizzolini.clown.objects.PdfNull;
061: import it.stefanochizzolini.clown.objects.PdfReal;
062: import it.stefanochizzolini.clown.objects.PdfReference;
063: import it.stefanochizzolini.clown.objects.PdfStream;
064: import it.stefanochizzolini.clown.tokens.FileFormatException;
065: import it.stefanochizzolini.clown.tokens.TokenTypeEnum;
066: import it.stefanochizzolini.clown.util.NotImplementedException;
067:
068: import java.io.EOFException;
069: import java.util.ArrayList;
070: import java.util.Date;
071: import java.util.List;
072:
073: /**
074: Content stream parser [PDF:1.6:3.7.1].
075: @version 0.0.4
076: */
077: public class Parser {
078: /*
TODO:IMPL this parser evaluates a subset of the lexical domain of the token parser (clown.serialization.Parser): it would be better to derive both parsers from a common parsing engine in order to avoid unwieldy duplication.
080: */
081: // <class>
082: // <static>
083: // <interface>
084: // <protected>
085: protected static int getHex(int c) {
086: if (c >= '0' && c <= '9')
087: return (c - '0');
088: if (c >= 'A' && c <= 'F')
089: return (c - 'A' + 10);
090: if (c >= 'a' && c <= 'f')
091: return (c - 'a' + 10);
092: return -1;
093: }
094:
095: /**
096: Evaluates whether a character is a delimiter [PDF:1.6:3.1.1].
097: */
098: protected static boolean isDelimiter(int c) {
099: return (c == '(' || c == ')' || c == '<' || c == '>'
100: || c == '[' || c == ']' || c == '/' || c == '%');
101: }
102:
103: /**
104: Evaluates whether a character is an EOL marker [PDF:1.6:3.1.1].
105: */
106: protected static boolean isEOL(int c) {
107: return (c == 12 || c == 15);
108: }
109:
110: /**
111: Evaluates whether a character is a white-space [PDF:1.6:3.1.1].
112: */
113: protected static boolean isWhitespace(int c) {
114: return (c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32);
115: }
116:
117: // </protected>
118: // </interface>
119: // </static>
120:
121: // <dynamic>
122: // <fields>
123: private final PdfDataObject contentStream;
124:
125: private long basePosition;
126: private IInputStream stream;
127: private int streamIndex = -1;
128: private Object token;
129: private TokenTypeEnum tokenType;
130:
131: // </fields>
132:
133: // <constructors>
/**
  Creates a parser over the given content stream and selects its first stream.
  <h3>Remarks</h3>
  <p>For internal use only.</p>

  @param contentStream Content stream to parse (either a single stream or an array of streams).
*/
public Parser(PdfDataObject contentStream) {
  this.contentStream = contentStream;

  // Select the first stream to read from.
  moveNextStream();
}
143:
144: // </constructors>
145:
146: // <interface>
147: // <public>
148: /**
149: Gets the content stream on which parsing is done.
150: <h3>Remarks</h3>
151: <p>A content stream may be made up of either a single stream or an array of streams.</p>
152: */
153: public PdfDataObject getContentStream() {
154: return contentStream;
155: }
156:
157: public long getLength() {
158: if (contentStream instanceof PdfStream) // Single stream.
159: return ((PdfStream) contentStream).getBody().getLength();
160: else // Array of streams.
161: {
162: int length = 0;
163: for (PdfDirectObject stream : (PdfArray) contentStream) {
164: length += ((PdfStream) ((PdfReference) stream)
165: .getDataObject()).getBody().getLength();
166: }
167: return length;
168: }
169: }
170:
171: public long getPosition() {
172: return basePosition + stream.getPosition();
173: }
174:
175: /**
176: Gets the current stream.
177: */
178: public IInputStream getStream() {
179: return stream;
180: }
181:
182: /**
183: Gets the current stream index.
184: */
185: public int getStreamIndex() {
186: return streamIndex;
187: }
188:
189: /**
190: Gets the currently-parsed token.
191: @return The current token.
192: */
193: public Object getToken() {
194: return token;
195: }
196:
197: /**
198: Gets the currently-parsed token type.
199: @return The current token type.
200: */
201: public TokenTypeEnum getTokenType() {
202: return tokenType;
203: }
204:
205: /**
206: @param offset Number of tokens to be skipped before reaching the intended one.
207: */
208: public boolean moveNext(int offset) throws FileFormatException {
209: for (int index = 0; index < offset; index++) {
210: if (!moveNext())
211: return false;
212: }
213:
214: return true;
215: }
216:
217: /**
218: Parse the next token [PDF:1.6:3.1].
219: <h3>Contract</h3>
220: <ul>
221: <li>Preconditions:
222: <ol>
223: <li>To properly parse the current token, the pointer MUST be just before its starting (leading whitespaces are ignored).</li>
224: </ol>
225: </li>
226: <li>Postconditions:
227: <ol>
228: <li id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</li>
229: </ol>
230: </li>
231: <li>Invariants:
232: <ol>
233: <li>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</li>
234: </ol>
235: </li>
236: <li>Side-effects:
237: <ol>
238: <li>See <a href="#moveNext_contract_post[0]">Postconditions</a>.</li>
239: </ol>
240: </li>
241: </ul>
242: @return Whether a new token was found.
243: */
244: public boolean moveNext() throws FileFormatException {
245: /*
246: NOTE: It'd be interesting to evaluate an alternative regular-expression-based
247: implementation...
248: */
249: StringBuilder buffer = null;
250: token = null;
251: int c = 0;
252:
253: // Skip leading white-space characters [PDF:1.6:3.1.1].
254: while (true) {
255: try {
256: do {
257: c = stream.readUnsignedByte();
258: } while (isWhitespace(c)); // Keep goin' till there's a white-space character...
259: break;
260: } catch (EOFException e) {
261: /* NOTE: Current stream has finished. */
262: // Move to the next stream!
263: moveNextStream();
264: } catch (Exception e) {
265: // No more streams?
266: if (stream == null)
267: return false;
268: }
269: }
270:
271: // Which character is it?
272: switch (c) {
273: case '/': // Name.
274: tokenType = TokenTypeEnum.Name;
275:
276: buffer = new StringBuilder();
277: try {
278: while (true) {
279: c = stream.readUnsignedByte();
280: if (isDelimiter(c) || isWhitespace(c))
281: break;
282: // Is it an hexadecimal code [PDF:1.6:3.2.4]?
283: if (c == '#') {
284: try {
285: c = (getHex(stream.readUnsignedByte()) << 4)
286: + getHex(stream.readUnsignedByte());
287: } catch (EOFException e) {
288: throw new FileFormatException(
289: "Unexpected EOF (malformed hexadecimal code in name object).",
290: e, stream.getPosition());
291: }
292: }
293:
294: buffer.append((char) c);
295: }
296: } catch (EOFException e) {
297: throw new FileFormatException(
298: "Unexpected EOF (malformed name object).", e,
299: stream.getPosition());
300: }
301:
302: stream.skip(-1); // Recover the first byte after the current token.
303: break;
304: case '0':
305: case '1':
306: case '2':
307: case '3':
308: case '4':
309: case '5':
310: case '6':
311: case '7':
312: case '8':
313: case '9':
314: case '.':
315: case '-':
316: case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
317: switch (c) {
318: case '.': // Decimal point.
319: tokenType = TokenTypeEnum.Real;
320: break;
321: default: // Digit or signum.
322: tokenType = TokenTypeEnum.Integer; // By default (it may be real).
323: break;
324: }
325:
326: // Building the number...
327: buffer = new StringBuilder();
328: try {
329: do {
330: buffer.append((char) c);
331: c = stream.readUnsignedByte();
332: if (c == '.')
333: tokenType = TokenTypeEnum.Real;
334: else if (c < '0' || c > '9')
335: break;
336: } while (true);
337: } catch (EOFException e) {
338: throw new FileFormatException(
339: "Unexpected EOF (malformed number object).", e,
340: stream.getPosition());
341: }
342:
343: stream.skip(-1); // Recover the first byte after the current token.
344: break;
345: case '[': // Array (begin).
346: tokenType = TokenTypeEnum.ArrayBegin;
347: break;
348: case ']': // Array (end).
349: tokenType = TokenTypeEnum.ArrayEnd;
350: break;
351: case '<': // Dictionary (begin) | Hexadecimal string.
352: try {
353: c = stream.readUnsignedByte();
354: } catch (EOFException e) {
355: throw new FileFormatException(
356: "Unexpected EOF (isolated opening angle-bracket character).",
357: e, stream.getPosition());
358: }
359: // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
360: if (c == '<') {
361: tokenType = TokenTypeEnum.DictionaryBegin;
362: break;
363: }
364:
365: // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
366: tokenType = TokenTypeEnum.Hex;
367:
368: // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
369: buffer = new StringBuilder();
370: try {
371: while (c != '>') // NOT string end.
372: {
373: buffer.append((char) c);
374:
375: c = stream.readUnsignedByte();
376: }
377: } catch (EOFException e) {
378: throw new FileFormatException(
379: "Unexpected EOF (malformed hex string).", e,
380: stream.getPosition());
381: }
382:
383: break;
384: case '>': // Dictionary (end).
385: try {
386: c = stream.readUnsignedByte();
387: } catch (EOFException e) {
388: throw new FileFormatException(
389: "Unexpected EOF (malformed dictionary).", e,
390: stream.getPosition());
391: }
392: if (c != '>')
393: throw new FileFormatException("Malformed dictionary.",
394: stream.getPosition());
395:
396: tokenType = TokenTypeEnum.DictionaryEnd;
397:
398: break;
399: case '%': // Comment.
400: tokenType = TokenTypeEnum.Comment;
401: // Skipping comment content...
402: try {
403: do {
404: c = stream.readUnsignedByte();
405: } while (!isEOL(c));
406: } catch (EOFException e) {/* Let it go. */
407: }
408:
409: break;
410: case '(': // Literal string.
411: tokenType = TokenTypeEnum.Literal;
412:
413: buffer = new StringBuilder();
414: int level = 0;
415: try {
416: while (true) {
417: c = stream.readUnsignedByte();
418: if (c == '(')
419: level++;
420: else if (c == ')')
421: level--;
422: else if (c == '\\') {
423: boolean lineBreak = false;
424: c = stream.readUnsignedByte();
425: switch (c) {
426: case 'n':
427: c = '\n';
428: break;
429: case 'r':
430: c = '\r';
431: break;
432: case 't':
433: c = '\t';
434: break;
435: case 'b':
436: c = '\b';
437: break;
438: case 'f':
439: c = '\f';
440: break;
441: case '(':
442: case ')':
443: case '\\':
444: break;
445: case '\r':
446: lineBreak = true;
447: c = stream.readUnsignedByte();
448: if (c != '\n')
449: stream.skip(-1);
450: break;
451: case '\n':
452: lineBreak = true;
453: break;
454: default: {
455: // Is it outside the octal encoding?
456: if (c < '0' || c > '7')
457: break;
458:
459: // Octal [PDF:1.6:3.2.3].
460: int octal = c - '0';
461: c = stream.readUnsignedByte();
462: // Octal end?
463: if (c < '0' || c > '7') {
464: c = octal;
465: stream.skip(-1);
466: break;
467: }
468: octal = (octal << 3) + c - '0';
469: c = stream.readUnsignedByte();
470: // Octal end?
471: if (c < '0' || c > '7') {
472: c = octal;
473: stream.skip(-1);
474: break;
475: }
476: octal = (octal << 3) + c - '0';
477: c = octal & 0xff;
478: break;
479: }
480: }
481: if (lineBreak)
482: continue;
483: } else if (c == '\r') {
484: c = stream.readUnsignedByte();
485: if (c != '\n') {
486: c = '\n';
487: stream.skip(-1);
488: }
489: }
490: if (level == -1)
491: break;
492:
493: buffer.append((char) c);
494: }
495: } catch (EOFException e) {
496: throw new FileFormatException(
497: "Unexpected EOF (malformed literal string).",
498: e, stream.getPosition());
499: }
500:
501: break;
502: default: // Keyword.
503: tokenType = TokenTypeEnum.Keyword;
504:
505: buffer = new StringBuilder();
506: try {
507: do {
508: buffer.append((char) c);
509: c = stream.readUnsignedByte();
510: } while (!isDelimiter(c) && !isWhitespace(c));
511: } catch (EOFException e) {/* Let it go. */
512: }
513: stream.skip(-1); // Recover the first byte after the current token.
514:
515: break;
516: }
517:
518: if (buffer != null) {
519: /*
520: Here we prepare the current token state.
521: */
522: // Which token type?
523: switch (tokenType) {
524: case Keyword:
525: token = buffer.toString();
526: // Late recognition.
527: if (((String) token).equals("false")
528: || ((String) token).equals("true")) // Boolean.
529: {
530: tokenType = TokenTypeEnum.Boolean;
531: token = Boolean.parseBoolean((String) token);
532: } else if (((String) token).equals("null")) // Null.
533: {
534: tokenType = TokenTypeEnum.Null;
535: token = null;
536: }
537: break;
538: case Comment:
539: case Hex:
540: case Name:
541: token = buffer.toString();
542: break;
543: case Literal:
544: token = buffer.toString();
545: // Late recognition.
546: if (((String) token).startsWith("D:")) // Date.
547: {
548: tokenType = TokenTypeEnum.Date;
549: token = PdfDate.toDate((String) token);
550: }
551: break;
552: case Integer:
553: token = Integer.parseInt(buffer.toString());
554: break;
555: case Real:
556: token = Float.parseFloat(buffer.toString());
557: break;
558: }
559: }
560:
561: return true;
562: }
563:
564: /**
565: Parses the next content object [PDF:1.6:4.1], may it be a single operation or a graphics object.
566:
567: @version 0.0.4, 06/09/07
568: @since 0.0.4
569: */
570: public ContentObject parseContentObject()
571: throws FileFormatException {
572: //TODO:manage path objects!
573: final Operation operation = parseOperation();
574: // Single-operation graphics object?
575: if (operation instanceof PaintXObject) // External object.
576: return new XObject(operation);
577: else if (operation instanceof PaintShadingObject) // Shading object.
578: return new ShadingObject(operation);
579: // Multiple-operation graphics object begin?
580: else if (operation instanceof BeginText) // Text.
581: return new Text(
582: (List<Operation>) (List<? extends ContentObject>) parseContentObjects());
583: else if (operation instanceof SaveGraphicsState) // Local graphics state.
584: return new LocalGraphicsState(parseContentObjects());
585: else if (operation instanceof BeginInlineImage) // Inline image.
586: return parseInlineImage();
587: else
588: // Single operation.
589: return operation;
590: }
591:
592: public List<ContentObject> parseContentObjects()
593: throws FileFormatException {
594: final List<ContentObject> contentObjects = new ArrayList<ContentObject>();
595: while (moveNext()) {
596: ContentObject contentObject = parseContentObject();
597: // Multiple-operation graphics object end?
598: if (contentObject instanceof EndText // Text.
599: || contentObject instanceof RestoreGraphicsState // Local graphics state.
600: || contentObject instanceof EndInlineImage) // Inline image.
601: return contentObjects;
602:
603: contentObjects.add(contentObject);
604: }
605: return contentObjects;
606: }
607:
608: public Operation parseOperation() throws FileFormatException {
609: String operator = null;
610: final List<PdfDirectObject> operands = new ArrayList<PdfDirectObject>();
611: // Parsing the operation parts...
612: while (true) {
613: // Did we reach the operator keyword?
614: if (tokenType == TokenTypeEnum.Keyword) {
615: operator = (String) token;
616: break;
617: }
618:
619: operands.add(parsePdfObject());
620: moveNext();
621: }
622:
623: return Operation.get(operator, operands);
624: }
625:
626: /**
627: Parse the current PDF object [PDF:1.6:3.2].
628: <h3>Contract</h3>
629: <ul>
630: <li>Preconditions:
631: <ol>
632: <li>When this method is invoked, the pointer MUST be at the first
633: token of the requested object.</li>
634: </ol>
635: </li>
636: <li>Postconditions:
637: <ol>
638: <li id="parsePdfObject_contract_post[0]">When this method terminates,
639: the pointer IS at the last token of the requested object.</li>
640: </ol>
641: </li>
642: <li>Invariants:
643: <ol>
644: <li>(none).</li>
645: </ol>
646: </li>
647: <li>Side-effects:
648: <ol>
649: <li>See <a href="#parsePdfObject_contract_post[0]">Postconditions</a>.</li>
650: </ol>
651: </li>
652: </ul>
653: */
654: protected PdfDirectObject parsePdfObject()
655: throws FileFormatException {
656: switch (tokenType) {
657: case Integer:
658: return new PdfInteger((Integer) token);
659: case Name:
660: return new PdfName((String) token, true);
661: case Literal:
662: return new PdfLiteral((String) token);
663: case DictionaryBegin: {
664: PdfDictionary dictionary = new PdfDictionary();
665: // Populate the dictionary.
666: while (true) {
667: // Key.
668: moveNext();
669: if (tokenType == TokenTypeEnum.DictionaryEnd)
670: break;
671: PdfName key = (PdfName) parsePdfObject();
672:
673: // Value.
674: moveNext();
675: PdfDirectObject value = (PdfDirectObject) parsePdfObject();
676:
677: // Add the current entry to the dictionary!
678: dictionary.put(key, value);
679: }
680: return dictionary;
681: }
682: case ArrayBegin: {
683: PdfArray array = new PdfArray();
684: // Populate the array.
685: while (true) {
686: // Value.
687: moveNext();
688: if (tokenType == TokenTypeEnum.ArrayEnd)
689: break;
690:
691: // Add the current item to the array!
692: array.add((PdfDirectObject) parsePdfObject());
693: }
694: return array;
695: }
696: case Real:
697: return new PdfReal((Float) token);
698: case Boolean:
699: return new PdfBoolean((Boolean) token);
700: case Date:
701: return new PdfDate((Date) token);
702: case Hex:
703: return new PdfHex((String) token);
704: case Null:
705: return PdfNull.Null;
706: default:
707: return null;
708: }
709: }
710:
711: public void seek(long position) {
712: while (true) {
713: if (position < basePosition) //Before current stream.
714: {
715: if (!movePreviousStream())
716: throw new IllegalArgumentException(
717: "The 'position' argument is lower than acceptable.");
718: } else if (position > basePosition + stream.getLength()) // After current stream.
719: {
720: if (!moveNextStream())
721: throw new IllegalArgumentException(
722: "The 'position' argument is higher than acceptable.");
723: } else // At current stream.
724: {
725: stream.seek(position - basePosition);
726: break;
727: }
728: }
729: }
730:
731: public void skip(long offset) {
732: while (true) {
733: long position = stream.getPosition() + offset;
734: if (position < 0) //Before current stream.
735: {
736: offset += stream.getPosition();
737: if (!movePreviousStream())
738: throw new IllegalArgumentException(
739: "The 'offset' argument is lower than acceptable.");
740:
741: stream.setPosition(stream.getLength());
742: } else if (position > stream.getLength()) // After current stream.
743: {
744: offset -= (stream.getLength() - stream.getPosition());
745: if (!moveNextStream())
746: throw new IllegalArgumentException(
747: "The 'offset' argument is higher than acceptable.");
748: } else // At current stream.
749: {
750: stream.skip(position);
751: break;
752: }
753: }
754: }
755:
756: /**
757: Moves to the last whitespace after the current position in order to let read
758: the first non-whitespace.
759: */
760: public boolean skipWhitespace() {
761: int b;
762: try {
763: do {
764: b = stream.readUnsignedByte();
765: } while (isWhitespace(b)); // Keep goin' till there's a white-space character...
766: } catch (EOFException e) {
767: return false;
768: }
769: stream.skip(-1); // Recover the last whitespace position.
770:
771: return true;
772: }
773:
774: // </public>
775:
776: // <private>
777: private boolean moveNextStream() {
778: /* NOTE: A content stream may be made up of multiple streams [PDF:1.6:3.6.2]. */
779: // Is the content stream just a single stream?
780: if (contentStream instanceof PdfStream) // Single stream.
781: {
782: if (streamIndex == 0) {
783: streamIndex++;
784: basePosition += stream.getLength();
785: stream = null;
786: }
787: if (streamIndex == 1)
788: return false;
789:
790: streamIndex++;
791: basePosition = 0;
792: stream = ((PdfStream) contentStream).getBody();
793: } else // Array of streams.
794: {
795: PdfArray streams = (PdfArray) contentStream;
796: if (streamIndex == (streams.size() - 1)) {
797: streamIndex++;
798: basePosition += stream.getLength();
799: stream = null;
800: }
801: if (streamIndex == streams.size())
802: return false;
803:
804: streamIndex++;
805: if (streamIndex == 0) {
806: basePosition = 0;
807: } else {
808: basePosition += stream.getLength();
809: }
810: stream = ((PdfStream) ((PdfReference) streams
811: .get(streamIndex)).getDataObject()).getBody();
812: }
813:
814: return true;
815: }
816:
817: private boolean movePreviousStream() {
818: if (streamIndex == 0) {
819: streamIndex--;
820: stream = null;
821: }
822: if (streamIndex == -1)
823: return false;
824:
825: streamIndex--;
826: /* NOTE: A content stream may be made up of multiple streams [PDF:1.6:3.6.2]. */
827: // Is the content stream just a single stream?
828: if (contentStream instanceof PdfStream) // Single stream.
829: {
830: stream = ((PdfStream) contentStream).getBody();
831: basePosition = 0;
832: } else // Array of streams.
833: {
834: PdfArray streams = (PdfArray) contentStream;
835:
836: stream = ((PdfStream) ((PdfReference) streams
837: .get(streamIndex)).getDataObject()).getBody();
838: basePosition -= stream.getLength();
839: }
840:
841: return true;
842: }
843:
844: private InlineImage parseInlineImage() throws FileFormatException {
845: /*
846: NOTE: Inline images use a peculiar syntax that's an exception to the usual rule
847: that the data in a content stream is interpreted according to the standard PDF syntax
848: for objects.
849: */
850: InlineImageHeader header;
851: {
852: final List<PdfDirectObject> operands = new ArrayList<PdfDirectObject>();
853: // Parsing the image entries...
854: while (tokenType != TokenTypeEnum.Keyword) // Not keyword (i.e. end at image data beginning (ID operator)).
855: {
856: operands.add(parsePdfObject());
857: moveNext();
858: }
859: header = new InlineImageHeader(operands);
860: }
861:
862: InlineImageBody body;
863: {
864: moveNext();
865: IBuffer data = new Buffer();
866: byte c1 = 0, c2 = 0;
867: do {
868: try {
869: while (true) {
870: c1 = stream.readByte();
871: c2 = stream.readByte();
872: if (c1 == 'E' && c2 == 'I')
873: break;
874:
875: data.append(c1);
876: data.append(c2);
877: }
878: break;
879: } catch (EOFException e) {
880: /* NOTE: Current stream has finished. */
881: // Move to the next stream!
882: moveNextStream();
883: }
884: } while (stream != null);
885: body = new InlineImageBody(data);
886: }
887:
888: return new InlineImage(header, body);
889: }
890: // </private>
891: // </interface>
892: // </dynamic>
893: // </class>
894: }
|