Source Code Cross Referenced for Parser.java in » PDF » PDFClown-0.0.5 » it » stefanochizzolini » clown » documents » contents » tokens » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » PDF » PDFClown 0.0.5 » it.stefanochizzolini.clown.documents.contents.tokens 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:          Copyright © 2006,2007 Stefano Chizzolini. http://clown.stefanochizzolini.it
003:
004:          Contributors:
005:         * Stefano Chizzolini (original code developer, http://www.stefanochizzolini.it)
006:         * Haakan Aakerberg (bugfix contributor):
007:              - [FIX:0.0.4:4]
008:
009:          This file should be part of the source code distribution of "PDF Clown library"
010:          (the Program): see the accompanying README files for more info.
011:
012:          This Program is free software; you can redistribute it and/or modify it under
013:          the terms of the GNU General Public License as published by the Free Software
014:          Foundation; either version 2 of the License, or (at your option) any later version.
015:
016:          This Program is distributed in the hope that it will be useful, but WITHOUT ANY
017:          WARRANTY, either expressed or implied; without even the implied warranty of
018:          MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the License for more details.
019:
020:          You should have received a copy of the GNU General Public License along with this
021:          Program (see README files); if not, go to the GNU website (http://www.gnu.org/).
022:
023:          Redistribution and use, with or without modification, are permitted provided that such
024:          redistributions retain the above copyright notice, license and disclaimer, along with
025:          this list of conditions.
026:         */
027:
028:        package it.stefanochizzolini.clown.documents.contents.tokens;
029:
030:        import it.stefanochizzolini.clown.bytes.Buffer;
031:        import it.stefanochizzolini.clown.bytes.IBuffer;
032:        import it.stefanochizzolini.clown.bytes.IInputStream;
033:        import it.stefanochizzolini.clown.documents.contents.objects.BeginText;
034:        import it.stefanochizzolini.clown.documents.contents.objects.BeginInlineImage;
035:        import it.stefanochizzolini.clown.documents.contents.objects.ContentObject;
036:        import it.stefanochizzolini.clown.documents.contents.objects.EndText;
037:        import it.stefanochizzolini.clown.documents.contents.objects.EndInlineImage;
038:        import it.stefanochizzolini.clown.documents.contents.objects.Operation;
039:        import it.stefanochizzolini.clown.documents.contents.objects.InlineImage;
040:        import it.stefanochizzolini.clown.documents.contents.objects.InlineImageBody;
041:        import it.stefanochizzolini.clown.documents.contents.objects.InlineImageHeader;
042:        import it.stefanochizzolini.clown.documents.contents.objects.LocalGraphicsState;
043:        import it.stefanochizzolini.clown.documents.contents.objects.PaintShadingObject;
044:        import it.stefanochizzolini.clown.documents.contents.objects.PaintXObject;
045:        import it.stefanochizzolini.clown.documents.contents.objects.RestoreGraphicsState;
046:        import it.stefanochizzolini.clown.documents.contents.objects.SaveGraphicsState;
047:        import it.stefanochizzolini.clown.documents.contents.objects.ShadingObject;
048:        import it.stefanochizzolini.clown.documents.contents.objects.Text;
049:        import it.stefanochizzolini.clown.documents.contents.objects.XObject;
050:        import it.stefanochizzolini.clown.objects.PdfArray;
051:        import it.stefanochizzolini.clown.objects.PdfBoolean;
052:        import it.stefanochizzolini.clown.objects.PdfDataObject;
053:        import it.stefanochizzolini.clown.objects.PdfDate;
054:        import it.stefanochizzolini.clown.objects.PdfDictionary;
055:        import it.stefanochizzolini.clown.objects.PdfDirectObject;
056:        import it.stefanochizzolini.clown.objects.PdfHex;
057:        import it.stefanochizzolini.clown.objects.PdfInteger;
058:        import it.stefanochizzolini.clown.objects.PdfLiteral;
059:        import it.stefanochizzolini.clown.objects.PdfName;
060:        import it.stefanochizzolini.clown.objects.PdfNull;
061:        import it.stefanochizzolini.clown.objects.PdfReal;
062:        import it.stefanochizzolini.clown.objects.PdfReference;
063:        import it.stefanochizzolini.clown.objects.PdfStream;
064:        import it.stefanochizzolini.clown.tokens.FileFormatException;
065:        import it.stefanochizzolini.clown.tokens.TokenTypeEnum;
066:        import it.stefanochizzolini.clown.util.NotImplementedException;
067:
068:        import java.io.EOFException;
069:        import java.util.ArrayList;
070:        import java.util.Date;
071:        import java.util.List;
072:
073:        /**
074:         Content stream parser [PDF:1.6:3.7.1].
075:         @version 0.0.4
076:         */
077:        public class Parser {
078:            /*
079:             TODO:IMPL this parser evaluates a subset of the lexical domain of the token parser (clown.serialization.Parser): it would be better to derive both parsers from a common parsing engine in order to avoid unwieldy duplication.
080:             */
081:            // <class>
082:            // <static>
083:            // <interface>
084:            // <protected>
085:            protected static int getHex(int c) {
086:                if (c >= '0' && c <= '9')
087:                    return (c - '0');
088:                if (c >= 'A' && c <= 'F')
089:                    return (c - 'A' + 10);
090:                if (c >= 'a' && c <= 'f')
091:                    return (c - 'a' + 10);
092:                return -1;
093:            }
094:
095:            /**
096:              Evaluates whether a character is a delimiter [PDF:1.6:3.1.1].
097:             */
098:            protected static boolean isDelimiter(int c) {
099:                return (c == '(' || c == ')' || c == '<' || c == '>'
100:                        || c == '[' || c == ']' || c == '/' || c == '%');
101:            }
102:
103:            /**
104:              Evaluates whether a character is an EOL marker [PDF:1.6:3.1.1].
105:             */
106:            protected static boolean isEOL(int c) {
107:                return (c == 12 || c == 15);
108:            }
109:
110:            /**
111:              Evaluates whether a character is a white-space [PDF:1.6:3.1.1].
112:             */
113:            protected static boolean isWhitespace(int c) {
114:                return (c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32);
115:            }
116:
117:            // </protected>
118:            // </interface>
119:            // </static>
120:
121:            // <dynamic>
122:            // <fields>
123:            private final PdfDataObject contentStream; // Content stream being parsed (either a single PdfStream or a PdfArray of stream references, see getLength()).
124:
125:            private long basePosition; // Offset added to the current stream's position to get the overall position (see getPosition()).
126:            private IInputStream stream; // Stream currently being read; null is treated by moveNext() as "no more streams".
127:            private int streamIndex = -1; // Index of the current stream; starts at -1, i.e. before any stream has been selected.
128:            private Object token; // Value of the currently-parsed token (reset to null at the beginning of each moveNext()).
129:            private TokenTypeEnum tokenType; // Type of the currently-parsed token, as set by moveNext().
130:
131:            // </fields>
132:
133:            // <constructors>
/**
  Creates a parser bound to the given content stream.
  <h3>Remarks</h3>
  <p>For internal use only.</p>

  @param contentStream Content stream to parse.
 */
public Parser(PdfDataObject contentStream) {
    this.contentStream = contentStream;

    // Select the initial stream (presumably the first one of the content stream).
    moveNextStream();
}
143:
144:            // </constructors>
145:
146:            // <interface>
147:            // <public>
148:            /**
149:              Gets the content stream on which parsing is done.
150:              <h3>Remarks</h3>
151:              <p>A content stream may be made up of either a single stream or an array of streams.</p>
152:             */
153:            public PdfDataObject getContentStream() {
154:                return contentStream;
155:            }
156:
157:            public long getLength() {
158:                if (contentStream instanceof  PdfStream) // Single stream.
159:                    return ((PdfStream) contentStream).getBody().getLength();
160:                else // Array of streams.
161:                {
162:                    int length = 0;
163:                    for (PdfDirectObject stream : (PdfArray) contentStream) {
164:                        length += ((PdfStream) ((PdfReference) stream)
165:                                .getDataObject()).getBody().getLength();
166:                    }
167:                    return length;
168:                }
169:            }
170:
171:            public long getPosition() {
172:                return basePosition + stream.getPosition();
173:            }
174:
175:            /**
176:              Gets the current stream.
177:             */
178:            public IInputStream getStream() {
179:                return stream;
180:            }
181:
182:            /**
183:              Gets the current stream index.
184:             */
185:            public int getStreamIndex() {
186:                return streamIndex;
187:            }
188:
189:            /**
190:              Gets the currently-parsed token.
191:              @return The current token.
192:             */
193:            public Object getToken() {
194:                return token;
195:            }
196:
197:            /**
198:              Gets the currently-parsed token type.
199:              @return The current token type.
200:             */
201:            public TokenTypeEnum getTokenType() {
202:                return tokenType;
203:            }
204:
205:            /**
206:              @param offset Number of tokens to be skipped before reaching the intended one.
207:             */
208:            public boolean moveNext(int offset) throws FileFormatException {
209:                for (int index = 0; index < offset; index++) {
210:                    if (!moveNext())
211:                        return false;
212:                }
213:
214:                return true;
215:            }
216:
217:            /**
218:              Parse the next token [PDF:1.6:3.1].
219:              <h3>Contract</h3>
220:              <ul>
221:               <li>Preconditions:
222:                <ol>
223:                 <li>To properly parse the current token, the pointer MUST be just before its starting (leading whitespaces are ignored).</li>
224:                </ol>
225:               </li>
226:               <li>Postconditions:
227:                <ol>
228:                 <li id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</li>
229:                </ol>
230:               </li>
231:               <li>Invariants:
232:                <ol>
233:                 <li>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</li>
234:                </ol>
235:               </li>
236:               <li>Side-effects:
237:                <ol>
238:                 <li>See <a href="#moveNext_contract_post[0]">Postconditions</a>.</li>
239:                </ol>
240:               </li>
241:              </ul>
242:              @return Whether a new token was found.
243:             */
244:            public boolean moveNext() throws FileFormatException {
245:                /*
246:                  NOTE: It'd be interesting to evaluate an alternative regular-expression-based
247:                  implementation...
248:                 */
249:                StringBuilder buffer = null;
250:                token = null;
251:                int c = 0;
252:
253:                // Skip leading white-space characters [PDF:1.6:3.1.1].
254:                while (true) {
255:                    try {
256:                        do {
257:                            c = stream.readUnsignedByte();
258:                        } while (isWhitespace(c)); // Keep goin' till there's a white-space character...
259:                        break;
260:                    } catch (EOFException e) {
261:                        /* NOTE: Current stream has finished. */
262:                        // Move to the next stream!
263:                        moveNextStream();
264:                    } catch (Exception e) {
265:                        // No more streams?
266:                        if (stream == null)
267:                            return false;
268:                    }
269:                }
270:
271:                // Which character is it?
272:                switch (c) {
273:                case '/': // Name.
274:                    tokenType = TokenTypeEnum.Name;
275:
276:                    buffer = new StringBuilder();
277:                    try {
278:                        while (true) {
279:                            c = stream.readUnsignedByte();
280:                            if (isDelimiter(c) || isWhitespace(c))
281:                                break;
282:                            // Is it an hexadecimal code [PDF:1.6:3.2.4]?
283:                            if (c == '#') {
284:                                try {
285:                                    c = (getHex(stream.readUnsignedByte()) << 4)
286:                                            + getHex(stream.readUnsignedByte());
287:                                } catch (EOFException e) {
288:                                    throw new FileFormatException(
289:                                            "Unexpected EOF (malformed hexadecimal code in name object).",
290:                                            e, stream.getPosition());
291:                                }
292:                            }
293:
294:                            buffer.append((char) c);
295:                        }
296:                    } catch (EOFException e) {
297:                        throw new FileFormatException(
298:                                "Unexpected EOF (malformed name object).", e,
299:                                stream.getPosition());
300:                    }
301:
302:                    stream.skip(-1); // Recover the first byte after the current token.
303:                    break;
304:                case '0':
305:                case '1':
306:                case '2':
307:                case '3':
308:                case '4':
309:                case '5':
310:                case '6':
311:                case '7':
312:                case '8':
313:                case '9':
314:                case '.':
315:                case '-':
316:                case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
317:                    switch (c) {
318:                    case '.': // Decimal point.
319:                        tokenType = TokenTypeEnum.Real;
320:                        break;
321:                    default: // Digit or signum.
322:                        tokenType = TokenTypeEnum.Integer; // By default (it may be real).
323:                        break;
324:                    }
325:
326:                    // Building the number...
327:                    buffer = new StringBuilder();
328:                    try {
329:                        do {
330:                            buffer.append((char) c);
331:                            c = stream.readUnsignedByte();
332:                            if (c == '.')
333:                                tokenType = TokenTypeEnum.Real;
334:                            else if (c < '0' || c > '9')
335:                                break;
336:                        } while (true);
337:                    } catch (EOFException e) {
338:                        throw new FileFormatException(
339:                                "Unexpected EOF (malformed number object).", e,
340:                                stream.getPosition());
341:                    }
342:
343:                    stream.skip(-1); // Recover the first byte after the current token.
344:                    break;
345:                case '[': // Array (begin).
346:                    tokenType = TokenTypeEnum.ArrayBegin;
347:                    break;
348:                case ']': // Array (end).
349:                    tokenType = TokenTypeEnum.ArrayEnd;
350:                    break;
351:                case '<': // Dictionary (begin) | Hexadecimal string.
352:                    try {
353:                        c = stream.readUnsignedByte();
354:                    } catch (EOFException e) {
355:                        throw new FileFormatException(
356:                                "Unexpected EOF (isolated opening angle-bracket character).",
357:                                e, stream.getPosition());
358:                    }
359:                    // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
360:                    if (c == '<') {
361:                        tokenType = TokenTypeEnum.DictionaryBegin;
362:                        break;
363:                    }
364:
365:                    // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
366:                    tokenType = TokenTypeEnum.Hex;
367:
368:                    // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
369:                    buffer = new StringBuilder();
370:                    try {
371:                        while (c != '>') // NOT string end.
372:                        {
373:                            buffer.append((char) c);
374:
375:                            c = stream.readUnsignedByte();
376:                        }
377:                    } catch (EOFException e) {
378:                        throw new FileFormatException(
379:                                "Unexpected EOF (malformed hex string).", e,
380:                                stream.getPosition());
381:                    }
382:
383:                    break;
384:                case '>': // Dictionary (end).
385:                    try {
386:                        c = stream.readUnsignedByte();
387:                    } catch (EOFException e) {
388:                        throw new FileFormatException(
389:                                "Unexpected EOF (malformed dictionary).", e,
390:                                stream.getPosition());
391:                    }
392:                    if (c != '>')
393:                        throw new FileFormatException("Malformed dictionary.",
394:                                stream.getPosition());
395:
396:                    tokenType = TokenTypeEnum.DictionaryEnd;
397:
398:                    break;
399:                case '%': // Comment.
400:                    tokenType = TokenTypeEnum.Comment;
401:                    // Skipping comment content...
402:                    try {
403:                        do {
404:                            c = stream.readUnsignedByte();
405:                        } while (!isEOL(c));
406:                    } catch (EOFException e) {/* Let it go. */
407:                    }
408:
409:                    break;
410:                case '(': // Literal string.
411:                    tokenType = TokenTypeEnum.Literal;
412:
413:                    buffer = new StringBuilder();
414:                    int level = 0;
415:                    try {
416:                        while (true) {
417:                            c = stream.readUnsignedByte();
418:                            if (c == '(')
419:                                level++;
420:                            else if (c == ')')
421:                                level--;
422:                            else if (c == '\\') {
423:                                boolean lineBreak = false;
424:                                c = stream.readUnsignedByte();
425:                                switch (c) {
426:                                case 'n':
427:                                    c = '\n';
428:                                    break;
429:                                case 'r':
430:                                    c = '\r';
431:                                    break;
432:                                case 't':
433:                                    c = '\t';
434:                                    break;
435:                                case 'b':
436:                                    c = '\b';
437:                                    break;
438:                                case 'f':
439:                                    c = '\f';
440:                                    break;
441:                                case '(':
442:                                case ')':
443:                                case '\\':
444:                                    break;
445:                                case '\r':
446:                                    lineBreak = true;
447:                                    c = stream.readUnsignedByte();
448:                                    if (c != '\n')
449:                                        stream.skip(-1);
450:                                    break;
451:                                case '\n':
452:                                    lineBreak = true;
453:                                    break;
454:                                default: {
455:                                    // Is it outside the octal encoding?
456:                                    if (c < '0' || c > '7')
457:                                        break;
458:
459:                                    // Octal [PDF:1.6:3.2.3].
460:                                    int octal = c - '0';
461:                                    c = stream.readUnsignedByte();
462:                                    // Octal end?
463:                                    if (c < '0' || c > '7') {
464:                                        c = octal;
465:                                        stream.skip(-1);
466:                                        break;
467:                                    }
468:                                    octal = (octal << 3) + c - '0';
469:                                    c = stream.readUnsignedByte();
470:                                    // Octal end?
471:                                    if (c < '0' || c > '7') {
472:                                        c = octal;
473:                                        stream.skip(-1);
474:                                        break;
475:                                    }
476:                                    octal = (octal << 3) + c - '0';
477:                                    c = octal & 0xff;
478:                                    break;
479:                                }
480:                                }
481:                                if (lineBreak)
482:                                    continue;
483:                            } else if (c == '\r') {
484:                                c = stream.readUnsignedByte();
485:                                if (c != '\n') {
486:                                    c = '\n';
487:                                    stream.skip(-1);
488:                                }
489:                            }
490:                            if (level == -1)
491:                                break;
492:
493:                            buffer.append((char) c);
494:                        }
495:                    } catch (EOFException e) {
496:                        throw new FileFormatException(
497:                                "Unexpected EOF (malformed literal string).",
498:                                e, stream.getPosition());
499:                    }
500:
501:                    break;
502:                default: // Keyword.
503:                    tokenType = TokenTypeEnum.Keyword;
504:
505:                    buffer = new StringBuilder();
506:                    try {
507:                        do {
508:                            buffer.append((char) c);
509:                            c = stream.readUnsignedByte();
510:                        } while (!isDelimiter(c) && !isWhitespace(c));
511:                    } catch (EOFException e) {/* Let it go. */
512:                    }
513:                    stream.skip(-1); // Recover the first byte after the current token.
514:
515:                    break;
516:                }
517:
518:                if (buffer != null) {
519:                    /*
520:                      Here we prepare the current token state.
521:                     */
522:                    // Which token type?
523:                    switch (tokenType) {
524:                    case Keyword:
525:                        token = buffer.toString();
526:                        // Late recognition.
527:                        if (((String) token).equals("false")
528:                                || ((String) token).equals("true")) // Boolean.
529:                        {
530:                            tokenType = TokenTypeEnum.Boolean;
531:                            token = Boolean.parseBoolean((String) token);
532:                        } else if (((String) token).equals("null")) // Null.
533:                        {
534:                            tokenType = TokenTypeEnum.Null;
535:                            token = null;
536:                        }
537:                        break;
538:                    case Comment:
539:                    case Hex:
540:                    case Name:
541:                        token = buffer.toString();
542:                        break;
543:                    case Literal:
544:                        token = buffer.toString();
545:                        // Late recognition.
546:                        if (((String) token).startsWith("D:")) // Date.
547:                        {
548:                            tokenType = TokenTypeEnum.Date;
549:                            token = PdfDate.toDate((String) token);
550:                        }
551:                        break;
552:                    case Integer:
553:                        token = Integer.parseInt(buffer.toString());
554:                        break;
555:                    case Real:
556:                        token = Float.parseFloat(buffer.toString());
557:                        break;
558:                    }
559:                }
560:
561:                return true;
562:            }
563:
564:            /**
565:              Parses the next content object [PDF:1.6:4.1], may it be a single operation or a graphics object.
566:
567:              @version 0.0.4, 06/09/07
568:              @since 0.0.4
569:             */
570:            public ContentObject parseContentObject()
571:                    throws FileFormatException {
572:                //TODO:manage path objects!
573:                final Operation operation = parseOperation();
574:                // Single-operation graphics object?
575:                if (operation instanceof  PaintXObject) // External object.
576:                    return new XObject(operation);
577:                else if (operation instanceof  PaintShadingObject) // Shading object.
578:                    return new ShadingObject(operation);
579:                // Multiple-operation graphics object begin?
580:                else if (operation instanceof  BeginText) // Text.
581:                    return new Text(
582:                            (List<Operation>) (List<? extends ContentObject>) parseContentObjects());
583:                else if (operation instanceof  SaveGraphicsState) // Local graphics state.
584:                    return new LocalGraphicsState(parseContentObjects());
585:                else if (operation instanceof  BeginInlineImage) // Inline image.
586:                    return parseInlineImage();
587:                else
588:                    // Single operation.
589:                    return operation;
590:            }
591:
592:            public List<ContentObject> parseContentObjects()
593:                    throws FileFormatException {
594:                final List<ContentObject> contentObjects = new ArrayList<ContentObject>();
595:                while (moveNext()) {
596:                    ContentObject contentObject = parseContentObject();
597:                    // Multiple-operation graphics object end?
598:                    if (contentObject instanceof  EndText // Text.
599:                            || contentObject instanceof  RestoreGraphicsState // Local graphics state.
600:                            || contentObject instanceof  EndInlineImage) // Inline image.
601:                        return contentObjects;
602:
603:                    contentObjects.add(contentObject);
604:                }
605:                return contentObjects;
606:            }
607:
608:            public Operation parseOperation() throws FileFormatException {
609:                String operator = null;
610:                final List<PdfDirectObject> operands = new ArrayList<PdfDirectObject>();
611:                // Parsing the operation parts...
612:                while (true) {
613:                    // Did we reach the operator keyword?
614:                    if (tokenType == TokenTypeEnum.Keyword) {
615:                        operator = (String) token;
616:                        break;
617:                    }
618:
619:                    operands.add(parsePdfObject());
620:                    moveNext();
621:                }
622:
623:                return Operation.get(operator, operands);
624:            }
625:
626:            /**
627:              Parse the current PDF object [PDF:1.6:3.2].
628:              <h3>Contract</h3>
629:              <ul>
630:               <li>Preconditions:
631:                <ol>
632:                 <li>When this method is invoked, the pointer MUST be at the first
633:                 token of the requested object.</li>
634:                </ol>
635:               </li>
636:               <li>Postconditions:
637:                <ol>
638:                 <li id="parsePdfObject_contract_post[0]">When this method terminates,
639:                 the pointer IS at the last token of the requested object.</li>
640:                </ol>
641:               </li>
642:               <li>Invariants:
643:                <ol>
644:                 <li>(none).</li>
645:                </ol>
646:               </li>
647:               <li>Side-effects:
648:                <ol>
649:                 <li>See <a href="#parsePdfObject_contract_post[0]">Postconditions</a>.</li>
650:                </ol>
651:               </li>
652:              </ul>
653:             */
654:            protected PdfDirectObject parsePdfObject()
655:                    throws FileFormatException {
656:                switch (tokenType) {
657:                case Integer:
658:                    return new PdfInteger((Integer) token);
659:                case Name:
660:                    return new PdfName((String) token, true);
661:                case Literal:
662:                    return new PdfLiteral((String) token);
663:                case DictionaryBegin: {
664:                    PdfDictionary dictionary = new PdfDictionary();
665:                    // Populate the dictionary.
666:                    while (true) {
667:                        // Key.
668:                        moveNext();
669:                        if (tokenType == TokenTypeEnum.DictionaryEnd)
670:                            break;
671:                        PdfName key = (PdfName) parsePdfObject();
672:
673:                        // Value.
674:                        moveNext();
675:                        PdfDirectObject value = (PdfDirectObject) parsePdfObject();
676:
677:                        // Add the current entry to the dictionary!
678:                        dictionary.put(key, value);
679:                    }
680:                    return dictionary;
681:                }
682:                case ArrayBegin: {
683:                    PdfArray array = new PdfArray();
684:                    // Populate the array.
685:                    while (true) {
686:                        // Value.
687:                        moveNext();
688:                        if (tokenType == TokenTypeEnum.ArrayEnd)
689:                            break;
690:
691:                        // Add the current item to the array!
692:                        array.add((PdfDirectObject) parsePdfObject());
693:                    }
694:                    return array;
695:                }
696:                case Real:
697:                    return new PdfReal((Float) token);
698:                case Boolean:
699:                    return new PdfBoolean((Boolean) token);
700:                case Date:
701:                    return new PdfDate((Date) token);
702:                case Hex:
703:                    return new PdfHex((String) token);
704:                case Null:
705:                    return PdfNull.Null;
706:                default:
707:                    return null;
708:                }
709:            }
710:
711:            public void seek(long position) {
712:                while (true) {
713:                    if (position < basePosition) //Before current stream.
714:                    {
715:                        if (!movePreviousStream())
716:                            throw new IllegalArgumentException(
717:                                    "The 'position' argument is lower than acceptable.");
718:                    } else if (position > basePosition + stream.getLength()) // After current stream.
719:                    {
720:                        if (!moveNextStream())
721:                            throw new IllegalArgumentException(
722:                                    "The 'position' argument is higher than acceptable.");
723:                    } else // At current stream.
724:                    {
725:                        stream.seek(position - basePosition);
726:                        break;
727:                    }
728:                }
729:            }
730:
731:            public void skip(long offset) {
732:                while (true) {
733:                    long position = stream.getPosition() + offset;
734:                    if (position < 0) //Before current stream.
735:                    {
736:                        offset += stream.getPosition();
737:                        if (!movePreviousStream())
738:                            throw new IllegalArgumentException(
739:                                    "The 'offset' argument is lower than acceptable.");
740:
741:                        stream.setPosition(stream.getLength());
742:                    } else if (position > stream.getLength()) // After current stream.
743:                    {
744:                        offset -= (stream.getLength() - stream.getPosition());
745:                        if (!moveNextStream())
746:                            throw new IllegalArgumentException(
747:                                    "The 'offset' argument is higher than acceptable.");
748:                    } else // At current stream.
749:                    {
750:                        stream.skip(position);
751:                        break;
752:                    }
753:                }
754:            }
755:
756:            /**
757:              Moves to the last whitespace after the current position in order to let read
758:              the first non-whitespace.
759:             */
760:            public boolean skipWhitespace() {
761:                int b;
762:                try {
763:                    do {
764:                        b = stream.readUnsignedByte();
765:                    } while (isWhitespace(b)); // Keep goin' till there's a white-space character...
766:                } catch (EOFException e) {
767:                    return false;
768:                }
769:                stream.skip(-1); // Recover the last whitespace position.
770:
771:                return true;
772:            }
773:
774:            // </public>
775:
776:            // <private>
777:            private boolean moveNextStream() {
778:                /* NOTE: A content stream may be made up of multiple streams [PDF:1.6:3.6.2]. */
779:                // Is the content stream just a single stream?
780:                if (contentStream instanceof  PdfStream) // Single stream.
781:                {
782:                    if (streamIndex == 0) {
783:                        streamIndex++;
784:                        basePosition += stream.getLength();
785:                        stream = null;
786:                    }
787:                    if (streamIndex == 1)
788:                        return false;
789:
790:                    streamIndex++;
791:                    basePosition = 0;
792:                    stream = ((PdfStream) contentStream).getBody();
793:                } else // Array of streams.
794:                {
795:                    PdfArray streams = (PdfArray) contentStream;
796:                    if (streamIndex == (streams.size() - 1)) {
797:                        streamIndex++;
798:                        basePosition += stream.getLength();
799:                        stream = null;
800:                    }
801:                    if (streamIndex == streams.size())
802:                        return false;
803:
804:                    streamIndex++;
805:                    if (streamIndex == 0) {
806:                        basePosition = 0;
807:                    } else {
808:                        basePosition += stream.getLength();
809:                    }
810:                    stream = ((PdfStream) ((PdfReference) streams
811:                            .get(streamIndex)).getDataObject()).getBody();
812:                }
813:
814:                return true;
815:            }
816:
817:            private boolean movePreviousStream() {
818:                if (streamIndex == 0) {
819:                    streamIndex--;
820:                    stream = null;
821:                }
822:                if (streamIndex == -1)
823:                    return false;
824:
825:                streamIndex--;
826:                /* NOTE: A content stream may be made up of multiple streams [PDF:1.6:3.6.2]. */
827:                // Is the content stream just a single stream?
828:                if (contentStream instanceof  PdfStream) // Single stream.
829:                {
830:                    stream = ((PdfStream) contentStream).getBody();
831:                    basePosition = 0;
832:                } else // Array of streams.
833:                {
834:                    PdfArray streams = (PdfArray) contentStream;
835:
836:                    stream = ((PdfStream) ((PdfReference) streams
837:                            .get(streamIndex)).getDataObject()).getBody();
838:                    basePosition -= stream.getLength();
839:                }
840:
841:                return true;
842:            }
843:
844:            private InlineImage parseInlineImage() throws FileFormatException {
845:                /*
846:                  NOTE: Inline images use a peculiar syntax that's an exception to the usual rule
847:                  that the data in a content stream is interpreted according to the standard PDF syntax
848:                  for objects.
849:                 */
850:                InlineImageHeader header;
851:                {
852:                    final List<PdfDirectObject> operands = new ArrayList<PdfDirectObject>();
853:                    // Parsing the image entries...
854:                    while (tokenType != TokenTypeEnum.Keyword) // Not keyword (i.e. end at image data beginning (ID operator)).
855:                    {
856:                        operands.add(parsePdfObject());
857:                        moveNext();
858:                    }
859:                    header = new InlineImageHeader(operands);
860:                }
861:
862:                InlineImageBody body;
863:                {
864:                    moveNext();
865:                    IBuffer data = new Buffer();
866:                    byte c1 = 0, c2 = 0;
867:                    do {
868:                        try {
869:                            while (true) {
870:                                c1 = stream.readByte();
871:                                c2 = stream.readByte();
872:                                if (c1 == 'E' && c2 == 'I')
873:                                    break;
874:
875:                                data.append(c1);
876:                                data.append(c2);
877:                            }
878:                            break;
879:                        } catch (EOFException e) {
880:                            /* NOTE: Current stream has finished. */
881:                            // Move to the next stream!
882:                            moveNextStream();
883:                        }
884:                    } while (stream != null);
885:                    body = new InlineImageBody(data);
886:                }
887:
888:                return new InlineImage(header, body);
889:            }
890:            // </private>
891:            // </interface>
892:            // </dynamic>
893:            // </class>
894:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.