001: /*
002: * $Id: PdfContentParser.java 2382 2006-09-15 23:37:38Z xlv $
003: *
004: * Copyright 2005 by Paulo Soares.
005: *
006: * The contents of this file are subject to the Mozilla Public License Version 1.1
007: * (the "License"); you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
009: *
010: * Software distributed under the License is distributed on an "AS IS" basis,
011: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
012: * for the specific language governing rights and limitations under the License.
013: *
014: * The Original Code is 'iText, a free JAVA-PDF library'.
015: *
016: * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
017: * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
018: * All Rights Reserved.
019: * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
020: * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
021: *
022: * Contributor(s): all the names of the contributors are added in the source code
023: * where applicable.
024: *
025: * Alternatively, the contents of this file may be used under the terms of the
026: * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
027: * provisions of LGPL are applicable instead of those above. If you wish to
028: * allow use of your version of this file only under the terms of the LGPL
029: * License and not to allow others to use your version of this file under
030: * the MPL, indicate your decision by deleting the provisions above and
031: * replace them with the notice and other provisions required by the LGPL.
032: * If you do not delete the provisions above, a recipient may use your version
033: * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
034: *
035: * This library is free software; you can redistribute it and/or modify it
036: * under the terms of the MPL as stated above or under the terms of the GNU
037: * Library General Public License as published by the Free Software Foundation;
038: * either version 2 of the License, or any later version.
039: *
040: * This library is distributed in the hope that it will be useful, but WITHOUT
041: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
042: * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
043: * details.
044: *
045: * If you didn't download this code from the following link, you should check if
046: * you aren't using an obsolete version:
047: * http://www.lowagie.com/iText/
048: */
049:
050: package com.lowagie.text.pdf;
051:
052: import java.io.IOException;
053: import java.util.ArrayList;
054:
055: /**
056: * Parses the page or template content.
057: * @author Paulo Soares (psoares@consiste.pt)
058: */
059: public class PdfContentParser {
060:
061: /**
062: * Commands have this type.
063: */
064: public static final int COMMAND_TYPE = 200;
065: /**
066: * Holds value of property tokeniser.
067: */
068: private PRTokeniser tokeniser;
069:
070: /**
071: * Creates a new instance of PdfContentParser
072: * @param tokeniser the tokeniser with the content
073: */
074: public PdfContentParser(PRTokeniser tokeniser) {
075: this .tokeniser = tokeniser;
076: }
077:
078: /**
079: * Parses a single command from the content. Each command is output as an array of arguments
080: * having the command itself as the last element. The returned array will be empty if the
081: * end of content was reached.
082: * @param ls an <CODE>ArrayList</CODE> to use. It will be cleared before using. If it's
083: * <CODE>null</CODE> will create a new <CODE>ArrayList</CODE>
084: * @return the same <CODE>ArrayList</CODE> given as argument or a new one
085: * @throws IOException on error
086: */
087: public ArrayList parse(ArrayList ls) throws IOException {
088: if (ls == null)
089: ls = new ArrayList();
090: else
091: ls.clear();
092: PdfObject ob = null;
093: while ((ob = readPRObject()) != null) {
094: ls.add(ob);
095: if (ob.type() == COMMAND_TYPE)
096: break;
097: }
098: return ls;
099: }
100:
101: /**
102: * Gets the tokeniser.
103: * @return the tokeniser.
104: */
105: public PRTokeniser getTokeniser() {
106: return this .tokeniser;
107: }
108:
109: /**
110: * Sets the tokeniser.
111: * @param tokeniser the tokeniser
112: */
113: public void setTokeniser(PRTokeniser tokeniser) {
114: this .tokeniser = tokeniser;
115: }
116:
117: /**
118: * Reads a dictionary. The tokeniser must be positioned past the "<<" token.
119: * @return the dictionary
120: * @throws IOException on error
121: */
122: public PdfDictionary readDictionary() throws IOException {
123: PdfDictionary dic = new PdfDictionary();
124: while (true) {
125: if (!nextValidToken())
126: throw new IOException("Unexpected end of file.");
127: if (tokeniser.getTokenType() == PRTokeniser.TK_END_DIC)
128: break;
129: if (tokeniser.getTokenType() != PRTokeniser.TK_NAME)
130: throw new IOException("Dictionary key is not a name.");
131: PdfName name = new PdfName(tokeniser.getStringValue(),
132: false);
133: PdfObject obj = readPRObject();
134: int type = obj.type();
135: if (-type == PRTokeniser.TK_END_DIC)
136: throw new IOException("Unexpected '>>'");
137: if (-type == PRTokeniser.TK_END_ARRAY)
138: throw new IOException("Unexpected ']'");
139: dic.put(name, obj);
140: }
141: return dic;
142: }
143:
144: /**
145: * Reads an array. The tokeniser must be positioned past the "[" token.
146: * @return an array
147: * @throws IOException on error
148: */
149: public PdfArray readArray() throws IOException {
150: PdfArray array = new PdfArray();
151: while (true) {
152: PdfObject obj = readPRObject();
153: int type = obj.type();
154: if (-type == PRTokeniser.TK_END_ARRAY)
155: break;
156: if (-type == PRTokeniser.TK_END_DIC)
157: throw new IOException("Unexpected '>>'");
158: array.add(obj);
159: }
160: return array;
161: }
162:
163: /**
164: * Reads a pdf object.
165: * @return the pdf object
166: * @throws IOException on error
167: */
168: public PdfObject readPRObject() throws IOException {
169: if (!nextValidToken())
170: return null;
171: int type = tokeniser.getTokenType();
172: switch (type) {
173: case PRTokeniser.TK_START_DIC: {
174: PdfDictionary dic = readDictionary();
175: return dic;
176: }
177: case PRTokeniser.TK_START_ARRAY:
178: return readArray();
179: case PRTokeniser.TK_STRING:
180: PdfString str = new PdfString(tokeniser.getStringValue(),
181: null).setHexWriting(tokeniser.isHexString());
182: return str;
183: case PRTokeniser.TK_NAME:
184: return new PdfName(tokeniser.getStringValue(), false);
185: case PRTokeniser.TK_NUMBER:
186: return new PdfNumber(tokeniser.getStringValue());
187: case PRTokeniser.TK_OTHER:
188: return new PdfLiteral(COMMAND_TYPE, tokeniser
189: .getStringValue());
190: default:
191: return new PdfLiteral(-type, tokeniser.getStringValue());
192: }
193: }
194:
195: /**
196: * Reads the next token skipping over the comments.
197: * @return <CODE>true</CODE> if a token was read, <CODE>false</CODE> if the end of content was reached
198: * @throws IOException on error
199: */
200: public boolean nextValidToken() throws IOException {
201: while (tokeniser.nextToken()) {
202: if (tokeniser.getTokenType() == PRTokeniser.TK_COMMENT)
203: continue;
204: return true;
205: }
206: return false;
207: }
208: }
|