001: /*
002: * $Id: PDFObject.java,v 1.2 2007/12/20 18:17:41 rbair Exp $
003: *
004: * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
005: * Santa Clara, California 95054, U.S.A. All rights reserved.
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation; either
010: * version 2.1 of the License, or (at your option) any later version.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public
018: * License along with this library; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020: */
021:
022: package com.sun.pdfview;
023:
024: import java.io.IOException;
025: import java.lang.ref.SoftReference;
026: import java.nio.ByteBuffer;
027: import java.util.ArrayList;
028: import java.util.HashMap;
029: import java.util.Iterator;
030:
031: import com.sun.pdfview.decode.PDFDecoder;
032:
033: /**
034: * a class encapsulating all the possibilities of content for
035: * an object in a PDF file.
036: * <p>
037: * A PDF object can be a simple type, like a Boolean, a Number,
038: * a String, or the Null value. It can also be a NAME, which
039: * looks like a string, but is a special type in PDF files, like
040: * "/Name".
041: * <p>
042: * A PDF object can also be complex types, including Array;
043: * Dictionary; Stream, which is a Dictionary plus an array of
044: * bytes; or Indirect, which is a reference to some other
045: * PDF object. Indirect references will always be dereferenced
046: * by the time any data is returned from one of the methods
047: * in this class.
048: *
049: * @author Mike Wessler
050: */
051: public class PDFObject {
052: /** an indirect reference*/
053: public static final int INDIRECT = 0; // PDFXref
054:
055: /** a Boolean */
056: public static final int BOOLEAN = 1; // Boolean
057:
058: /** a Number, represented as a double */
059: public static final int NUMBER = 2; // Double
060:
061: /** a String */
062: public static final int STRING = 3; // String
063:
064: /** a special string, seen in PDF files as /Name */
065: public static final int NAME = 4; // String
066:
067: /** an array of PDFObjects */
068: public static final int ARRAY = 5; // Array of PDFObject
069:
070: /** a Hashmap that maps String names to PDFObjects */
071: public static final int DICTIONARY = 6; // HashMap(String->PDFObject)
072:
073: /** a Stream: a Hashmap with a byte array */
074: public static final int STREAM = 7; // HashMap + byte[]
075:
076: /** the NULL object (there is only one) */
077: public static final int NULL = 8; // null
078:
079: /** a special PDF bare word, like R, obj, true, false, etc */
080: public static final int KEYWORD = 9; // String
081:
082: /** the NULL PDFObject */
083: public static final PDFObject nullObj = new PDFObject(null, NULL,
084: null);
085:
086: /** the type of this object */
087: private int type;
088:
089: /** the value of this object */
090: private Object value;
091:
092: /** the encoded stream, if this is a STREAM object */
093: private ByteBuffer stream;
094:
095: /** a cached version of the decoded stream */
096: private SoftReference decodedStream;
097:
098: /**
099: * the PDFFile from which this object came, used for
100: * dereferences
101: */
102: private PDFFile owner;
103:
104: /**
105: * a cache of translated data. This data can be
106: * garbage collected at any time, after which it will
107: * have to be rebuilt.
108: */
109: private SoftReference cache;
110:
111: /**
112: * create a new simple PDFObject with a type and a value
113: * @param owner the PDFFile in which this object resides, used
114: * for dereferencing. This may be null.
115: * @param type the type of object
116: * @param value the value. For DICTIONARY, this is a HashMap.
117: * for ARRAY it's an ArrayList. For NUMBER, it's a Double.
118: * for BOOLEAN, it's Boolean.TRUE or Boolean.FALSE. For
119: * everything else, it's a String.
120: */
121: public PDFObject(PDFFile owner, int type, Object value) {
122: this .type = type;
123: if (type == NAME) {
124: value = ((String) value).intern();
125: } else if (type == KEYWORD && value.equals("true")) {
126: this .type = BOOLEAN;
127: value = Boolean.TRUE;
128: } else if (type == KEYWORD && value.equals("false")) {
129: this .type = BOOLEAN;
130: value = Boolean.FALSE;
131: }
132: this .value = value;
133: this .owner = owner;
134: }
135:
136: /**
137: * create a new PDFObject that is the closest match to a
138: * given Java object. Possibilities include Double, String,
139: * PDFObject[], HashMap, Boolean, or PDFParser.Tok,
140: * which should be "true" or "false" to turn into a BOOLEAN.
141: *
142: * @param obj the sample Java object to convert to a PDFObject.
143: * @throws PDFParseException if the object isn't one of the
144: * above examples, and can't be turned into a PDFObject.
145: */
146: public PDFObject(Object obj) throws PDFParseException {
147: this .owner = null;
148: this .value = obj;
149: if ((obj instanceof Double) || (obj instanceof Integer)) {
150: this .type = NUMBER;
151: } else if (obj instanceof String) {
152: this .type = NAME;
153: } else if (obj instanceof PDFObject[]) {
154: this .type = ARRAY;
155: } else if (obj instanceof Object[]) {
156: Object[] srcary = (Object[]) obj;
157: PDFObject[] dstary = new PDFObject[srcary.length];
158: for (int i = 0; i < srcary.length; i++) {
159: dstary[i] = new PDFObject(srcary[i]);
160: }
161: value = dstary;
162: this .type = ARRAY;
163: } else if (obj instanceof HashMap) {
164: this .type = DICTIONARY;
165: } else if (obj instanceof Boolean) {
166: this .type = BOOLEAN;
167: } else if (obj instanceof PDFParser.Tok) {
168: PDFParser.Tok tok = (PDFParser.Tok) obj;
169: if (tok.name.equals("true")) {
170: this .value = Boolean.TRUE;
171: this .type = BOOLEAN;
172: } else if (tok.name.equals("false")) {
173: this .value = Boolean.FALSE;
174: this .type = BOOLEAN;
175: } else {
176: this .value = tok.name;
177: this .type = NAME;
178: }
179: } else {
180: throw new PDFParseException("Bad type for raw PDFObject: "
181: + obj);
182: }
183: }
184:
185: /**
186: * create a new PDFObject based on a PDFXref
187: * @param owner the PDFFile from which the PDFXref was drawn
188: * @param xref the PDFXref to turn into a PDFObject
189: */
190: public PDFObject(PDFFile owner, PDFXref xref) {
191: this .type = INDIRECT;
192: this .value = xref;
193: this .owner = owner;
194: }
195:
196: /**
197: * get the type of this object. The object will be
198: * dereferenced, so INDIRECT will never be returned.
199: * @return the type of the object
200: */
201: public int getType() throws IOException {
202: if (type == INDIRECT) {
203: return dereference().getType();
204: }
205:
206: return type;
207: }
208:
209: /**
210: * set the stream of this object. It should have been
211: * a DICTIONARY before the call.
212: * @param data the data, as a ByteBuffer.
213: */
214: public void setStream(ByteBuffer data) {
215: this .type = STREAM;
216: this .stream = data;
217: }
218:
219: /**
220: * get the value in the cache. May become null at any time.
221: * @return the cached value, or null if the value has been
222: * garbage collected.
223: */
224: public Object getCache() throws IOException {
225: if (type == INDIRECT) {
226: return dereference().getCache();
227: } else if (cache != null) {
228: return cache.get();
229: } else {
230: return null;
231: }
232: }
233:
234: /**
235: * set the cached value. The object may be garbage collected
236: * if no other reference exists to it.
237: * @param obj the object to be cached
238: */
239: public void setCache(Object obj) throws IOException {
240: if (type == INDIRECT) {
241: dereference().setCache(obj);
242: return;
243: } else {
244: cache = new SoftReference(obj);
245: }
246: }
247:
248: /**
249: * get the stream from this object. Will return null if this
250: * object isn't a STREAM.
251: * @return the stream, or null, if this isn't a STREAM.
252: */
253: public byte[] getStream() throws IOException {
254: if (type == INDIRECT) {
255: return dereference().getStream();
256: } else if (type == STREAM && stream != null) {
257: byte[] data = null;
258:
259: synchronized (stream) {
260: // decode
261: ByteBuffer streamBuf = decodeStream();
262: // ByteBuffer streamBuf = stream;
263:
264: // First try to use the array with no copying. This can only
265: // be done if the buffer has a backing array, and is not a slice
266: if (streamBuf.hasArray()
267: && streamBuf.arrayOffset() == 0) {
268: byte[] ary = streamBuf.array();
269:
270: // make sure there is no extra data in the buffer
271: if (ary.length == streamBuf.remaining()) {
272: return ary;
273: }
274: }
275:
276: // Can't use the direct buffer, so copy the data (bad)
277: data = new byte[streamBuf.remaining()];
278: streamBuf.get(data);
279:
280: // return the stream to its starting position
281: streamBuf.flip();
282: }
283:
284: return data;
285: } else if (type == STRING) {
286: String src = getStringValue();
287: byte[] data = new byte[src.length()];
288: for (int i = 0; i < data.length; i++) {
289: data[i] = (byte) src.charAt(i);
290: }
291: return data;
292: }
293:
294: // wrong type
295: return null;
296: }
297:
298: /**
299: * get the stream from this object as a byte buffer. Will return null if
300: * this object isn't a STREAM.
301: * @return the buffer, or null, if this isn't a STREAM.
302: */
303: public ByteBuffer getStreamBuffer() throws IOException {
304: if (type == INDIRECT) {
305: return dereference().getStreamBuffer();
306: } else if (type == STREAM && stream != null) {
307: synchronized (stream) {
308: ByteBuffer streamBuf = decodeStream();
309: // ByteBuffer streamBuf = stream;
310: return streamBuf.duplicate();
311: }
312: } else if (type == STRING) {
313: String src = getStringValue();
314: return ByteBuffer.wrap(src.getBytes());
315: }
316:
317: // wrong type
318: return null;
319: }
320:
321: /**
322: * Get the decoded stream value
323: */
324: private ByteBuffer decodeStream() throws IOException {
325: ByteBuffer outStream = null;
326:
327: // first try the cache
328: if (decodedStream != null) {
329: outStream = (ByteBuffer) decodedStream.get();
330: }
331:
332: // no luck in the cache, do the actual decoding
333: if (outStream == null) {
334: stream.rewind();
335: outStream = PDFDecoder.decodeStream(this , stream);
336: }
337:
338: return outStream;
339: }
340:
341: /**
342: * get the value as an int. Will return 0 if this object
343: * isn't a NUMBER.
344: */
345: public int getIntValue() throws IOException {
346: if (type == INDIRECT) {
347: return dereference().getIntValue();
348: } else if (type == NUMBER) {
349: return ((Double) value).intValue();
350: }
351:
352: // wrong type
353: return 0;
354: }
355:
356: /**
357: * get the value as a float. Will return 0 if this object
358: * isn't a NUMBER
359: */
360: public float getFloatValue() throws IOException {
361: if (type == INDIRECT) {
362: return dereference().getFloatValue();
363: } else if (type == NUMBER) {
364: return ((Double) value).floatValue();
365: }
366:
367: // wrong type
368: return 0;
369: }
370:
371: /**
372: * get the value as a double. Will return 0 if this object
373: * isn't a NUMBER.
374: */
375: public double getDoubleValue() throws IOException {
376: if (type == INDIRECT) {
377: return dereference().getDoubleValue();
378: } else if (type == NUMBER) {
379: return ((Double) value).doubleValue();
380: }
381:
382: // wrong type
383: return 0;
384: }
385:
386: /**
387: * get the value as a String. Will return null if the object
388: * isn't a STRING, NAME, or KEYWORD. This method will <b>NOT</b>
389: * convert a NUMBER to a String.
390: */
391: public String getStringValue() throws IOException {
392: if (type == INDIRECT) {
393: return dereference().getStringValue();
394: } else if (type == STRING || type == NAME || type == KEYWORD) {
395: return (String) value;
396: }
397:
398: // wrong type
399: return null;
400: }
401:
402: /**
403: * get the value as a PDFObject[]. If this object is an ARRAY,
404: * will return the array. Otherwise, will return an array
405: * of one element with this object as the element.
406: */
407: public PDFObject[] getArray() throws IOException {
408: if (type == INDIRECT) {
409: return dereference().getArray();
410: } else if (type == ARRAY) {
411: PDFObject[] ary = (PDFObject[]) value;
412: return ary;
413: } else {
414: PDFObject[] ary = new PDFObject[1];
415: ary[0] = this ;
416: return ary;
417: }
418: }
419:
420: /**
421: * get the value as a boolean. Will return false if this
422: * object is not a BOOLEAN
423: */
424: public boolean getBooleanValue() throws IOException {
425: if (type == INDIRECT) {
426: return dereference().getBooleanValue();
427: } else if (type == BOOLEAN) {
428: return value == Boolean.TRUE;
429: }
430:
431: // wrong type
432: return false;
433: }
434:
435: /**
436: * if this object is an ARRAY, get the PDFObject at some
437: * position in the array. If this is not an ARRAY, returns
438: * null.
439: */
440: public PDFObject getAt(int idx) throws IOException {
441: if (type == INDIRECT) {
442: return dereference().getAt(idx);
443: } else if (type == ARRAY) {
444: PDFObject[] ary = (PDFObject[]) value;
445: return ary[idx];
446: }
447:
448: // wrong type
449: return null;
450: }
451:
452: /**
453: * get an Iterator over all the keys in the dictionary. If
454: * this object is not a DICTIONARY or a STREAM, returns an
455: * Iterator over the empty list.
456: */
457: public Iterator getDictKeys() throws IOException {
458: if (type == INDIRECT) {
459: return dereference().getDictKeys();
460: } else if (type == DICTIONARY || type == STREAM) {
461: return ((HashMap) value).keySet().iterator();
462: }
463:
464: // wrong type
465: return new ArrayList().iterator();
466: }
467:
468: /**
469: * get the dictionary as a HashMap. If this isn't a DICTIONARY
470: * or a STREAM, returns null
471: */
472: public HashMap getDictionary() throws IOException {
473: if (type == INDIRECT) {
474: return dereference().getDictionary();
475: } else if (type == DICTIONARY || type == STREAM) {
476: return (HashMap) value;
477: }
478:
479: // wrong type
480: return new HashMap();
481: }
482:
483: /**
484: * get the value associated with a particular key in the
485: * dictionary. If this isn't a DICTIONARY or a STREAM,
486: * or there is no such key, returns null.
487: */
488: public PDFObject getDictRef(String key) throws IOException {
489: if (type == INDIRECT) {
490: return dereference().getDictRef(key);
491: } else if (type == DICTIONARY || type == STREAM) {
492: key = key.intern();
493: HashMap h = (HashMap) value;
494: PDFObject obj = (PDFObject) h.get(key.intern());
495: return obj;
496: }
497:
498: // wrong type
499: return null;
500: }
501:
502: /**
503: * returns true only if this object is a DICTIONARY or a
504: * STREAM, and the "Type" entry in the dictionary matches a
505: * given value.
506: * @param match the expected value for the "Type" key in the
507: * dictionary
508: * @return whether the dictionary is of the expected type
509: */
510: public boolean isDictType(String match) throws IOException {
511: if (type == INDIRECT) {
512: return dereference().isDictType(match);
513: } else if (type != DICTIONARY && type != STREAM) {
514: return false;
515: }
516:
517: PDFObject obj = getDictRef("Type");
518: return obj != null && obj.getStringValue().equals(match);
519: }
520:
521: /**
522: * return a representation of this PDFObject as a String.
523: * Does NOT dereference anything: this is the only method
524: * that allows you to distinguish an INDIRECT PDFObject.
525: */
526: @Override
527: public String toString() {
528: try {
529: if (type == INDIRECT) {
530: return "Indirect to #" + ((PDFXref) value).getID();
531: } else if (type == BOOLEAN) {
532: return "Boolean: "
533: + (getBooleanValue() ? "true" : "false");
534: } else if (type == NUMBER) {
535: return "Number: " + getDoubleValue();
536: } else if (type == STRING) {
537: return "String: " + getStringValue();
538: } else if (type == NAME) {
539: return "Name: /" + getStringValue();
540: } else if (type == ARRAY) {
541: return "Array, length=" + ((PDFObject[]) value).length;
542: } else if (type == DICTIONARY) {
543: StringBuffer sb = new StringBuffer();
544: PDFObject obj = getDictRef("Type");
545: if (obj != null) {
546: sb.append(obj.getStringValue());
547: obj = getDictRef("Subtype");
548: if (obj != null) {
549: sb.append("/" + obj.getStringValue());
550: }
551: } else {
552: sb.append("Untyped");
553: }
554: sb.append(" dictionary. Keys:");
555: HashMap hm = (HashMap) value;
556: Iterator it = hm.keySet().iterator();
557: while (it.hasNext()) {
558: sb.append(" " + (String) it.next());
559: }
560: return sb.toString();
561: } else if (type == STREAM) {
562: byte[] st = getStream();
563: if (st == null) {
564: return "Broken stream";
565: }
566: return "Stream: [["
567: + new String(st, 0, st.length > 30 ? 30
568: : st.length) + "]]";
569: } else if (type == NULL) {
570: return "Null";
571: } else if (type == KEYWORD) {
572: return "Keyword: " + getStringValue();
573: /* } else if (type==IMAGE) {
574: StringBuffer sb= new StringBuffer();
575: java.awt.Image im= (java.awt.Image)stream;
576: sb.append("Image ("+im.getWidth(null)+"x"+im.getHeight(null)+", with keys:");
577: HashMap hm= (HashMap)value;
578: Iterator it= hm.keySet().iterator();
579: while(it.hasNext()) {
580: sb.append(" "+(String)it.next());
581: }
582: return sb.toString();*/
583: } else {
584: return "Whoops! big error! Unknown type";
585: }
586: } catch (IOException ioe) {
587: return "Caught an error: " + ioe;
588: }
589: }
590:
591: /**
592: * Make sure that this object is dereferenced. Use the cache of
593: * an indirect object to cache the dereferenced value, if possible.
594: */
595: public PDFObject dereference() throws IOException {
596: if (type == INDIRECT) {
597: PDFObject obj = null;
598:
599: if (cache != null) {
600: obj = (PDFObject) cache.get();
601: }
602:
603: if (obj == null) {
604: if (owner == null) {
605: System.out
606: .println("Bad seed (owner==null)! Object="
607: + this );
608: }
609:
610: obj = owner.dereference((PDFXref) value);
611:
612: cache = new SoftReference(obj);
613: }
614:
615: return obj;
616: } else {
617: // not indirect, no need to dereference
618: return this ;
619: }
620: }
621:
622: /**
623: * Test whether two PDFObject are equal. Objects are equal IFF they
624: * are the same reference OR they are both indirect objects with the
625: * same id and generation number in their xref
626: */
627: @Override
628: public boolean equals(Object o) {
629: if (super .equals(o)) {
630: // they are the same object
631: return true;
632: } else if (type == INDIRECT && o instanceof PDFObject) {
633: // they are both PDFObjects. Check type and xref.
634: PDFObject obj = (PDFObject) o;
635:
636: if (obj.type == INDIRECT) {
637: PDFXref lXref = (PDFXref) value;
638: PDFXref rXref = (PDFXref) obj.value;
639:
640: return ((lXref.getID() == rXref.getID()) && (lXref
641: .getGeneration() == rXref.getGeneration()));
642: }
643: }
644:
645: return false;
646: }
647: }
|