001: /**
002: * Copyright (c) 2005-2006, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.pdmodel.graphics.xobject;
031:
032: import java.awt.image.BufferedImage;
033: import java.io.InputStream;
034: import java.io.IOException;
035: import java.io.OutputStream;
036:
037: import java.util.ArrayList;
038: import java.util.List;
039:
040: import javax.imageio.ImageIO;
041:
042: import org.pdfbox.cos.COSDictionary;
043: import org.pdfbox.cos.COSName;
044: import org.pdfbox.io.RandomAccess;
045:
046: import org.pdfbox.pdmodel.PDDocument;
047: import org.pdfbox.pdmodel.common.PDStream;
048: import org.pdfbox.pdmodel.graphics.color.PDDeviceGray;
049:
050: /**
051: * An image class for CCITT Fax.
052: *
053: * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
054: * @author paul king
055: * @version $Revision: 1.4 $
056: */
057: public class PDCcitt extends PDXObjectImage {
058: private static final List FAX_FILTERS = new ArrayList();
059:
060: static {
061: FAX_FILTERS.add(COSName.CCITTFAX_DECODE.getName());
062: FAX_FILTERS.add(COSName.CCITTFAX_DECODE_ABBREVIATION.getName());
063: }
064:
065: /**
066: * Standard constructor.
067: *
068: * @param ccitt The PDStream that already contains all ccitt information.
069: */
070: public PDCcitt(PDStream ccitt) {
071: super (ccitt, "tiff");
072:
073: }
074:
075: /**
076: * Construct from a tiff file.
077: *
078: * @param doc The document to create the image as part of.
079: * @param raf The random access TIFF file which contains a suitable CCITT compressed image
080: * @throws IOException If there is an error reading the tiff data.
081: */
082:
083: public PDCcitt(PDDocument doc, RandomAccess raf) throws IOException {
084: super (new PDStream(doc), "tiff");
085: // super( new PDStream( doc, null, true ), "tiff" );
086:
087: COSDictionary decodeParms = new COSDictionary();
088:
089: COSDictionary dic = getCOSStream();
090:
091: extractFromTiff(raf, getCOSStream().createFilteredStream(),
092: decodeParms);
093:
094: dic.setItem(COSName.FILTER, COSName.CCITTFAX_DECODE);
095: dic.setItem(COSName.SUBTYPE, COSName.IMAGE);
096: dic.setItem(COSName.TYPE, COSName.getPDFName("XObject"));
097: dic.setItem("DecodeParms", decodeParms);
098:
099: setBitsPerComponent(1);
100: setColorSpace(new PDDeviceGray());
101: setWidth(decodeParms.getInt("Columns"));
102: setHeight(decodeParms.getInt("Rows"));
103:
104: }
105:
106: /**
107: * Returns an image of the CCITT Fax, or null if TIFFs are not supported. (Requires additional JAI Image filters )
108: *
109: * {@inheritDoc}
110: */
111: public BufferedImage getRGBImage() throws IOException {
112: // ImageIO.scanForPlugins();
113: return ImageIO.read(new TiffWrapper(getPDStream()
114: .getPartiallyFilteredStream(FAX_FILTERS),
115: getCOSStream()));
116: }
117:
118: /**
119: * This writes a tiff to out.
120: *
121: * {@inheritDoc}
122: */
123: public void write2OutputStream(OutputStream out) throws IOException {
124: InputStream data = new TiffWrapper(getPDStream()
125: .getPartiallyFilteredStream(FAX_FILTERS),
126: getCOSStream());
127: byte[] buf = new byte[1024];
128: int amountRead = -1;
129: while ((amountRead = data.read(buf)) != -1) {
130: out.write(buf, 0, amountRead);
131: }
132: }
133:
134: /**
135: * Extract the ccitt stream from the tiff file.
136: *
137: * @param raf - TIFF File
138: * @param os - Stream to write raw ccitt data two
139: * @param parms - COSDictionary which the encoding parameters are added to
140: * @throws IOException If there is an error reading/writing to/from the stream
141: */
142: private void extractFromTiff(RandomAccess raf, OutputStream os,
143: COSDictionary parms) throws IOException {
144: try {
145:
146: // First check the basic tiff header
147: raf.seek(0);
148: char endianess = (char) raf.read();
149: if ((char) raf.read() != endianess) {
150: throw new IOException("Not a valid tiff file");
151: }
152: //ensure that endianess is either M or I
153: if (endianess != 'M' && endianess != 'I') {
154: throw new IOException("Not a valid tiff file");
155: }
156: int magicNumber = readshort(endianess, raf);
157: if (magicNumber != 42) {
158: throw new IOException("Not a valid tiff file");
159: }
160:
161: // Relocate to the first set of tags
162: raf.seek(readlong(endianess, raf));
163:
164: int numtags = readshort(endianess, raf);
165:
166: // The number 50 is somewhat arbitary, it just stops us load up junk from somewhere and tramping on
167: if (numtags > 50) {
168: throw new IOException("Not a valid tiff file");
169: }
170:
171: // Loop through the tags, some will convert to items in the parms dictionary
172: // Other point us to where to find the data stream
173: // The only parm which might change as a result of other options is K, so
174: // We'll deal with that as a special;
175:
176: int k = -1000; // Default Non CCITT compression
177: int dataoffset = 0;
178: int datalength = 0;
179:
180: for (int i = 0; i < numtags; i++) {
181: int tag = readshort(endianess, raf);
182: int type = readshort(endianess, raf);
183: int count = readlong(endianess, raf);
184: int val = readlong(endianess, raf); // See note
185:
186: // Note, we treated that value as a long. The value always occupies 4 bytes
187: // But it might only use the first byte or two. Depending on endianess we might need to correct
188: // Note we ignore all other types, they are of little interest for PDFs/CCITT Fax
189: if (endianess == 'M') {
190: switch (type) {
191: case 1: {
192: val = val >> 24;
193: break; // byte value
194: }
195: case 3: {
196: val = val >> 16;
197: break; // short value
198: }
199: case 4: {
200: break; // long value
201: }
202: default: {
203: //do nothing
204: }
205: }
206: }
207: switch (tag) {
208: case 256: {
209: parms.setInt("Columns", val);
210: break;
211: }
212: case 257: {
213: parms.setInt("Rows", val);
214: break;
215: }
216: case 259: {
217: if (val == 4) {
218: k = -1;
219: }
220: if (val == 3) {
221: k = 0;
222: }
223: break; // T6/T4 Compression
224: }
225: case 262: {
226: if (val == 1) {
227: parms.setBoolean("BlackIs1", true);
228: }
229: break;
230: }
231: case 273: {
232: if (count == 1) {
233: dataoffset = val;
234: }
235: break;
236: }
237: case 279: {
238: if (count == 1) {
239: datalength = val;
240: }
241: break;
242: }
243: case 292: {
244: if (val == 1) {
245: k = 50; // T4 2D - arbitary K value
246: }
247: break;
248: }
249: case 324: {
250: if (count == 1) {
251: dataoffset = val;
252: }
253: break;
254: }
255: case 325: {
256: if (count == 1) {
257: datalength = val;
258: }
259: break;
260: }
261: default: {
262: //do nothing
263: }
264: }
265: }
266:
267: if (k == -1000) {
268: throw new IOException(
269: "First image in tiff is not CCITT T4 or T6 compressed");
270: }
271: if (dataoffset == 0) {
272: throw new IOException(
273: "First image in tiff is not a single tile/strip");
274: }
275:
276: parms.setInt("K", k);
277:
278: raf.seek(dataoffset);
279:
280: byte[] buf = new byte[8192];
281: int amountRead = -1;
282: while ((amountRead = raf.read(buf, 0, Math.min(8192,
283: datalength))) > 0) {
284: datalength -= amountRead;
285: os.write(buf, 0, amountRead);
286: }
287:
288: } finally {
289: os.close();
290: }
291: }
292:
293: private int readshort(char endianess, RandomAccess raf)
294: throws IOException {
295: if (endianess == 'I') {
296: return raf.read() | (raf.read() << 8);
297: }
298: return (raf.read() << 8) | raf.read();
299: }
300:
301: private int readlong(char endianess, RandomAccess raf)
302: throws IOException {
303: if (endianess == 'I') {
304: return raf.read() | (raf.read() << 8) | (raf.read() << 16)
305: | (raf.read() << 24);
306: }
307: return (raf.read() << 24) | (raf.read() << 16)
308: | (raf.read() << 8) | raf.read();
309: }
310:
311: /**
312: * Extends InputStream to wrap the data from the CCITT Fax with a suitable TIFF Header.
313: * For details see www.tiff.org, which contains useful information including pointers to the
314: * TIFF 6.0 Specification
315: *
316: */
317: private class TiffWrapper extends InputStream {
318:
319: private int currentOffset; // When reading, where in the tiffheader are we.
320: private byte[] tiffheader; // Byte array to store tiff header data
321: private InputStream datastream; // Original InputStream
322:
323: private TiffWrapper(InputStream rawstream, COSDictionary options) {
324: buildHeader(options);
325: currentOffset = 0;
326: datastream = rawstream;
327: }
328:
329: // Implement basic methods from InputStream
330: /**
331: * {@inheritDoc}
332: */
333: public boolean markSupported() {
334: return false;
335: }
336:
337: /**
338: * {@inheritDoc}
339: */
340: public void reset() throws IOException {
341: throw new IOException("reset not supported");
342: }
343:
344: /**
345: * For simple read, take a byte from the tiffheader array or pass through.
346: *
347: * {@inheritDoc}
348: */
349: public int read() throws IOException {
350: if (currentOffset < tiffheader.length) {
351: return tiffheader[currentOffset++];
352: }
353: return datastream.read();
354: }
355:
356: /**
357: * For read methods only return as many bytes as we have left in the header
358: * if we've exhausted the header, pass through to the InputStream of the raw CCITT data.
359: *
360: * {@inheritDoc}
361: */
362: public int read(byte[] data) throws IOException {
363: if (currentOffset < tiffheader.length) {
364: int length = java.lang.Math.min(tiffheader.length
365: - currentOffset, data.length);
366: if (length > 0) {
367: System.arraycopy(tiffheader, currentOffset, data,
368: 0, length);
369: }
370: currentOffset += length;
371: return length;
372: } else {
373: return datastream.read(data);
374: }
375: }
376:
377: /**
378: * For read methods only return as many bytes as we have left in the header
379: * if we've exhausted the header, pass through to the InputStream of the raw CCITT data.
380: *
381: * {@inheritDoc}
382: */
383: public int read(byte[] data, int off, int len)
384: throws IOException {
385: if (currentOffset < tiffheader.length) {
386: int length = java.lang.Math.min(tiffheader.length
387: - currentOffset, len);
388: if (length > 0) {
389: System.arraycopy(tiffheader, currentOffset, data,
390: off, length);
391: }
392: currentOffset += length;
393: return length;
394: } else {
395: return datastream.read(data, off, len);
396: }
397: }
398:
399: /**
400: * When skipping if any header data not yet read, only allow to skip what we've in the buffer
401: * Otherwise just pass through.
402: *
403: * {@inheritDoc}
404: */
405: public long skip(long n) throws IOException {
406: if (currentOffset < tiffheader.length) {
407: long length = Math.min(tiffheader.length
408: - currentOffset, n);
409: currentOffset += length;
410: return length;
411: } else {
412: return datastream.skip(n);
413: }
414: }
415:
416: // Static data for the beginning of the TIFF header
417: private final byte[] basicHeader = { 'I', 'I', 42, 0, 8, 0, 0,
418: 0, // File introducer and pointer to first IFD
419: 0, 0 }; // Number of tags start with two
420:
421: private int additionalOffset; // Offset in header to additional data
422:
423: // Builds up the tiffheader based on the options passed through.
424: private void buildHeader(COSDictionary options) {
425:
426: final int numOfTags = 10; // The maximum tags we'll fill
427: final int maxAdditionalData = 24; // The maximum amount of additional data
428: // outside the IFDs. (bytes)
429:
430: // The length of the header will be the length of the basic header (10)
431: // plus 12 bytes for each IFD, 4 bytes as a pointer to the next IFD (will be 0)
432: // plus the length of the additional data
433:
434: tiffheader = new byte[10 + (12 * numOfTags) + 4
435: + maxAdditionalData];
436: java.util.Arrays.fill(tiffheader, (byte) 0);
437: System.arraycopy(basicHeader, 0, tiffheader, 0,
438: basicHeader.length);
439:
440: // Additional data outside the IFD starts after the IFD's and pointer to the next IFD (0)
441: additionalOffset = 10 + (12 * numOfTags) + 4;
442:
443: // Now work out the variable values from TIFF defaults,
444: // PDF Defaults and the Dictionary for this XObject
445: short cols = 1728;
446: short rows = 0;
447: short blackis1 = 0;
448: short comptype = 3; // T4 compression
449: long t4options = 0; // Will set if 1d or 2d T4
450:
451: COSDictionary decodeParms = (COSDictionary) options
452: .getDictionaryObject("DecodeParms");
453:
454: if (decodeParms != null) {
455: cols = (short) decodeParms.getInt("Columns", cols);
456: rows = (short) decodeParms.getInt("Rows", rows);
457: if (decodeParms.getBoolean("BlackIs1", false)) {
458: blackis1 = 1;
459: }
460: int k = decodeParms.getInt("K"); // Mandatory parm
461: if (k < 0) {
462: //T6
463: comptype = 4;
464: }
465: if (k > 0) {
466: //T4 2D
467: comptype = 3;
468: t4options = 1;
469: }
470: // else k = 0, leave as default T4 1D compression
471: }
472:
473: // If we couldn't get the number of rows, use the main item from XObject
474: if (rows == 0) {
475: rows = (short) options.getInt("Height", rows);
476: }
477:
478: // Now put the tags into the tiffheader
479: // These musn't exceed the maximum set above, and by TIFF spec should be sorted into
480: // Numeric sequence.
481:
482: addTag(256, cols); // Columns
483: addTag(257, rows); // Rows
484: addTag(259, comptype); // T6
485: addTag(262, blackis1); // Photometric Interpretation
486: addTag(273, tiffheader.length); // Offset to start of image data - updated below
487: addTag(279, options.getInt("Length")); // Length of image data
488: addTag(282, 300, 1); // X Resolution 300 (default unit Inches) This is arbitary
489: addTag(283, 300, 1); // Y Resolution 300 (default unit Inches) This is arbitary
490: if (comptype == 3) {
491: addTag(292, t4options);
492: }
493: addTag(305, "PDFBOX"); // Software generating image
494: }
495:
496: /* Tiff types 1 = byte, 2=ascii, 3=short, 4=ulong 5=rational */
497:
498: private void addTag(int tag, long value) {
499: // Adds a tag of type 4 (ulong)
500: int count = ++tiffheader[8];
501: int offset = (count - 1) * 12 + 10;
502: tiffheader[offset] = (byte) (tag & 0xff);
503: tiffheader[offset + 1] = (byte) ((tag >> 8) & 0xff);
504: tiffheader[offset + 2] = 4; // Type Long
505: tiffheader[offset + 4] = 1; // One Value
506: tiffheader[offset + 8] = (byte) (value & 0xff);
507: tiffheader[offset + 9] = (byte) ((value >> 8) & 0xff);
508: tiffheader[offset + 10] = (byte) ((value >> 16) & 0xff);
509: tiffheader[offset + 11] = (byte) ((value >> 24) & 0xff);
510: }
511:
512: private void addTag(int tag, short value) {
513: // Adds a tag of type 3 (short)
514: int count = ++tiffheader[8];
515: int offset = (count - 1) * 12 + 10;
516: tiffheader[offset] = (byte) (tag & 0xff);
517: tiffheader[offset + 1] = (byte) ((tag >> 8) & 0xff);
518: tiffheader[offset + 2] = 3; // Type Short
519: tiffheader[offset + 4] = 1; // One Value
520: tiffheader[offset + 8] = (byte) (value & 0xff);
521: tiffheader[offset + 9] = (byte) ((value >> 8) & 0xff);
522: }
523:
524: private void addTag(int tag, String value) {
525: // Adds a tag of type 2 (ascii)
526: int count = ++tiffheader[8];
527: int offset = (count - 1) * 12 + 10;
528: tiffheader[offset] = (byte) (tag & 0xff);
529: tiffheader[offset + 1] = (byte) ((tag >> 8) & 0xff);
530: tiffheader[offset + 2] = 2; // Type Ascii
531: tiffheader[offset + 4] = 1; // One Value
532: tiffheader[offset + 8] = (byte) (additionalOffset & 0xff);
533: tiffheader[offset + 9] = (byte) ((additionalOffset >> 8) & 0xff);
534: tiffheader[offset + 10] = (byte) ((additionalOffset >> 16) & 0xff);
535: tiffheader[offset + 11] = (byte) ((additionalOffset >> 24) & 0xff);
536: System.arraycopy(value.getBytes(), 0, tiffheader,
537: additionalOffset, value.length());
538: additionalOffset += value.length() + 1;
539: }
540:
541: private void addTag(int tag, long numerator, long denominator) {
542: // Adds a tag of type 5 (rational)
543: int count = ++tiffheader[8];
544: int offset = (count - 1) * 12 + 10;
545: tiffheader[offset] = (byte) (tag & 0xff);
546: tiffheader[offset + 1] = (byte) ((tag >> 8) & 0xff);
547: tiffheader[offset + 2] = 5; // Type Rational
548: tiffheader[offset + 4] = 1; // One Value
549: tiffheader[offset + 8] = (byte) (additionalOffset & 0xff);
550: tiffheader[offset + 9] = (byte) ((additionalOffset >> 8) & 0xff);
551: tiffheader[offset + 10] = (byte) ((additionalOffset >> 16) & 0xff);
552: tiffheader[offset + 11] = (byte) ((additionalOffset >> 24) & 0xff);
553: tiffheader[additionalOffset++] = (byte) ((numerator) & 0xFF);
554: tiffheader[additionalOffset++] = (byte) ((numerator >> 8) & 0xFF);
555: tiffheader[additionalOffset++] = (byte) ((numerator >> 16) & 0xFF);
556: tiffheader[additionalOffset++] = (byte) ((numerator >> 24) & 0xFF);
557: tiffheader[additionalOffset++] = (byte) ((denominator) & 0xFF);
558: tiffheader[additionalOffset++] = (byte) ((denominator >> 8) & 0xFF);
559: tiffheader[additionalOffset++] = (byte) ((denominator >> 16) & 0xFF);
560: tiffheader[additionalOffset++] = (byte) ((denominator >> 24) & 0xFF);
561: }
562: }
563: }
|