001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hwpf.usermodel;
019:
020: import org.apache.poi.util.LittleEndian;
021: import org.apache.poi.util.POILogger;
022: import org.apache.poi.util.POILogFactory;
023:
024: import java.io.OutputStream;
025: import java.io.IOException;
026: import java.io.ByteArrayInputStream;
027: import java.io.ByteArrayOutputStream;
028: import java.util.zip.InflaterInputStream;
029:
030: /**
031: * Represents embedded picture extracted from Word Document
032: * @author Dmitry Romanov
033: */
034: public class Picture {
035: private static final POILogger log = POILogFactory
036: .getLogger(Picture.class);
037:
038: // public static final int FILENAME_OFFSET = 0x7C;
039: // public static final int FILENAME_SIZE_OFFSET = 0x6C;
040: static final int MFPMM_OFFSET = 0x6;
041: static final int BLOCK_TYPE_OFFSET = 0xE;
042: static final int PICT_HEADER_OFFSET = 0x4;
043: static final int UNKNOWN_HEADER_SIZE = 0x49;
044:
045: public static final byte[] GIF = new byte[] { 'G', 'I', 'F' };
046: public static final byte[] PNG = new byte[] { (byte) 0x89, 0x50,
047: 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A };
048: public static final byte[] JPG = new byte[] { (byte) 0xFF,
049: (byte) 0xD8 };
050: public static final byte[] BMP = new byte[] { 'B', 'M' };
051: public static final byte[] TIFF = new byte[] { 0x49, 0x49, 0x2A,
052: 0x00 };
053: public static final byte[] TIFF1 = new byte[] { 0x4D, 0x4D, 0x00,
054: 0x2A };
055:
056: public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 };
057: public static final byte[] WMF1 = { (byte) 0xD7, (byte) 0xCD,
058: (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 };
059: public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00,
060: 0x03 }; // Windows 3.x
061: // TODO: DIB, PICT
062:
063: public static final byte[] IHDR = new byte[] { 'I', 'H', 'D', 'R' };
064:
065: public static final byte[] COMPRESSED1 = { (byte) 0xFE, 0x78,
066: (byte) 0xDA };
067: public static final byte[] COMPRESSED2 = { (byte) 0xFE, 0x78,
068: (byte) 0x9C };
069:
070: private int dataBlockStartOfsset;
071: private int pictureBytesStartOffset;
072: private int dataBlockSize;
073: private int size;
074: // private String fileName;
075: private byte[] rawContent;
076: private byte[] content;
077: private byte[] _dataStream;
078: private int aspectRatioX;
079: private int aspectRatioY;
080: private int height = -1;
081: private int width = -1;
082:
083: public Picture(int dataBlockStartOfsset, byte[] _dataStream,
084: boolean fillBytes) {
085: this ._dataStream = _dataStream;
086: this .dataBlockStartOfsset = dataBlockStartOfsset;
087: this .dataBlockSize = LittleEndian.getInt(_dataStream,
088: dataBlockStartOfsset);
089: this .pictureBytesStartOffset = getPictureBytesStartOffset(
090: dataBlockStartOfsset, _dataStream, dataBlockSize);
091: this .size = dataBlockSize
092: - (pictureBytesStartOffset - dataBlockStartOfsset);
093:
094: if (size < 0) {
095:
096: }
097:
098: this .aspectRatioX = extractAspectRatioX(_dataStream,
099: dataBlockStartOfsset);
100: this .aspectRatioY = extractAspectRatioY(_dataStream,
101: dataBlockStartOfsset);
102: // this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
103: // if (fileName==null || fileName.length()==0) {
104: // fileName = "clipboard";
105: // }
106:
107: if (fillBytes) {
108: fillImageContent();
109: }
110: }
111:
112: private void fillWidthHeight() {
113: String ext = suggestFileExtension();
114: // trying to extract width and height from pictures content:
115: if ("jpg".equalsIgnoreCase(ext)) {
116: fillJPGWidthHeight();
117: } else if ("png".equalsIgnoreCase(ext)) {
118: fillPNGWidthHeight();
119: }
120: }
121:
122: private static int extractAspectRatioX(byte[] _dataStream,
123: int dataBlockStartOffset) {
124: return LittleEndian.getShort(_dataStream,
125: dataBlockStartOffset + 0x20) / 10;
126: }
127:
128: private static int extractAspectRatioY(byte[] _dataStream,
129: int dataBlockStartOffset) {
130: return LittleEndian.getShort(_dataStream,
131: dataBlockStartOffset + 0x22) / 10;
132: }
133:
134: /**
135: * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
136: * is tried to determine from first byte of picture's content.
137: *
138: * @return suggested file name
139: */
140: public String suggestFullFileName() {
141: String fileExt = suggestFileExtension();
142: return Integer.toHexString(dataBlockStartOfsset)
143: + (fileExt.length() > 0 ? "." + fileExt : "");
144: }
145:
146: /**
147: * Writes Picture's content bytes to specified OutputStream.
148: * Is useful when there is need to write picture bytes directly to stream, omitting its representation in
149: * memory as distinct byte array.
150: *
151: * @param out a stream to write to
152: * @throws IOException if some exception is occured while writing to specified out
153: */
154: public void writeImageContent(OutputStream out) throws IOException {
155: if (rawContent != null && rawContent.length > 0) {
156: out.write(rawContent, 0, size);
157: } else {
158: out.write(_dataStream, pictureBytesStartOffset, size);
159: }
160: }
161:
162: /**
163: * @return picture's content as byte array
164: */
165: public byte[] getContent() {
166: if (content == null || content.length <= 0) {
167: fillImageContent();
168: }
169: return content;
170: }
171:
172: public byte[] getRawContent() {
173: if (rawContent == null || rawContent.length <= 0) {
174: fillRawImageContent();
175: }
176: return rawContent;
177: }
178:
179: /**
180: *
181: * @return size in bytes of the picture
182: */
183: public int getSize() {
184: return size;
185: }
186:
187: /**
188: * returns horizontal aspect ratio for picture provided by user
189: */
190: public int getAspectRatioX() {
191: return aspectRatioX;
192: }
193:
194: /**
195: * returns vertical aspect ratio for picture provided by user
196: */
197: public int getAspectRatioY() {
198: return aspectRatioY;
199: }
200:
201: /**
202: * tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes
203: * of picture's contents
204: * @return suggested file extension
205: */
206: public String suggestFileExtension() {
207: String extension = suggestFileExtension(_dataStream,
208: pictureBytesStartOffset);
209: if ("".equals(extension)) {
210: // May be compressed. Get the uncompressed content and inspect that.
211: extension = suggestFileExtension(getContent(), 0);
212: }
213: return extension;
214: }
215:
216: private String suggestFileExtension(byte[] _dataStream,
217: int pictureBytesStartOffset) {
218: if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) {
219: return "jpg";
220: } else if (matchSignature(_dataStream, PNG,
221: pictureBytesStartOffset)) {
222: return "png";
223: } else if (matchSignature(_dataStream, GIF,
224: pictureBytesStartOffset)) {
225: return "gif";
226: } else if (matchSignature(_dataStream, BMP,
227: pictureBytesStartOffset)) {
228: return "bmp";
229: } else if (matchSignature(_dataStream, TIFF,
230: pictureBytesStartOffset)
231: || matchSignature(_dataStream, TIFF1,
232: pictureBytesStartOffset)) {
233: return "tiff";
234: } else {
235: // Need to load the image content before we can try the following tests
236: fillImageContent();
237:
238: if (matchSignature(content, WMF1, 0)
239: || matchSignature(content, WMF2, 0)) {
240: return "wmf";
241: } else if (matchSignature(content, EMF, 0)) {
242: return "emf";
243: }
244: }
245: // TODO: DIB, PICT
246: return "";
247: }
248:
249: private static boolean matchSignature(byte[] dataStream,
250: byte[] signature, int pictureBytesOffset) {
251: boolean matched = pictureBytesOffset < dataStream.length;
252: for (int i = 0; (i + pictureBytesOffset) < dataStream.length
253: && i < signature.length; i++) {
254: if (dataStream[i + pictureBytesOffset] != signature[i]) {
255: matched = false;
256: break;
257: }
258: }
259: return matched;
260: }
261:
262: // public String getFileName()
263: // {
264: // return fileName;
265: // }
266:
267: // private static String extractFileName(int blockStartIndex, byte[] dataStream) {
268: // int fileNameStartOffset = blockStartIndex + 0x7C;
269: // int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
270: // int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
271: //
272: // int fileNameIndex = fileNameStartOffset;
273: // char[] fileNameChars = new char[(fileNameSize-1)/2];
274: // int charIndex = 0;
275: // while(charIndex<fileNameChars.length) {
276: // short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
277: // fileNameChars[charIndex] = (char)aChar;
278: // charIndex++;
279: // fileNameIndex += 2;
280: // }
281: // String fileName = new String(fileNameChars);
282: // return fileName.trim();
283: // }
284:
285: private void fillRawImageContent() {
286: this .rawContent = new byte[size];
287: System.arraycopy(_dataStream, pictureBytesStartOffset,
288: rawContent, 0, size);
289: }
290:
291: private void fillImageContent() {
292: byte[] rawContent = getRawContent();
293:
294: // HACK: Detect compressed images. In reality there should be some way to determine
295: // this from the first 32 bytes, but I can't see any similarity between all the
296: // samples I have obtained, nor any similarity in the data block contents.
297: if (matchSignature(rawContent, COMPRESSED1, 32)
298: || matchSignature(rawContent, COMPRESSED2, 32)) {
299: try {
300: InflaterInputStream in = new InflaterInputStream(
301: new ByteArrayInputStream(rawContent, 33,
302: rawContent.length - 33));
303: ByteArrayOutputStream out = new ByteArrayOutputStream();
304: byte[] buf = new byte[4096];
305: int readBytes;
306: while ((readBytes = in.read(buf)) > 0) {
307: out.write(buf, 0, readBytes);
308: }
309: content = out.toByteArray();
310: } catch (IOException e) {
311: // Problems reading from the actual ByteArrayInputStream should never happen
312: // so this will only ever be a ZipException.
313: log
314: .log(
315: POILogger.INFO,
316: "Possibly corrupt compression or non-compressed data",
317: e);
318: }
319: } else {
320: // Raw data is not compressed.
321: content = rawContent;
322: }
323: }
324:
325: private static int getPictureBytesStartOffset(
326: int dataBlockStartOffset, byte[] _dataStream,
327: int dataBlockSize) {
328: final int dataBlockEndOffset = dataBlockSize
329: + dataBlockStartOffset;
330: int realPicoffset = dataBlockStartOffset;
331:
332: int PICTFBlockSize = LittleEndian.getShort(_dataStream,
333: dataBlockStartOffset + PICT_HEADER_OFFSET);
334: int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
335: int PICTF1BlockSize = LittleEndian.getShort(_dataStream,
336: dataBlockStartOffset + PICTF1BlockOffset);
337:
338: int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset)
339: : PICTF1BlockOffset;
340: realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
341: if (realPicoffset >= dataBlockEndOffset) {
342: realPicoffset -= UNKNOWN_HEADER_SIZE;
343: }
344: return realPicoffset;
345: }
346:
347: private void fillJPGWidthHeight() {
348: /*
349: http://www.codecomments.com/archive281-2004-3-158083.html
350:
351: Algorhitm proposed by Patrick TJ McPhee:
352:
353: read 2 bytes
354: make sure they are 'ffd8'x
355: repeatedly:
356: read 2 bytes
357: make sure the first one is 'ff'x
358: if the second one is 'd9'x stop
359: else if the second one is c0 or c2 (or possibly other values ...)
360: skip 2 bytes
361: read one byte into depth
362: read two bytes into height
363: read two bytes into width
364: else
365: read two bytes into length
366: skip forward length-2 bytes
367:
368: Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
369: */
370: int pointer = pictureBytesStartOffset + 2;
371: int firstByte = _dataStream[pointer];
372: int secondByte = _dataStream[pointer + 1];
373:
374: int endOfPicture = pictureBytesStartOffset + size;
375: while (pointer < endOfPicture - 1) {
376: do {
377: firstByte = _dataStream[pointer];
378: secondByte = _dataStream[pointer + 1];
379: } while (!(firstByte == (byte) 0xFF)
380: && pointer < endOfPicture - 1);
381:
382: if (firstByte == ((byte) 0xFF)
383: && pointer < endOfPicture - 1) {
384: if (secondByte == (byte) 0xD9
385: || secondByte == (byte) 0xDA) {
386: break;
387: } else if ((secondByte & 0xF0) == 0xC0
388: && secondByte != (byte) 0xC4
389: && secondByte != (byte) 0xC8
390: && secondByte != (byte) 0xCC) {
391: pointer += 5;
392: this .height = getBigEndianShort(_dataStream,
393: pointer);
394: this .width = getBigEndianShort(_dataStream,
395: pointer + 2);
396: break;
397: } else {
398: pointer++;
399: pointer++;
400: int length = getBigEndianShort(_dataStream, pointer);
401: pointer += length;
402: }
403: } else {
404: pointer++;
405: }
406: }
407: }
408:
409: private void fillPNGWidthHeight() {
410: /*
411: Used PNG file format description from http://www.wotsit.org/download.asp?f=png
412: */
413: int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
414: if (matchSignature(_dataStream, IHDR, HEADER_START)) {
415: int IHDR_CHUNK_WIDTH = HEADER_START + 4;
416: this .width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
417: this .height = getBigEndianInt(_dataStream,
418: IHDR_CHUNK_WIDTH + 4);
419: }
420: }
421:
422: /**
423: * returns pixel width of the picture or -1 if dimensions determining was failed
424: */
425: public int getWidth() {
426: if (width == -1) {
427: fillWidthHeight();
428: }
429: return width;
430: }
431:
432: /**
433: * returns pixel height of the picture or -1 if dimensions determining was failed
434: */
435: public int getHeight() {
436: if (height == -1) {
437: fillWidthHeight();
438: }
439: return height;
440: }
441:
442: private static int getBigEndianInt(byte[] data, int offset) {
443: return (((data[offset] & 0xFF) << 24)
444: + ((data[offset + 1] & 0xFF) << 16)
445: + ((data[offset + 2] & 0xFF) << 8) + (data[offset + 3] & 0xFF));
446: }
447:
448: private static int getBigEndianShort(byte[] data, int offset) {
449: return (((data[offset] & 0xFF) << 8) + (data[offset + 1] & 0xFF));
450: }
451:
452: }
|