001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hwpf.model;
019:
020: import org.apache.poi.util.LittleEndian;
021: import org.apache.poi.hwpf.usermodel.CharacterRun;
022: import org.apache.poi.hwpf.usermodel.Picture;
023:
024: import java.util.List;
025: import java.util.ArrayList;
026:
027: /**
028: * Holds information about all pictures embedded in Word Document either via "Insert -> Picture -> From File" or via
029: * clipboard. Responsible for images extraction and determining whether some document�s piece contains embedded image.
030: * Analyzes raw data bytestream �Data� (where Word stores all embedded objects) provided by HWPFDocument.
031: *
032: * Word stores images as is within so called "Data stream" - the stream within a Word docfile containing various data
033: * that hang off of characters in the main stream. For example, binary data describing in-line pictures and/or
034: * formfields an also embedded objects-native data. Word picture structures are concatenated one after the other in
035: * the data stream if the document contains pictures.
036: * Data stream is easily reachable via HWPFDocument._dataStream property.
037: * A picture is represented in the document text stream as a special character, an Unicode \u0001 whose
038: * CharacterRun.isSpecial() returns true. The file location of the picture in the Word binary file is accessed
039: * via CharacterRun.getPicOffset(). The CharacterRun.getPicOffset() is a byte offset into the data stream.
040: * Beginning at the position recorded in picOffset, a header data structure, will be stored.
041: *
042: * @author Dmitry Romanov
043: */
044: public class PicturesTable {
045: static final int TYPE_IMAGE = 0x08;
046: static final int TYPE_IMAGE_WORD2000 = 0x00;
047: static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD = 0xA;
048: static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 = 0x2;
049: static final int TYPE_HORIZONTAL_LINE = 0xE;
050: static final int BLOCK_TYPE_OFFSET = 0xE;
051: static final int MM_MODE_TYPE_OFFSET = 0x6;
052:
053: private byte[] _dataStream;
054:
055: /** @link dependency
056: * @stereotype instantiate*/
057: /*# Picture lnkPicture; */
058:
059: /**
060: *
061: * @param _dataStream
062: */
063: public PicturesTable(byte[] _dataStream) {
064: this ._dataStream = _dataStream;
065: }
066:
067: /**
068: * determines whether specified CharacterRun contains reference to a picture
069: * @param run
070: */
071: public boolean hasPicture(CharacterRun run) {
072: if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2()
073: && !run.isData() && "\u0001".equals(run.text())) {
074: return isBlockContainsImage(run.getPicOffset());
075: }
076: return false;
077: }
078:
079: /**
080: * determines whether specified CharacterRun contains reference to a picture
081: * @param run
082: */
083: public boolean hasHorizontalLine(CharacterRun run) {
084: if (run.isSpecialCharacter() && "\u0001".equals(run.text())) {
085: return isBlockContainsHorizontalLine(run.getPicOffset());
086: }
087: return false;
088: }
089:
090: private boolean isPictureRecognized(short blockType,
091: short mappingModeOfMETAFILEPICT) {
092: return (blockType == TYPE_IMAGE
093: || blockType == TYPE_IMAGE_PASTED_FROM_CLIPBOARD
094: || (blockType == TYPE_IMAGE_WORD2000 && mappingModeOfMETAFILEPICT == 0x64) || (blockType == TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 && mappingModeOfMETAFILEPICT == 0x64));
095: }
096:
097: private static short getBlockType(byte[] dataStream, int pictOffset) {
098: return LittleEndian.getShort(dataStream, pictOffset
099: + BLOCK_TYPE_OFFSET);
100: }
101:
102: private static short getMmMode(byte[] dataStream, int pictOffset) {
103: return LittleEndian.getShort(dataStream, pictOffset
104: + MM_MODE_TYPE_OFFSET);
105: }
106:
107: /**
108: * Returns picture object tied to specified CharacterRun
109: * @param run
110: * @param fillBytes if true, Picture will be returned with filled byte array that represent picture's contents. If you don't want
111: * to have that byte array in memory but only write picture's contents to stream, pass false and then use Picture.writeImageContent
112: * @see Picture#writeImageContent(java.io.OutputStream)
113: * @return a Picture object if picture exists for specified CharacterRun, null otherwise. PicturesTable.hasPicture is used to determine this.
114: * @see #hasPicture(org.apache.poi.hwpf.usermodel.CharacterRun)
115: */
116: public Picture extractPicture(CharacterRun run, boolean fillBytes) {
117: if (hasPicture(run)) {
118: return new Picture(run.getPicOffset(), _dataStream,
119: fillBytes);
120: }
121: return null;
122: }
123:
124: /**
125: * @return a list of Picture objects found in current document
126: */
127: public List getAllPictures() {
128: ArrayList pictures = new ArrayList();
129:
130: int pos = 0;
131: boolean atEnd = false;
132:
133: while (pos < _dataStream.length && !atEnd) {
134: if (isBlockContainsImage(pos)) {
135: pictures.add(new Picture(pos, _dataStream, false));
136: }
137:
138: int skipOn = LittleEndian.getInt(_dataStream, pos);
139: if (skipOn <= 0) {
140: atEnd = true;
141: }
142: pos += skipOn;
143: }
144:
145: return pictures;
146: }
147:
148: private boolean isBlockContainsImage(int i) {
149: return isPictureRecognized(getBlockType(_dataStream, i),
150: getMmMode(_dataStream, i));
151: }
152:
153: private boolean isBlockContainsHorizontalLine(int i) {
154: return getBlockType(_dataStream, i) == TYPE_HORIZONTAL_LINE
155: && getMmMode(_dataStream, i) == 0x64;
156: }
157:
158: }
|