001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hwpf.model;
019:
020: import org.apache.poi.poifs.common.POIFSConstants;
021: import org.apache.poi.util.LittleEndian;
022:
023: import java.util.ArrayList;
024: import java.util.List;
025: import java.util.Arrays;
026:
/**
 * Represents a PAP FKP. The style properties for paragraphs and character runs
 * are stored in fkps. There are PAP fkps for paragraph properties and CHP fkps
 * for character run properties. The first part of the fkp for both CHP and PAP
 * fkps consists of an array of 4-byte int offsets in the main stream for that
 * paragraph's or character run's text. The ending offset is the next
 * value in the array. For example, if an fkp has X paragraphs stored in it,
 * then there are (X + 1) 4-byte ints in the beginning array. The number X is
 * determined by the last byte in a 512-byte fkp.
 *
 * CHP and PAP fkps also store the compressed styles (grpprl) that correspond to
 * the offsets on the front of the fkp. The offset of the grpprls is determined
 * differently for CHP fkps and PAP fkps.
 *
 * @author Ryan Ackley
 */
043: public class PAPFormattedDiskPage extends FormattedDiskPage {
044:
045: private static final int BX_SIZE = 13;
046: private static final int FC_SIZE = 4;
047:
048: private ArrayList _papxList = new ArrayList();
049: private ArrayList _overFlow;
050: private byte[] _dataStream;
051:
052: public PAPFormattedDiskPage(byte[] dataStream) {
053: _dataStream = dataStream;
054: }
055:
056: /**
057: * Creates a PAPFormattedDiskPage from a 512 byte array
058: */
059: public PAPFormattedDiskPage(byte[] documentStream,
060: byte[] dataStream, int offset, int fcMin) {
061: super (documentStream, offset);
062:
063: for (int x = 0; x < _crun; x++) {
064: _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x)
065: - fcMin, getGrpprl(x), getParagraphHeight(x),
066: dataStream));
067: }
068: _fkp = null;
069: _dataStream = dataStream;
070: }
071:
072: /**
073: * Fills the queue for writing.
074: *
075: * @param filler a List of PAPXs
076: */
077: public void fill(List filler) {
078: _papxList.addAll(filler);
079: }
080:
081: /**
082: * Used when writing out a Word docunment. This method is part of a sequence
083: * that is necessary because there is no easy and efficient way to
084: * determine the number PAPX's that will fit into one FKP. THe sequence is
085: * as follows:
086: *
087: * fill()
088: * toByteArray()
089: * getOverflow()
090: *
091: * @return The remaining PAPXs that didn't fit into this FKP.
092: */
093: ArrayList getOverflow() {
094: return _overFlow;
095: }
096:
097: /**
098: * Gets the PAPX at index.
099: * @param index The index to get the PAPX for.
100: * @return The PAPX at index.
101: */
102: public PAPX getPAPX(int index) {
103: return (PAPX) _papxList.get(index);
104: }
105:
106: /**
107: * Gets the papx for the paragraph at index in this fkp.
108: *
109: * @param index The index of the papx to get.
110: * @return a papx grpprl.
111: */
112: protected byte[] getGrpprl(int index) {
113: int papxOffset = 2 * LittleEndian.getUnsignedByte(_fkp, _offset
114: + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE)));
115: int size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset
116: + papxOffset);
117: if (size == 0) {
118: size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset
119: + ++papxOffset);
120: } else {
121: size--;
122: }
123:
124: byte[] papx = new byte[size];
125: System.arraycopy(_fkp, _offset + ++papxOffset, papx, 0, size);
126: return papx;
127: }
128:
129: /**
130: * Creates a byte array representation of this data structure. Suitable for
131: * writing to a Word document.
132: *
133: * @param fcMin The file offset in the main stream where text begins.
134: * @return A byte array representing this data structure.
135: */
136: protected byte[] toByteArray(int fcMin) {
137: byte[] buf = new byte[512];
138: int size = _papxList.size();
139: int grpprlOffset = 0;
140: int bxOffset = 0;
141: int fcOffset = 0;
142: byte[] lastGrpprl = new byte[0];
143:
144: // total size is currently the size of one FC
145: int totalSize = FC_SIZE;
146:
147: int index = 0;
148: for (; index < size; index++) {
149: byte[] grpprl = ((PAPX) _papxList.get(index)).getGrpprl();
150: int grpprlLength = grpprl.length;
151:
152: // is grpprl huge?
153: if (grpprlLength > 488) {
154: grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
155: }
156:
157: // check to see if we have enough room for an FC, a BX, and the grpprl
158: // and the 1 byte size of the grpprl.
159: int addition = 0;
160: if (!Arrays.equals(grpprl, lastGrpprl)) {
161: addition = (FC_SIZE + BX_SIZE + grpprlLength + 1);
162: } else {
163: addition = (FC_SIZE + BX_SIZE);
164: }
165:
166: totalSize += addition;
167:
168: // if size is uneven we will have to add one so the first grpprl falls
169: // on a word boundary
170: if (totalSize > 511 + (index % 2)) {
171: totalSize -= addition;
172: break;
173: }
174:
175: // grpprls must fall on word boundaries
176: if (grpprlLength % 2 > 0) {
177: totalSize += 1;
178: } else {
179: totalSize += 2;
180: }
181: lastGrpprl = grpprl;
182: }
183:
184: // see if we couldn't fit some
185: if (index != size) {
186: _overFlow = new ArrayList();
187: _overFlow.addAll(_papxList.subList(index, size));
188: }
189:
190: // index should equal number of papxs that will be in this fkp now.
191: buf[511] = (byte) index;
192:
193: bxOffset = (FC_SIZE * index) + FC_SIZE;
194: grpprlOffset = 511;
195:
196: PAPX papx = null;
197: lastGrpprl = new byte[0];
198: for (int x = 0; x < index; x++) {
199: papx = (PAPX) _papxList.get(x);
200: byte[] phe = papx.getParagraphHeight().toByteArray();
201: byte[] grpprl = papx.getGrpprl();
202:
203: // is grpprl huge?
204: if (grpprl.length > 488) {
205: // if so do we have storage at getHugeGrpprlOffset()
206: int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
207: if (hugeGrpprlOffset == -1) // then we have no storage...
208: {
209: throw new UnsupportedOperationException(
210: "This Paragraph has no dataStream storage.");
211: } else // we have some storage...
212: {
213: // get the size of the existing storage
214: int maxHugeGrpprlSize = LittleEndian.getUShort(
215: _dataStream, hugeGrpprlOffset);
216:
217: if (maxHugeGrpprlSize < grpprl.length - 2) // grpprl.length-2 because we don't store the istd
218: throw new UnsupportedOperationException(
219: "This Paragraph's dataStream storage is too small.");
220: }
221:
222: // store grpprl at hugeGrpprlOffset
223: System.arraycopy(grpprl, 2, _dataStream,
224: hugeGrpprlOffset + 2, grpprl.length - 2); // grpprl.length-2 because we don't store the istd
225: LittleEndian.putUShort(_dataStream, hugeGrpprlOffset,
226: grpprl.length - 2);
227:
228: // grpprl = grpprl containing only a sprmPHugePapx2
229: int istd = LittleEndian.getUShort(grpprl, 0);
230: grpprl = new byte[8];
231: LittleEndian.putUShort(grpprl, 0, istd);
232: LittleEndian.putUShort(grpprl, 2, 0x6646); // sprmPHugePapx2
233: LittleEndian.putInt(grpprl, 4, hugeGrpprlOffset);
234: }
235:
236: boolean same = Arrays.equals(lastGrpprl, grpprl);
237: if (!same) {
238: grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
239: grpprlOffset -= (grpprlOffset % 2);
240: }
241: LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin);
242: buf[bxOffset] = (byte) (grpprlOffset / 2);
243: System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);
244:
245: // refer to the section on PAPX in the spec. Places a size on the front
246: // of the PAPX. Has to do with how the grpprl stays on word
247: // boundaries.
248: if (!same) {
249: int copyOffset = grpprlOffset;
250: if ((grpprl.length % 2) > 0) {
251: buf[copyOffset++] = (byte) ((grpprl.length + 1) / 2);
252: } else {
253: buf[++copyOffset] = (byte) ((grpprl.length) / 2);
254: copyOffset++;
255: }
256: System.arraycopy(grpprl, 0, buf, copyOffset,
257: grpprl.length);
258: lastGrpprl = grpprl;
259: }
260:
261: bxOffset += BX_SIZE;
262: fcOffset += FC_SIZE;
263:
264: }
265:
266: LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin);
267: return buf;
268: }
269:
270: /**
271: * Used to get the ParagraphHeight of a PAPX at a particular index.
272: * @param index
273: * @return The ParagraphHeight
274: */
275: private ParagraphHeight getParagraphHeight(int index) {
276: int pheOffset = _offset + 1
277: + (((_crun + 1) * 4) + (index * 13));
278:
279: ParagraphHeight phe = new ParagraphHeight(_fkp, pheOffset);
280:
281: return phe;
282: }
283: }
|