001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hdf.event;
019:
020: import org.apache.poi.hdf.model.util.BTreeSet;
021: import org.apache.poi.hdf.model.util.NumberFormatter;
022: import org.apache.poi.hdf.model.hdftypes.*;
023:
024: import org.apache.poi.util.LittleEndian;
025:
026: import java.util.ArrayList;
027:
028: public class EventBridge implements HDFLowLevelParsingListener {
029:
030: private static int HEADER_EVEN_INDEX = 0;
031: private static int HEADER_ODD_INDEX = 1;
032: private static int FOOTER_EVEN_INDEX = 2;
033: private static int FOOTER_ODD_INDEX = 3;
034: private static int HEADER_FIRST_INDEX = 4;
035: private static int FOOTER_FIRST_INDEX = 5;
036:
037: /** This class translates low level events into high level events for this
038: * listener */
039: HDFParsingListener _listener;
040: /** stylesheet for this document */
041: StyleSheet _stsh;
042: /** name says it all */
043: DocumentProperties _dop;
044: /** StyleDescription for the current paragraph. */
045: StyleDescription _currentStd;
046: /** List info for this doc */
047: ListTables _listTables;
048:
049: /** "WordDocument" from the POIFS */
050: byte[] _mainDocument;
051: /** Table0 or Table1 from POIFS */
052: byte[] _tableStream;
053:
054: /** text offset in main stream */
055: int _fcMin;
056: int _ccpText;
057: int _ccpFtn;
058: int _hdrSize;
059: int _hdrOffset;
060:
061: /** text pieces */
062: BTreeSet _text = new BTreeSet();
063:
064: private boolean _beginHeaders;
065: BTreeSet _hdrSections = new BTreeSet();
066: BTreeSet _hdrParagraphs = new BTreeSet();
067: BTreeSet _hdrCharacterRuns = new BTreeSet();
068:
069: int _sectionCounter = 1;
070: ArrayList _hdrs = new ArrayList();
071:
072: private boolean _holdParagraph = false;
073: private int _endHoldIndex = -1;
074: private ArrayList _onHold;
075:
076: public EventBridge(HDFParsingListener listener) {
077: _listener = listener;
078: }
079:
080: public void mainDocument(byte[] mainDocument) {
081: _mainDocument = mainDocument;
082: }
083:
084: public void tableStream(byte[] tableStream) {
085: _tableStream = tableStream;
086: }
087:
088: public void miscellaneous(int fcMin, int ccpText, int ccpFtn,
089: int fcPlcfhdd, int lcbPlcfhdd) {
090: _fcMin = fcMin;
091: _ccpText = ccpText;
092: _ccpFtn = ccpFtn;
093: _hdrOffset = fcPlcfhdd;
094: _hdrSize = lcbPlcfhdd;
095: }
096:
097: public void document(DocumentProperties dop) {
098: _dop = dop;
099: }
100:
101: public void bodySection(SepxNode sepx) {
102: SectionProperties sep = (SectionProperties) StyleSheet
103: .uncompressProperty(sepx.getSepx(),
104: new SectionProperties(), _stsh);
105: HeaderFooter[] hdrArray = findSectionHdrFtrs(_sectionCounter);
106: _hdrs.add(hdrArray);
107: _listener.section(sep, sepx.getStart() - _fcMin, sepx.getEnd()
108: - _fcMin);
109: _sectionCounter++;
110: }
111:
112: public void hdrSection(SepxNode sepx) {
113: _beginHeaders = true;
114: _hdrSections.add(sepx);
115: }
116:
117: public void endSections() {
118: for (int x = 1; x < _sectionCounter; x++) {
119: HeaderFooter[] hdrArray = (HeaderFooter[]) _hdrs.get(x - 1);
120: HeaderFooter hf = null;
121:
122: if (!hdrArray[HeaderFooter.HEADER_EVEN - 1].isEmpty()) {
123: hf = hdrArray[HeaderFooter.HEADER_EVEN - 1];
124: _listener.header(x - 1, HeaderFooter.HEADER_EVEN);
125: flushHeaderProps(hf.getStart(), hf.getEnd());
126: }
127: if (!hdrArray[HeaderFooter.HEADER_ODD - 1].isEmpty()) {
128: hf = hdrArray[HeaderFooter.HEADER_ODD - 1];
129: _listener.header(x - 1, HeaderFooter.HEADER_ODD);
130: flushHeaderProps(hf.getStart(), hf.getEnd());
131: }
132: if (!hdrArray[HeaderFooter.FOOTER_EVEN - 1].isEmpty()) {
133: hf = hdrArray[HeaderFooter.FOOTER_EVEN - 1];
134: _listener.footer(x - 1, HeaderFooter.FOOTER_EVEN);
135: flushHeaderProps(hf.getStart(), hf.getEnd());
136: }
137: if (!hdrArray[HeaderFooter.FOOTER_ODD - 1].isEmpty()) {
138: hf = hdrArray[HeaderFooter.FOOTER_EVEN - 1];
139: _listener.footer(x - 1, HeaderFooter.FOOTER_EVEN);
140: flushHeaderProps(hf.getStart(), hf.getEnd());
141: }
142: if (!hdrArray[HeaderFooter.HEADER_FIRST - 1].isEmpty()) {
143: hf = hdrArray[HeaderFooter.HEADER_FIRST - 1];
144: _listener.header(x - 1, HeaderFooter.HEADER_FIRST);
145: flushHeaderProps(hf.getStart(), hf.getEnd());
146: }
147: if (!hdrArray[HeaderFooter.FOOTER_FIRST - 1].isEmpty()) {
148: hf = hdrArray[HeaderFooter.FOOTER_FIRST - 1];
149: _listener.footer(x - 1, HeaderFooter.FOOTER_FIRST);
150: flushHeaderProps(hf.getStart(), hf.getEnd());
151: }
152: }
153: }
154:
155: public void paragraph(PapxNode papx) {
156: if (_beginHeaders) {
157: _hdrParagraphs.add(papx);
158: }
159: byte[] bytePapx = papx.getPapx();
160: int istd = LittleEndian.getShort(bytePapx, 0);
161: _currentStd = _stsh.getStyleDescription(istd);
162:
163: ParagraphProperties pap = (ParagraphProperties) StyleSheet
164: .uncompressProperty(bytePapx, _currentStd.getPAP(),
165: _stsh);
166:
167: if (pap.getFTtp() > 0) {
168: TableProperties tap = (TableProperties) StyleSheet
169: .uncompressProperty(bytePapx,
170: new TableProperties(), _stsh);
171: _listener.tableRowEnd(tap, papx.getStart() - _fcMin, papx
172: .getEnd()
173: - _fcMin);
174: } else if (pap.getIlfo() > 0) {
175: _holdParagraph = true;
176: _endHoldIndex = papx.getEnd();
177: _onHold.add(papx);
178: } else {
179: _listener.paragraph(pap, papx.getStart() - _fcMin, papx
180: .getEnd()
181: - _fcMin);
182: }
183: }
184:
185: public void characterRun(ChpxNode chpx) {
186: if (_beginHeaders) {
187: _hdrCharacterRuns.add(chpx);
188: }
189:
190: int start = chpx.getStart();
191: int end = chpx.getEnd();
192: //check to see if we should hold this characterRun
193: if (_holdParagraph) {
194: _onHold.add(chpx);
195: if (end >= _endHoldIndex) {
196: _holdParagraph = false;
197: _endHoldIndex = -1;
198: flushHeldParagraph();
199: _onHold = new ArrayList();
200: }
201: }
202:
203: byte[] byteChpx = chpx.getChpx();
204:
205: CharacterProperties chp = (CharacterProperties) StyleSheet
206: .uncompressProperty(byteChpx, _currentStd.getCHP(),
207: _stsh);
208:
209: ArrayList textList = BTreeSet.findProperties(start, end,
210: _text.root);
211: String text = getTextFromNodes(textList, start, end);
212:
213: _listener.characterRun(chp, text, start - _fcMin, end - _fcMin);
214: }
215:
216: public void text(TextPiece t) {
217: _text.add(t);
218: }
219:
220: public void fonts(FontTable fontTbl) {
221: }
222:
223: public void lists(ListTables listTbl) {
224: _listTables = listTbl;
225: }
226:
227: public void styleSheet(StyleSheet stsh) {
228: _stsh = stsh;
229: }
230:
231: private void flushHeaderProps(int start, int end) {
232: ArrayList list = BTreeSet.findProperties(start, end,
233: _hdrSections.root);
234: int size = list.size();
235:
236: for (int x = 0; x < size; x++) {
237: SepxNode oldNode = (SepxNode) list.get(x);
238: int secStart = Math.max(oldNode.getStart(), start);
239: int secEnd = Math.min(oldNode.getEnd(), end);
240:
241: //SepxNode node = new SepxNode(-1, secStart, secEnd, oldNode.getSepx());
242: //bodySection(node);
243:
244: ArrayList parList = BTreeSet.findProperties(secStart,
245: secEnd, _hdrParagraphs.root);
246: int parSize = parList.size();
247:
248: for (int y = 0; y < parSize; y++) {
249: PapxNode oldParNode = (PapxNode) parList.get(y);
250: int parStart = Math
251: .max(oldParNode.getStart(), secStart);
252: int parEnd = Math.min(oldParNode.getEnd(), secEnd);
253:
254: PapxNode parNode = new PapxNode(parStart, parEnd,
255: oldParNode.getPapx());
256: paragraph(parNode);
257:
258: ArrayList charList = BTreeSet.findProperties(parStart,
259: parEnd, _hdrCharacterRuns.root);
260: int charSize = charList.size();
261:
262: for (int z = 0; z < charSize; z++) {
263: ChpxNode oldCharNode = (ChpxNode) charList.get(z);
264: int charStart = Math.max(oldCharNode.getStart(),
265: parStart);
266: int charEnd = Math
267: .min(oldCharNode.getEnd(), parEnd);
268:
269: ChpxNode charNode = new ChpxNode(charStart,
270: charEnd, oldCharNode.getChpx());
271: characterRun(charNode);
272: }
273: }
274:
275: }
276:
277: }
278:
279: private String getTextFromNodes(ArrayList list, int start, int end) {
280: int size = list.size();
281:
282: StringBuffer sb = new StringBuffer();
283:
284: for (int x = 0; x < size; x++) {
285: TextPiece piece = (TextPiece) list.get(x);
286: int charStart = Math.max(start, piece.getStart());
287: int charEnd = Math.min(end, piece.getEnd());
288:
289: if (piece.usesUnicode()) {
290: for (int y = charStart; y < charEnd; y += 2) {
291: sb.append((char) LittleEndian.getShort(
292: _mainDocument, y));
293: }
294: } else {
295: for (int y = charStart; y < charEnd; y++) {
296: sb.append(_mainDocument[y]);
297: }
298: }
299: }
300: return sb.toString();
301: }
302:
303: private void flushHeldParagraph() {
304: PapxNode papx = (PapxNode) _onHold.get(0);
305: byte[] bytePapx = papx.getPapx();
306: int istd = LittleEndian.getShort(bytePapx, 0);
307: StyleDescription std = _stsh.getStyleDescription(istd);
308:
309: ParagraphProperties pap = (ParagraphProperties) StyleSheet
310: .uncompressProperty(bytePapx, _currentStd.getPAP(),
311: _stsh);
312: LVL lvl = _listTables.getLevel(pap.getIlfo(), pap.getIlvl());
313: pap = (ParagraphProperties) StyleSheet.uncompressProperty(
314: lvl._papx, pap, _stsh, false);
315:
316: int size = _onHold.size() - 1;
317:
318: CharacterProperties numChp = (CharacterProperties) StyleSheet
319: .uncompressProperty(((ChpxNode) _onHold.get(size))
320: .getChpx(), std.getCHP(), _stsh);
321:
322: numChp = (CharacterProperties) StyleSheet.uncompressProperty(
323: lvl._chpx, numChp, _stsh);
324: String bulletText = getBulletText(lvl, pap);
325:
326: _listener.listEntry(bulletText, numChp, pap, papx.getStart()
327: - _fcMin, papx.getEnd() - _fcMin);
328: for (int x = 1; x <= size; x++) {
329: characterRun((ChpxNode) _onHold.get(x));
330: }
331:
332: }
333:
334: private String getBulletText(LVL lvl, ParagraphProperties pap) {
335: StringBuffer bulletBuffer = new StringBuffer();
336: for (int x = 0; x < lvl._xst.length; x++) {
337: if (lvl._xst[x] < 9) {
338: LVL numLevel = _listTables.getLevel(pap.getIlfo(),
339: lvl._xst[x]);
340: int num = numLevel._iStartAt;
341: if (lvl == numLevel) {
342: numLevel._iStartAt++;
343: } else if (num > 1) {
344: num--;
345: }
346: bulletBuffer.append(NumberFormatter.getNumber(num,
347: lvl._nfc));
348:
349: } else {
350: bulletBuffer.append(lvl._xst[x]);
351: }
352:
353: }
354:
355: switch (lvl._ixchFollow) {
356: case 0:
357: bulletBuffer.append('\u0009');
358: break;
359: case 1:
360: bulletBuffer.append(' ');
361: break;
362: }
363: return bulletBuffer.toString();
364: }
365:
366: private HeaderFooter[] findSectionHdrFtrs(int index) {
367: HeaderFooter[] hdrArray = new HeaderFooter[6];
368:
369: for (int x = 1; x < 7; x++) {
370: hdrArray[x - 1] = createSectionHdrFtr(index, x);
371: }
372:
373: return hdrArray;
374: }
375:
376: private HeaderFooter createSectionHdrFtr(int index, int type) {
377: if (_hdrSize < 50) {
378: return new HeaderFooter(0, 0, 0);
379: }
380:
381: int start = _fcMin + _ccpText + _ccpFtn;
382: int end = start;
383: int arrayIndex = 0;
384:
385: switch (type) {
386: case HeaderFooter.HEADER_EVEN:
387: arrayIndex = (HEADER_EVEN_INDEX + (index * 6));
388: break;
389: case HeaderFooter.FOOTER_EVEN:
390: arrayIndex = (FOOTER_EVEN_INDEX + (index * 6));
391: break;
392: case HeaderFooter.HEADER_ODD:
393: arrayIndex = (HEADER_ODD_INDEX + (index * 6));
394: break;
395: case HeaderFooter.FOOTER_ODD:
396: arrayIndex = (FOOTER_ODD_INDEX + (index * 6));
397: break;
398: case HeaderFooter.HEADER_FIRST:
399: arrayIndex = (HEADER_FIRST_INDEX + (index * 6));
400: break;
401: case HeaderFooter.FOOTER_FIRST:
402: arrayIndex = (FOOTER_FIRST_INDEX + (index * 6));
403: break;
404: }
405: start += LittleEndian.getInt(_tableStream, _hdrOffset
406: + (arrayIndex * 4));
407: end += LittleEndian.getInt(_tableStream, _hdrOffset
408: + (arrayIndex + 1) * 4);
409:
410: HeaderFooter retValue = new HeaderFooter(type, start, end);
411:
412: if ((end - start) == 0 && index > 1) {
413: retValue = createSectionHdrFtr(type, index - 1);
414: }
415: return retValue;
416: }
417: }
|