001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hsmf.parsers;
019:
020: import java.io.ByteArrayOutputStream;
021: import java.io.IOException;
022: import java.util.ArrayList;
023: import java.util.HashMap;
024: import java.util.Iterator;
025:
026: import org.apache.poi.hsmf.datatypes.Chunk;
027: import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
028: import org.apache.poi.hsmf.exceptions.DirectoryChunkNotFoundException;
029: import org.apache.poi.poifs.filesystem.DirectoryEntry;
030: import org.apache.poi.poifs.filesystem.DirectoryNode;
031: import org.apache.poi.poifs.filesystem.DocumentNode;
032: import org.apache.poi.poifs.filesystem.POIFSDocument;
033: import org.apache.poi.poifs.filesystem.POIFSFileSystem;
034: import org.apache.poi.poifs.property.DirectoryProperty;
035: import org.apache.poi.poifs.property.DocumentProperty;
036: import org.apache.poi.poifs.storage.BlockWritable;
037:
038: /**
039: * Provides a HashMap with the ability to parse a PIOFS object and provide
040: * an 'easy to access' hashmap structure for the document chunks inside it.
041: *
042: * @author Travis Ferguson
043: */
044: public class POIFSChunkParser {
045: /**
046: * Constructor
047: * @param fs
048: * @throws IOException
049: */
050: public POIFSChunkParser(POIFSFileSystem fs) throws IOException {
051: this .setFileSystem(fs);
052: }
053:
054: /**
055: * Set the POIFileSystem object that this object is using.
056: * @param fs
057: * @throws IOException
058: */
059: public void setFileSystem(POIFSFileSystem fs) throws IOException {
060: this .fs = fs;
061: this .reparseFileSystem();
062: }
063:
064: /**
065: * Get a reference to the FileSystem object that this object is currently using.
066: * @return
067: */
068: public POIFSFileSystem getFileSystem() {
069: return this .fs;
070: }
071:
072: /**
073: * Reparse the FileSystem object, resetting all the chunks stored in this object
074: * @throws IOException
075: *
076: */
077: public void reparseFileSystem() throws IOException {
078: // first clear this object of all chunks
079: DirectoryEntry root = this .fs.getRoot();
080: Iterator iter = root.getEntries();
081:
082: this .directoryMap = this .processPOIIterator(iter);
083: }
084:
085: /**
086: * Pull the chunk data that's stored in this object's hashmap out and return it as a HashMap.
087: * @param entryName
088: * @return
089: */
090: public Object getChunk(HashMap dirMap, String entryName) {
091: if (dirMap == null)
092: return null;
093: else {
094: return dirMap.get(entryName);
095: }
096: }
097:
098: /**
099: * Pull a directory/hashmap out of this hashmap and return it
100: * @param directoryName
101: * @return HashMap containing the chunks stored in the named directoryChunk
102: * @throws DirectoryChunkNotFoundException This is thrown should the directoryMap HashMap on this object be null
103: * or for some reason the directory is not found, is equal to null, or is for some reason not a HashMap/aka Directory Node.
104: */
105: public HashMap getDirectoryChunk(String directoryName)
106: throws DirectoryChunkNotFoundException {
107: DirectoryChunkNotFoundException excep = new DirectoryChunkNotFoundException(
108: directoryName);
109: Object obj = getChunk(this .directoryMap, directoryName);
110: if (obj == null || !(obj instanceof HashMap))
111: throw excep;
112:
113: return (HashMap) obj;
114: }
115:
116: /**
117: * Pulls a ByteArrayOutputStream from this objects HashMap, this can be used to read a byte array of the contents of the given chunk.
118: * @param directoryMap, chunk
119: * @return
120: * @throws ChunkNotFoundException
121: */
122: public Chunk getDocumentNode(HashMap dirNode, Chunk chunk)
123: throws ChunkNotFoundException {
124: String entryName = chunk.getEntryName();
125: ChunkNotFoundException excep = new ChunkNotFoundException(
126: entryName);
127: Object obj = getChunk(dirNode, entryName);
128: if (obj == null || !(obj instanceof ByteArrayOutputStream))
129: throw excep;
130:
131: chunk.setValue((ByteArrayOutputStream) obj);
132:
133: return chunk;
134: }
135:
136: /**
137: * Pulls a Chunk out of this objects root Node tree.
138: * @param chunk
139: * @return
140: * @throws ChunkNotFoundException
141: */
142: public Chunk getDocumentNode(Chunk chunk)
143: throws ChunkNotFoundException {
144: return getDocumentNode(this .directoryMap, chunk);
145: }
146:
147: /**
148: * Processes an iterator returned by a POIFS call to getRoot().getEntries()
149: * @param iter
150: * @return
151: * @throws IOException
152: */
153: private HashMap processPOIIterator(Iterator iter)
154: throws IOException {
155: HashMap currentNode = new HashMap();
156:
157: while (iter.hasNext()) {
158: Object obj = iter.next();
159: if (obj instanceof DocumentNode) {
160: this .processDocumentNode((DocumentNode) obj,
161: currentNode);
162: } else if (obj instanceof DirectoryNode) {
163: String blockName = ((DirectoryNode) obj).getName();
164: Iterator viewIt = null;
165: if (((DirectoryNode) obj).preferArray()) {
166: Object[] arr = ((DirectoryNode) obj)
167: .getViewableArray();
168: ArrayList viewList = new ArrayList(arr.length);
169:
170: for (int i = 0; i < arr.length; i++) {
171: viewList.add(arr[i]);
172: }
173: viewIt = viewList.iterator();
174: } else {
175: viewIt = ((DirectoryNode) obj)
176: .getViewableIterator();
177: }
178: //store the next node on the hashmap
179: currentNode.put(blockName, processPOIIterator(viewIt));
180: } else if (obj instanceof DirectoryProperty) {
181: //don't do anything with the directory property chunk...
182: } else {
183: System.err.println("Unknown node: " + obj.toString());
184: }
185: }
186: return currentNode;
187: }
188:
189: /**
190: * Processes a document node and adds it to the current directory HashMap
191: * @param obj
192: * @throws java.io.IOException
193: */
194: private void processDocumentNode(DocumentNode obj,
195: HashMap currentObj) throws IOException {
196: String blockName = ((DocumentNode) obj).getName();
197:
198: Iterator viewIt = null;
199: if (((DocumentNode) obj).preferArray()) {
200: Object[] arr = ((DocumentNode) obj).getViewableArray();
201: ArrayList viewList = new ArrayList(arr.length);
202:
203: for (int i = 0; i < arr.length; i++) {
204: viewList.add(arr[i]);
205: }
206: viewIt = viewList.iterator();
207: } else {
208: viewIt = ((DocumentNode) obj).getViewableIterator();
209: }
210:
211: while (viewIt.hasNext()) {
212: Object view = viewIt.next();
213:
214: if (view instanceof DocumentProperty) {
215: //we don't care about the properties
216: } else if (view instanceof POIFSDocument) {
217: //check if our node has blocks or if it can just be read raw.
218: int blockCount = ((POIFSDocument) view).countBlocks();
219: //System.out.println("Block Name: " + blockName);
220: if (blockCount <= 0) {
221: ByteArrayOutputStream out = new ByteArrayOutputStream();
222:
223: BlockWritable[] bws = ((POIFSDocument) view)
224: .getSmallBlocks();
225: for (int i = 0; i < bws.length; i++) {
226: bws[i].writeBlocks(out);
227: }
228: currentObj.put(blockName, out);
229: } else {
230: ByteArrayOutputStream out = new ByteArrayOutputStream();
231: ((POIFSDocument) view).writeBlocks(out);
232: currentObj.put(blockName, out);
233: }
234: } else {
235: System.err.println("Unknown View Type: "
236: + view.toString());
237: }
238: }
239: }
240:
241: /* private instance variables */
242: private static final long serialVersionUID = 1L;
243: private POIFSFileSystem fs;
244: private HashMap directoryMap;
245: }
|