001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hslf;
019:
020: import java.io.ByteArrayInputStream;
021: import java.io.ByteArrayOutputStream;
022: import java.io.FileInputStream;
023: import java.io.FileNotFoundException;
024: import java.io.IOException;
025: import java.io.InputStream;
026: import java.io.OutputStream;
027: import java.util.ArrayList;
028: import java.util.Arrays;
029: import java.util.Hashtable;
030: import java.util.Iterator;
031: import java.util.List;
032:
033: import org.apache.poi.POIDocument;
034: import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
035: import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException;
036: import org.apache.poi.hslf.exceptions.HSLFException;
037: import org.apache.poi.hslf.record.CurrentUserAtom;
038: import org.apache.poi.hslf.record.ExOleObjStg;
039: import org.apache.poi.hslf.record.PersistPtrHolder;
040: import org.apache.poi.hslf.record.PositionDependentRecord;
041: import org.apache.poi.hslf.record.Record;
042: import org.apache.poi.hslf.record.UserEditAtom;
043: import org.apache.poi.hslf.usermodel.ObjectData;
044: import org.apache.poi.hslf.usermodel.PictureData;
045: import org.apache.poi.poifs.filesystem.DocumentEntry;
046: import org.apache.poi.poifs.filesystem.DocumentInputStream;
047: import org.apache.poi.poifs.filesystem.POIFSFileSystem;
048: import org.apache.poi.util.LittleEndian;
049: import org.apache.poi.util.POILogFactory;
050: import org.apache.poi.util.POILogger;
051:
052: /**
053: * This class contains the main functionality for the Powerpoint file
054: * "reader". It is only a very basic class for now
055: *
056: * @author Nick Burch
057: */
058:
059: public class HSLFSlideShow extends POIDocument {
060: // For logging
061: protected POILogger logger = POILogFactory.getLogger(this
062: .getClass());
063:
064: private InputStream istream;
065:
066: // Holds metadata on where things are in our document
067: private CurrentUserAtom currentUser;
068:
069: // Low level contents of the file
070: private byte[] _docstream;
071:
072: // Low level contents
073: private Record[] _records;
074:
075: // Raw Pictures contained in the pictures stream
076: private PictureData[] _pictures;
077:
078: // Embedded objects stored in storage records in the document stream, lazily populated.
079: private ObjectData[] _objects;
080:
081: /**
082: * Returns the underlying POIFSFileSystem for the document
083: * that is open.
084: */
085: protected POIFSFileSystem getPOIFSFileSystem() {
086: return filesystem;
087: }
088:
089: /**
090: * Constructs a Powerpoint document from fileName. Parses the document
091: * and places all the important stuff into data structures.
092: *
093: * @param fileName The name of the file to read.
094: * @throws IOException if there is a problem while parsing the document.
095: */
096: public HSLFSlideShow(String fileName) throws IOException {
097: this (new FileInputStream(fileName));
098: }
099:
100: /**
101: * Constructs a Powerpoint document from an input stream. Parses the
102: * document and places all the important stuff into data structures.
103: *
104: * @param inputStream the source of the data
105: * @throws IOException if there is a problem while parsing the document.
106: */
107: public HSLFSlideShow(InputStream inputStream) throws IOException {
108: //do Ole stuff
109: this (new POIFSFileSystem(inputStream));
110: istream = inputStream;
111: }
112:
113: /**
114: * Constructs a Powerpoint document from a POIFS Filesystem. Parses the
115: * document and places all the important stuff into data structures.
116: *
117: * @param filesystem the POIFS FileSystem to read from
118: * @throws IOException if there is a problem while parsing the document.
119: */
120: public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException {
121: this .filesystem = filesystem;
122:
123: // First up, grab the "Current User" stream
124: // We need this before we can detect Encrypted Documents
125: readCurrentUserStream();
126:
127: // Next up, grab the data that makes up the
128: // PowerPoint stream
129: readPowerPointStream();
130:
131: // Check to see if we have an encrypted document,
132: // bailing out if we do
133: boolean encrypted = EncryptedSlideShow.checkIfEncrypted(this );
134: if (encrypted) {
135: throw new EncryptedPowerPointFileException(
136: "Encrypted PowerPoint files are not supported");
137: }
138:
139: // Now, build records based on the PowerPoint stream
140: buildRecords();
141:
142: // Look for Property Streams:
143: readProperties();
144:
145: // Look for any other streams
146: readOtherStreams();
147:
148: // Look for Picture Streams:
149: readPictures();
150: }
151:
152: /**
153: * Constructs a new, empty, Powerpoint document.
154: */
155: public HSLFSlideShow() throws IOException {
156: this (
157: HSLFSlideShow.class
158: .getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt"));
159: }
160:
161: /**
162: * Shuts things down. Closes underlying streams etc
163: *
164: * @throws IOException
165: */
166: public void close() throws IOException {
167: if (istream != null) {
168: istream.close();
169: }
170: filesystem = null;
171: }
172:
173: /**
174: * Extracts the main PowerPoint document stream from the
175: * POI file, ready to be passed
176: *
177: * @throws IOException
178: */
179: private void readPowerPointStream() throws IOException {
180: // Get the main document stream
181: DocumentEntry docProps = (DocumentEntry) filesystem.getRoot()
182: .getEntry("PowerPoint Document");
183:
184: // Grab the document stream
185: _docstream = new byte[docProps.getSize()];
186: filesystem.createDocumentInputStream("PowerPoint Document")
187: .read(_docstream);
188: }
189:
190: /**
191: * Builds the list of records, based on the contents
192: * of the PowerPoint stream
193: */
194: private void buildRecords() {
195: // The format of records in a powerpoint file are:
196: // <little endian 2 byte "info">
197: // <little endian 2 byte "type">
198: // <little endian 4 byte "length">
199: // If it has a zero length, following it will be another record
200: // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
201: // If it has a length, depending on its type it may have children or data
202: // If it has children, these will follow straight away
203: // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
204: // If it has data, this will come straigh after, and run for the length
205: // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
206: // All lengths given exclude the 8 byte record header
207: // (Data records are known as Atoms)
208:
209: // Document should start with:
210: // 0F 00 E8 03 ## ## ## ##
211: // (type 1000 = document, info 00 0f is normal, rest is document length)
212: // 01 00 E9 03 28 00 00 00
213: // (type 1001 = document atom, info 00 01 normal, 28 bytes long)
214: // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
215: // 05 00 00 00 0A 00 00 00 xx xx xx
216: // (the contents of the document atom, not sure what it means yet)
217: // (records then follow)
218:
219: // When parsing a document, look to see if you know about that type
220: // of the current record. If you know it's a type that has children,
221: // process the record's data area looking for more records
222: // If you know about the type and it doesn't have children, either do
223: // something with the data (eg TextRun) or skip over it
224: // If you don't know about the type, play safe and skip over it (using
225: // its length to know where the next record will start)
226: //
227:
228: _records = read(_docstream, (int) currentUser
229: .getCurrentEditOffset());
230: }
231:
232: private Record[] read(byte[] docstream, int usrOffset) {
233: ArrayList lst = new ArrayList();
234: while (usrOffset != 0) {
235: UserEditAtom usr = (UserEditAtom) Record
236: .buildRecordAtOffset(docstream, usrOffset);
237: lst.add(new Integer(usrOffset));
238: int psrOffset = usr.getPersistPointersOffset();
239:
240: PersistPtrHolder ptr = (PersistPtrHolder) Record
241: .buildRecordAtOffset(docstream, psrOffset);
242: lst.add(new Integer(psrOffset));
243: Hashtable entries = ptr.getSlideLocationsLookup();
244: for (Iterator it = entries.keySet().iterator(); it
245: .hasNext();) {
246: Integer id = (Integer) it.next();
247: Integer offset = (Integer) entries.get(id);
248:
249: lst.add(offset);
250: }
251:
252: usrOffset = usr.getLastUserEditAtomOffset();
253: }
254: //sort found records by offset.
255: //(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
256: Object a[] = lst.toArray();
257: Arrays.sort(a);
258: Record[] rec = new Record[lst.size()];
259: for (int i = 0; i < a.length; i++) {
260: Integer offset = (Integer) a[i];
261: rec[i] = (Record) Record.buildRecordAtOffset(docstream,
262: offset.intValue());
263: }
264:
265: return rec;
266: }
267:
268: /**
269: * Find the "Current User" stream, and load it
270: */
271: private void readCurrentUserStream() {
272: try {
273: currentUser = new CurrentUserAtom(filesystem);
274: } catch (IOException ie) {
275: logger.log(POILogger.ERROR,
276: "Error finding Current User Atom:\n" + ie);
277: currentUser = new CurrentUserAtom();
278: }
279: }
280:
281: /**
282: * Find any other streams from the filesystem, and load them
283: */
284: private void readOtherStreams() {
285: // Currently, there aren't any
286: }
287:
288: /**
289: * Find and read in pictures contained in this presentation
290: */
291: private void readPictures() throws IOException {
292: byte[] pictstream;
293:
294: try {
295: DocumentEntry entry = (DocumentEntry) filesystem.getRoot()
296: .getEntry("Pictures");
297: pictstream = new byte[entry.getSize()];
298: DocumentInputStream is = filesystem
299: .createDocumentInputStream("Pictures");
300: is.read(pictstream);
301: } catch (FileNotFoundException e) {
302: // Silently catch exceptions if the presentation doesn't
303: // contain pictures - will use a null set instead
304: return;
305: }
306:
307: List p = new ArrayList();
308: int pos = 0;
309:
310: // An empty picture record (length 0) will take up 8 bytes
311: while (pos <= (pictstream.length - 8)) {
312: int offset = pos;
313:
314: // Image signature
315: int signature = LittleEndian.getUShort(pictstream, pos);
316: pos += LittleEndian.SHORT_SIZE;
317: // Image type + 0xF018
318: int type = LittleEndian.getUShort(pictstream, pos);
319: pos += LittleEndian.SHORT_SIZE;
320: // Image size (excluding the 8 byte header)
321: int imgsize = LittleEndian.getInt(pictstream, pos);
322: pos += LittleEndian.INT_SIZE;
323:
324: // The image size must be 0 or greater
325: // (0 is allowed, but odd, since we do wind on by the header each
326: // time, so we won't get stuck)
327: if (imgsize < 0) {
328: throw new CorruptPowerPointFileException(
329: "The file contains a picture, at position "
330: + p.size()
331: + ", which has a negatively sized data length, so we can't trust any of the picture data");
332: }
333:
334: // If they type (including the bonus 0xF018) is 0, skip it
335: if (type == 0) {
336: logger
337: .log(
338: POILogger.ERROR,
339: "Problem reading picture: Invalid image type 0, on picture with length "
340: + imgsize
341: + ".\nYou document will probably become corrupted if you save it!");
342: logger.log(POILogger.ERROR, "" + pos);
343: } else {
344: // Copy the data, ready to pass to PictureData
345: byte[] imgdata = new byte[imgsize];
346: if (imgsize > 0) {
347: System.arraycopy(pictstream, pos, imgdata, 0,
348: imgdata.length);
349: }
350:
351: // Build the PictureData object from the data
352: try {
353: PictureData pict = PictureData
354: .create(type - 0xF018);
355: pict.setRawData(imgdata);
356: pict.setOffset(offset);
357: p.add(pict);
358: } catch (IllegalArgumentException e) {
359: logger
360: .log(
361: POILogger.ERROR,
362: "Problem reading picture: "
363: + e
364: + "\nYou document will probably become corrupted if you save it!");
365: }
366: }
367:
368: pos += imgsize;
369: }
370:
371: _pictures = (PictureData[]) p
372: .toArray(new PictureData[p.size()]);
373: }
374:
375: /**
376: * Writes out the slideshow file the is represented by an instance
377: * of this class.
378: * It will write out the common OLE2 streams. If you require all
379: * streams to be written out, pass in preserveNodes
380: * @param out The OutputStream to write to.
381: * @throws IOException If there is an unexpected IOException from
382: * the passed in OutputStream
383: */
384: public void write(OutputStream out) throws IOException {
385: // Write out, but only the common streams
386: write(out, false);
387: }
388:
389: /**
390: * Writes out the slideshow file the is represented by an instance
391: * of this class.
392: * If you require all streams to be written out (eg Marcos, embeded
393: * documents), then set preserveNodes to true
394: * @param out The OutputStream to write to.
395: * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
396: * @throws IOException If there is an unexpected IOException from
397: * the passed in OutputStream
398: */
399: public void write(OutputStream out, boolean preserveNodes)
400: throws IOException {
401: // Get a new Filesystem to write into
402: POIFSFileSystem outFS = new POIFSFileSystem();
403:
404: // The list of entries we've written out
405: List writtenEntries = new ArrayList(1);
406:
407: // Write out the Property Streams
408: writeProperties(outFS, writtenEntries);
409:
410: // For position dependent records, hold where they were and now are
411: // As we go along, update, and hand over, to any Position Dependent
412: // records we happen across
413: Hashtable oldToNewPositions = new Hashtable();
414:
415: // First pass - figure out where all the position dependent
416: // records are going to end up, in the new scheme
417: // (Annoyingly, some powerpoing files have PersistPtrHolders
418: // that reference slides after the PersistPtrHolder)
419: ByteArrayOutputStream baos = new ByteArrayOutputStream();
420: for (int i = 0; i < _records.length; i++) {
421: if (_records[i] instanceof PositionDependentRecord) {
422: PositionDependentRecord pdr = (PositionDependentRecord) _records[i];
423: int oldPos = pdr.getLastOnDiskOffset();
424: int newPos = baos.size();
425: pdr.setLastOnDiskOffset(newPos);
426: oldToNewPositions.put(new Integer(oldPos), new Integer(
427: newPos));
428: //System.out.println(oldPos + " -> " + newPos);
429: }
430:
431: // Dummy write out, so the position winds on properly
432: _records[i].writeOut(baos);
433: }
434:
435: // No go back through, actually writing ourselves out
436: baos.reset();
437: for (int i = 0; i < _records.length; i++) {
438: // For now, we're only handling PositionDependentRecord's that
439: // happen at the top level.
440: // In future, we'll need the handle them everywhere, but that's
441: // a bit trickier
442: if (_records[i] instanceof PositionDependentRecord) {
443: // We've already figured out their new location, and
444: // told them that
445: // Tell them of the positions of the other records though
446: PositionDependentRecord pdr = (PositionDependentRecord) _records[i];
447: pdr.updateOtherRecordReferences(oldToNewPositions);
448: }
449:
450: // Whatever happens, write out that record tree
451: _records[i].writeOut(baos);
452: }
453: // Update our cached copy of the bytes that make up the PPT stream
454: _docstream = baos.toByteArray();
455:
456: // Write the PPT stream into the POIFS layer
457: ByteArrayInputStream bais = new ByteArrayInputStream(baos
458: .toByteArray());
459: outFS.createDocument(bais, "PowerPoint Document");
460: writtenEntries.add("PowerPoint Document");
461:
462: // Update and write out the Current User atom
463: int oldLastUserEditAtomPos = (int) currentUser
464: .getCurrentEditOffset();
465: Integer newLastUserEditAtomPos = (Integer) oldToNewPositions
466: .get(new Integer(oldLastUserEditAtomPos));
467: if (newLastUserEditAtomPos == null) {
468: throw new HSLFException(
469: "Couldn't find the new location of the UserEditAtom that used to be at "
470: + oldLastUserEditAtomPos);
471: }
472: currentUser.setCurrentEditOffset(newLastUserEditAtomPos
473: .intValue());
474: currentUser.writeToFS(outFS);
475: writtenEntries.add("Current User");
476:
477: // Write any pictures, into another stream
478: if (_pictures != null) {
479: ByteArrayOutputStream pict = new ByteArrayOutputStream();
480: for (int i = 0; i < _pictures.length; i++) {
481: _pictures[i].write(pict);
482: }
483: outFS.createDocument(new ByteArrayInputStream(pict
484: .toByteArray()), "Pictures");
485: writtenEntries.add("Pictures");
486: }
487:
488: // If requested, write out any other streams we spot
489: if (preserveNodes) {
490: copyNodes(filesystem, outFS, writtenEntries);
491: }
492:
493: // Send the POIFSFileSystem object out to the underlying stream
494: outFS.writeFilesystem(out);
495: }
496:
497: /* ******************* adding methods follow ********************* */
498:
499: /**
500: * Adds a new root level record, at the end, but before the last
501: * PersistPtrIncrementalBlock.
502: */
503: public synchronized int appendRootLevelRecord(Record newRecord) {
504: int addedAt = -1;
505: Record[] r = new Record[_records.length + 1];
506: boolean added = false;
507: for (int i = (_records.length - 1); i >= 0; i--) {
508: if (added) {
509: // Just copy over
510: r[i] = _records[i];
511: } else {
512: r[(i + 1)] = _records[i];
513: if (_records[i] instanceof PersistPtrHolder) {
514: r[i] = newRecord;
515: added = true;
516: addedAt = i;
517: }
518: }
519: }
520: _records = r;
521: return addedAt;
522: }
523:
524: /**
525: * Add a new picture to this presentation.
526: */
527: public void addPicture(PictureData img) {
528: // Copy over the existing pictures, into an array one bigger
529: PictureData[] lst;
530: if (_pictures == null) {
531: lst = new PictureData[1];
532: } else {
533: lst = new PictureData[(_pictures.length + 1)];
534: System.arraycopy(_pictures, 0, lst, 0, _pictures.length);
535: }
536: // Add in the new image
537: lst[lst.length - 1] = img;
538: _pictures = lst;
539: }
540:
541: /* ******************* fetching methods follow ********************* */
542:
543: /**
544: * Returns an array of all the records found in the slideshow
545: */
546: public Record[] getRecords() {
547: return _records;
548: }
549:
550: /**
551: * Returns an array of the bytes of the file. Only correct after a
552: * call to open or write - at all other times might be wrong!
553: */
554: public byte[] getUnderlyingBytes() {
555: return _docstream;
556: }
557:
558: /**
559: * Fetch the Current User Atom of the document
560: */
561: public CurrentUserAtom getCurrentUserAtom() {
562: return currentUser;
563: }
564:
565: /**
566: * Return array of pictures contained in this presentation
567: *
568: * @return array with the read pictures or <code>null</code> if the
569: * presentation doesn't contain pictures.
570: */
571: public PictureData[] getPictures() {
572: return _pictures;
573: }
574:
575: /**
576: * Gets embedded object data from the slide show.
577: *
578: * @return the embedded objects.
579: */
580: public ObjectData[] getEmbeddedObjects() {
581: if (_objects == null) {
582: List objects = new ArrayList();
583: for (int i = 0; i < _records.length; i++) {
584: if (_records[i] instanceof ExOleObjStg) {
585: objects.add(new ObjectData(
586: (ExOleObjStg) _records[i]));
587: }
588: }
589: _objects = (ObjectData[]) objects
590: .toArray(new ObjectData[objects.size()]);
591: }
592: return _objects;
593: }
594: }
|