001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hslf.dev;
019:
020: import org.apache.poi.util.LittleEndian;
021: import org.apache.poi.hslf.record.RecordTypes;
022: import org.apache.poi.poifs.filesystem.*;
023: import java.io.*;
024:
025: /**
026: * Utility class which dumps raw contents of a ppt file into XML format
027: *
028: * @author Yegor Kozlov
029: */
030:
031: public class PPTXMLDump {
032: public static final int HEADER_SIZE = 8; //size of the record header
033: public static final int PICT_HEADER_SIZE = 25; //size of the picture header
034: public final static String PPDOC_ENTRY = "PowerPoint Document";
035: public final static String PICTURES_ENTRY = "Pictures";
036: public static String CR = System.getProperty("line.separator");
037:
038: protected Writer out;
039: protected byte[] docstream;
040: protected byte[] pictstream;
041: protected boolean hexHeader = true;
042:
043: public PPTXMLDump(File ppt) throws IOException {
044: FileInputStream fis = new FileInputStream(ppt);
045: POIFSFileSystem fs = new POIFSFileSystem(fis);
046: fis.close();
047:
048: //read the document entry from OLE file system
049: DocumentEntry entry = (DocumentEntry) fs.getRoot().getEntry(
050: PPDOC_ENTRY);
051: docstream = new byte[entry.getSize()];
052: DocumentInputStream is = fs
053: .createDocumentInputStream(PPDOC_ENTRY);
054: is.read(docstream);
055:
056: try {
057: entry = (DocumentEntry) fs.getRoot().getEntry(
058: PICTURES_ENTRY);
059: pictstream = new byte[entry.getSize()];
060: is = fs.createDocumentInputStream(PICTURES_ENTRY);
061: is.read(pictstream);
062: } catch (FileNotFoundException e) {
063: //silently catch errors if the presentation does not contain pictures
064: }
065: }
066:
067: /**
068: * Dump the structure of the supplied PPT file into XML
069: * @param out <code>Writer</code> to write out
070: * @throws java.io.IOException
071: */
072: public void dump(Writer out) throws IOException {
073: this .out = out;
074:
075: int padding = 0;
076: write(out, "<Presentation>" + CR, padding);
077: padding++;
078: if (pictstream != null) {
079: write(out, "<Pictures>" + CR, padding);
080: dumpPictures(pictstream, padding);
081: write(out, "</Pictures>" + CR, padding);
082: }
083: //dump the structure of the powerpoint document
084: write(out, "<PowerPointDocument>" + CR, padding);
085: padding++;
086: dump(docstream, 0, docstream.length, padding);
087: padding--;
088: write(out, "</PowerPointDocument>" + CR, padding);
089: padding--;
090: write(out, "</Presentation>", padding);
091: }
092:
093: /**
094: * Dump a part of the document stream into XML
095: * @param data PPT binary data
096: * @param offset offset from the beginning of the document
097: * @param length of the document
098: * @param padding used for formatting results
099: * @throws java.io.IOException
100: */
101: public void dump(byte[] data, int offset, int length, int padding)
102: throws IOException {
103: int pos = offset;
104: while (pos <= (offset + length - HEADER_SIZE)) {
105: if (pos < 0)
106: break;
107:
108: //read record header
109: int info = LittleEndian.getUShort(data, pos);
110: pos += LittleEndian.SHORT_SIZE;
111: int type = LittleEndian.getUShort(data, pos);
112: pos += LittleEndian.SHORT_SIZE;
113: int size = (int) LittleEndian.getUInt(data, pos);
114: pos += LittleEndian.INT_SIZE;
115:
116: //get name of the record by type
117: String recname = RecordTypes.recordName(type);
118: write(out, "<" + recname + " info=\"" + info + "\" type=\""
119: + type + "\" size=\"" + size + "\" offset=\""
120: + (pos - 8) + "\"", padding);
121: if (hexHeader) {
122: out.write(" header=\"");
123: dump(out, data, pos - 8, 8, 0, false);
124: out.write("\"");
125: }
126: out.write(">" + CR);
127: padding++;
128: //this check works both for Escher and PowerPoint records
129: boolean isContainer = (info & 0x000F) == 0x000F;
130: if (isContainer) {
131: //continue to dump child records
132: dump(data, pos, size, padding);
133: } else {
134: //dump first 100 bytes of the atom data
135: dump(out, data, pos, Math.min(size, 100), padding, true);
136: }
137: padding--;
138: write(out, "</" + recname + ">" + CR, padding);
139:
140: pos += size;
141: }
142: }
143:
144: /**
145: * Dumps the Pictures OLE stream into XML.
146: *
147: * @param data from the Pictures OLE data stream
148: * @param padding
149: * @throws java.io.IOException
150: */
151: public void dumpPictures(byte[] data, int padding)
152: throws IOException {
153: int pos = 0;
154: while (pos < data.length) {
155: byte[] header = new byte[PICT_HEADER_SIZE];
156:
157: System.arraycopy(data, pos, header, 0, header.length);
158: int size = LittleEndian.getInt(header, 4) - 17;
159: byte[] pictdata = new byte[size];
160: System.arraycopy(data, pos + PICT_HEADER_SIZE, pictdata, 0,
161: pictdata.length);
162: pos += PICT_HEADER_SIZE + size;
163:
164: padding++;
165: write(out, "<picture size=\"" + size + "\" type=\""
166: + getPictureType(header) + "\">" + CR, padding);
167: padding++;
168: write(out, "<header>" + CR, padding);
169: dump(out, header, 0, header.length, padding, true);
170: write(out, "</header>" + CR, padding);
171: write(out, "<imgdata>" + CR, padding);
172: dump(out, pictdata, 0, Math.min(pictdata.length, 100),
173: padding, true);
174: write(out, "</imgdata>" + CR, padding);
175: padding--;
176: write(out, "</picture>" + CR, padding);
177: padding--;
178:
179: }
180: }
181:
182: public static void main(String[] args) throws Exception {
183: if (args.length == 0) {
184: System.out
185: .println("Usage: PPTXMLDump (options) pptfile\n"
186: + "Where options include:\n"
187: + " -f write output to <pptfile>.xml file in the current directory");
188: return;
189: }
190: boolean outFile = false;
191: for (int i = 0; i < args.length; i++) {
192:
193: if (args[i].startsWith("-")) {
194: if ("-f".equals(args[i])) {
195: //write ouput to a file
196: outFile = true;
197: }
198: } else {
199: File ppt = new File(args[i]);
200: PPTXMLDump dump = new PPTXMLDump(ppt);
201: System.out.println("Dumping " + args[i]);
202:
203: if (outFile) {
204: FileWriter out = new FileWriter(ppt.getName()
205: + ".xml");
206: dump.dump(out);
207: out.close();
208: } else {
209: StringWriter out = new StringWriter();
210: dump.dump(out);
211: System.out.println(out.toString());
212: }
213: }
214:
215: }
216: }
217:
218: /**
219: * write a string to <code>out</code> with the specified padding
220: */
221: private static void write(Writer out, String str, int padding)
222: throws IOException {
223: for (int i = 0; i < padding; i++)
224: out.write(" ");
225: out.write(str);
226: }
227:
228: private String getPictureType(byte[] header) {
229: String type;
230: int meta = LittleEndian.getUShort(header, 0);
231:
232: switch (meta) {
233: case 0x46A0:
234: type = "jpeg";
235: break;
236: case 0x2160:
237: type = "wmf";
238: break;
239: case 0x6E00:
240: type = "png";
241: break;
242: default:
243: type = "unknown";
244: break;
245: }
246: return type;
247: }
248:
249: /**
250: * dump binary data to <code>out</code> with the specified padding
251: */
252: private static void dump(Writer out, byte[] data, int offset,
253: int length, int padding, boolean nl) throws IOException {
254: int linesize = 25;
255: for (int i = 0; i < padding; i++)
256: out.write(" ");
257: int i;
258: for (i = offset; i < (offset + length); i++) {
259: int c = data[i];
260: out.write((char) hexval[(c & 0xF0) >> 4]);
261: out.write((char) hexval[(c & 0x0F) >> 0]);
262: out.write(' ');
263: if ((i + 1 - offset) % linesize == 0
264: && i != (offset + length - 1)) {
265: out.write(CR);
266: for (int j = 0; j < padding; j++)
267: out.write(" ");
268: }
269: }
270: if (nl && length > 0)
271: out.write(CR);
272: }
273:
274: private static final byte hexval[] = { (byte) '0', (byte) '1',
275: (byte) '2', (byte) '3', (byte) '4', (byte) '5', (byte) '6',
276: (byte) '7', (byte) '8', (byte) '9', (byte) 'A', (byte) 'B',
277: (byte) 'C', (byte) 'D', (byte) 'E', (byte) 'F' };
278:
279: }
|