001: // sevenzipParser.java
002: // -------------------------------------
003: // part of YACY
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2004
007: //
008: // This file ist contributed by Franz Brausze
009: //
010: // This program is free software; you can redistribute it and/or modify
011: // it under the terms of the GNU General Public License as published by
012: // the Free Software Foundation; either version 2 of the License, or
013: // (at your option) any later version.
014: //
015: // This program is distributed in the hope that it will be useful,
016: // but WITHOUT ANY WARRANTY; without even the implied warranty of
017: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
018: // GNU General Public License for more details.
019: //
020: // You should have received a copy of the GNU General Public License
021: // along with this program; if not, write to the Free Software
022: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
023: //
024: // Using this software in any meaning (reading, learning, copying, compiling,
025: // running) means that you agree that the Author(s) is (are) not responsible
026: // for cost, loss of data or any harm that may be caused directly or indirectly
027: // by usage of this softare or this documentation. The usage of this software
028: // is on your own risk. The installation and usage (starting/running) of this
029: // software may allow other people or application to access your computer and
030: // any attached devices and is highly dependent on the configuration of the
031: // software which must be done by the user of the software; the author(s) is
032: // (are) also not responsible for proper configuration and usage of the
033: // software, even if provoked by documentation provided together with
034: // the software.
035: //
036: // Any changes to this file according to the GPL as documented in the file
037: // gpl.txt aside this file in the shipment you received can be done to the
038: // lines that follows this copyright notice here, but changes must not be
039: // done inside the copyright notive above. A re-distribution must contain
040: // the intact and unchanged copyright notice.
041: // Contributions and changes to the program code must be marked as such.
042:
043: package de.anomic.plasma.parser.sevenzip;
044:
045: import java.io.File;
046: import java.io.IOException;
047: import java.io.InputStream;
048: import java.util.Hashtable;
049:
050: import SevenZip.IInStream;
051: import SevenZip.MyRandomAccessFile;
052: import SevenZip.Archive.SevenZip.Handler;
053: import de.anomic.plasma.plasmaParserDocument;
054: import de.anomic.plasma.parser.AbstractParser;
055: import de.anomic.plasma.parser.Parser;
056: import de.anomic.plasma.parser.ParserException;
057: import de.anomic.server.serverCachedFileOutputStream;
058: import de.anomic.server.serverFileUtils;
059: import de.anomic.yacy.yacyURL;
060:
061: public class sevenzipParser extends AbstractParser implements Parser {
062:
063: /**
064: * a list of mime types that are supported by this parser class
065: * @see #getSupportedMimeTypes()
066: */
067: public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
068: static {
069: SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z");
070: }
071:
072: /**
073: * a list of library names that are needed by this parser
074: * @see Parser#getLibxDependences()
075: */
076: private static final String[] LIBX_DEPENDENCIES = new String[] { "J7Zip-modified.jar" };
077:
078: public sevenzipParser() {
079: super (LIBX_DEPENDENCIES);
080: super .parserName = "7zip Archive Parser";
081: }
082:
083: public plasmaParserDocument parse(yacyURL location,
084: String mimeType, String charset, IInStream source,
085: long maxRamSize) throws ParserException,
086: InterruptedException {
087: plasmaParserDocument doc = new plasmaParserDocument(location,
088: mimeType, charset);
089: Handler archive;
090: super .theLogger.logFine("opening 7zip archive...");
091: try {
092: archive = new Handler(source);
093: } catch (IOException e) {
094: throw new ParserException("error opening 7zip archive",
095: location, e);
096: }
097: checkInterruption();
098: SZParserExtractCallback aec = new SZParserExtractCallback(
099: super .theLogger, archive, maxRamSize, doc, location
100: .getFile());
101: super .theLogger.logFine("processing archive contents...");
102: try {
103: archive.Extract(null, -1, 0, aec);
104: return doc;
105: } catch (IOException e) {
106: if (e.getCause() instanceof InterruptedException)
107: throw (InterruptedException) e.getCause();
108: if (e.getCause() instanceof ParserException)
109: throw (ParserException) e.getCause();
110: throw new ParserException(
111: "error processing 7zip archive at internal file: "
112: + aec.getCurrentFilePath(), location, e);
113: } finally {
114: try {
115: archive.close();
116: } catch (IOException e) {
117: }
118: }
119: }
120:
121: public plasmaParserDocument parse(yacyURL location,
122: String mimeType, String charset, byte[] source)
123: throws ParserException, InterruptedException {
124: return parse(location, mimeType, charset,
125: new ByteArrayIInStream(source),
126: Parser.MAX_KEEP_IN_MEMORY_SIZE - source.length);
127: }
128:
129: public plasmaParserDocument parse(yacyURL location,
130: String mimeType, String charset, File sourceFile)
131: throws ParserException, InterruptedException {
132: try {
133: return parse(location, mimeType, charset,
134: new MyRandomAccessFile(sourceFile, "r"),
135: Parser.MAX_KEEP_IN_MEMORY_SIZE);
136: } catch (IOException e) {
137: throw new ParserException("error processing 7zip archive",
138: location, e);
139: }
140: }
141:
142: public plasmaParserDocument parse(yacyURL location,
143: String mimeType, String charset, InputStream source)
144: throws ParserException, InterruptedException {
145: try {
146: serverCachedFileOutputStream cfos = new serverCachedFileOutputStream(
147: Parser.MAX_KEEP_IN_MEMORY_SIZE);
148: serverFileUtils.copy(source, cfos);
149: if (cfos.isFallback()) {
150: return parse(location, mimeType, charset, cfos
151: .getContentFile());
152: } else {
153: return parse(location, mimeType, charset, cfos
154: .getContentBAOS());
155: }
156: } catch (IOException e) {
157: throw new ParserException("error processing 7zip archive",
158: location, e);
159: }
160: }
161:
162: public Hashtable<String, String> getSupportedMimeTypes() {
163: return SUPPORTED_MIME_TYPES;
164: }
165: }
|