001: //rpmParser.java
002: //------------------------
003: //part of YaCy
004: //(C) by Michael Peter Christen; mc@anomic.de
005: //first published on http://www.anomic.de
006: //Frankfurt, Germany, 2005
007: //
008: //this file is contributed by Martin Thelian
009: //last major change: 20.11.2005
010: //
011: //This program is free software; you can redistribute it and/or modify
012: //it under the terms of the GNU General Public License as published by
013: //the Free Software Foundation; either version 2 of the License, or
014: //(at your option) any later version.
015: //
016: //This program is distributed in the hope that it will be useful,
017: //but WITHOUT ANY WARRANTY; without even the implied warranty of
018: //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: //GNU General Public License for more details.
020: //
021: //You should have received a copy of the GNU General Public License
022: //along with this program; if not, write to the Free Software
023: //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: //
025: //Using this software in any meaning (reading, learning, copying, compiling,
026: //running) means that you agree that the Author(s) is (are) not responsible
027: //for cost, loss of data or any harm that may be caused directly or indirectly
028: //by usage of this softare or this documentation. The usage of this software
029: //is on your own risk. The installation and usage (starting/running) of this
030: //software may allow other people or application to access your computer and
031: //any attached devices and is highly dependent on the configuration of the
032: //software which must be done by the user of the software; the author(s) is
033: //(are) also not responsible for proper configuration and usage of the
034: //software, even if provoked by documentation provided together with
035: //the software.
036: //
037: //Any changes to this file according to the GPL as documented in the file
038: //gpl.txt aside this file in the shipment you received can be done to the
039: //lines that follows this copyright notice here, but changes must not be
040: //done inside the copyright notive above. A re-distribution must contain
041: //the intact and unchanged copyright notice.
042: //Contributions and changes to the program code must be marked as such.
043:
044: package de.anomic.plasma.parser.rpm;
045:
046: import java.io.ByteArrayInputStream;
047: import java.io.File;
048: import java.io.InputStream;
049: import java.util.HashMap;
050: import java.util.Hashtable;
051:
052: import com.jguild.jrpm.io.RPMFile;
053: import com.jguild.jrpm.io.datatype.DataTypeIf;
054:
055: import de.anomic.http.httpc;
056: import de.anomic.plasma.plasmaParserDocument;
057: import de.anomic.plasma.parser.AbstractParser;
058: import de.anomic.plasma.parser.Parser;
059: import de.anomic.plasma.parser.ParserException;
060: import de.anomic.server.serverFileUtils;
061: import de.anomic.yacy.yacyURL;
062:
063: /**
064: * @author theli
065: *
066: */
067: public class rpmParser extends AbstractParser implements Parser {
068:
069: /**
070: * a list of mime types that are supported by this parser class
071: * @see #getSupportedMimeTypes()
072: */
073: public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
074: static {
075: SUPPORTED_MIME_TYPES.put("application/x-rpm", "rpm");
076: SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager",
077: "rpm");
078: SUPPORTED_MIME_TYPES.put(
079: "application/x-redhat-package-manager", "rpm");
080: }
081:
082: /**
083: * a list of library names that are needed by this parser
084: * @see Parser#getLibxDependences()
085: */
086: private static final String[] LIBX_DEPENDENCIES = new String[] { "jrpm-head.jar" };
087:
088: public rpmParser() {
089: super (LIBX_DEPENDENCIES);
090: this .parserName = "rpm Parser";
091: }
092:
093: public Hashtable<String, String> getSupportedMimeTypes() {
094: return SUPPORTED_MIME_TYPES;
095: }
096:
097: public plasmaParserDocument parse(yacyURL location,
098: String mimeType, String charset, InputStream source)
099: throws ParserException {
100: File dstFile = null;
101: try {
102: dstFile = File.createTempFile("rpmParser", ".tmp");
103: serverFileUtils.copy(source, dstFile);
104: return parse(location, mimeType, charset, dstFile);
105: } catch (Exception e) {
106: return null;
107: } finally {
108: if (dstFile != null) {
109: dstFile.delete();
110: }
111: }
112: }
113:
114: public plasmaParserDocument parse(yacyURL location,
115: String mimeType, String charset, File sourceFile)
116: throws ParserException, InterruptedException {
117: RPMFile rpmFile = null;
118: try {
119: String summary = null, description = null, packager = null, name = sourceFile
120: .getName();
121: HashMap<yacyURL, String> anchors = new HashMap<yacyURL, String>();
122: StringBuffer content = new StringBuffer();
123:
124: // opening the rpm file
125: rpmFile = new RPMFile(sourceFile);
126:
127: // parsing the file
128: rpmFile.parse();
129:
130: // getting all header names
131: String[] headerNames = rpmFile.getTagNames();
132: for (int i = 0; i < headerNames.length; i++) {
133: // check for interruption
134: checkInterruption();
135:
136: // getting the next tag
137: DataTypeIf tag = rpmFile.getTag(headerNames[i]);
138: if (tag == null)
139: continue;
140:
141: content.append(headerNames[i]).append(": ").append(
142: tag.toString()).append("\n");
143:
144: if (headerNames[i].equalsIgnoreCase("N"))
145: name = tag.toString();
146: else if (headerNames[i].equalsIgnoreCase("SUMMARY"))
147: summary = tag.toString();
148: else if (headerNames[i].equalsIgnoreCase("DESCRIPTION"))
149: description = tag.toString();
150: else if (headerNames[i].equalsIgnoreCase("PACKAGER"))
151: packager = tag.toString();
152: else if (headerNames[i].equalsIgnoreCase("URL"))
153: anchors.put(new yacyURL(tag.toString(), null), tag
154: .toString());
155: }
156:
157: // closing the rpm file
158: rpmFile.close();
159: rpmFile = null;
160: if (summary == null)
161: summary = name;
162:
163: plasmaParserDocument theDoc = new plasmaParserDocument(
164: location, mimeType, "UTF-8", null, summary,
165: packager, null, description, content.toString()
166: .getBytes("UTF-8"), anchors, null);
167:
168: return theDoc;
169: } catch (Exception e) {
170: if (e instanceof InterruptedException)
171: throw (InterruptedException) e;
172: if (e instanceof ParserException)
173: throw (ParserException) e;
174:
175: throw new ParserException(
176: "Unexpected error while parsing rpm file. "
177: + e.getMessage(), location);
178: } finally {
179: if (rpmFile != null)
180: try {
181: rpmFile.close();
182: } catch (Exception e) {/* ignore this */
183: }
184: }
185: }
186:
187: public void reset() {
188: // Nothing todo here at the moment
189: super .reset();
190: }
191:
192: public static void main(String[] args) {
193: try {
194: yacyURL contentUrl = new yacyURL(args[0], null);
195:
196: rpmParser testParser = new rpmParser();
197: byte[] content = httpc.singleGET(contentUrl, contentUrl
198: .getHost(), 10000, null, null, null, null);
199: ByteArrayInputStream input = new ByteArrayInputStream(
200: content);
201: testParser.parse(contentUrl, "application/x-rpm", null,
202: input);
203: } catch (Exception e) {
204: e.printStackTrace();
205: }
206: }
207: }
|