001: package ca.ulaval.bibl.lius.index.OpenOffice;
002:
003: /*
004: * LIUS - Lucene Index Update and Search
005: * http://sourceforge.net/projects/lius/
006: *
007: * Copyright (c) 2005, Laval University Library. All rights reserved.
008: *
009: * This library is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU Lesser General Public
011: * License as published by the Free Software Foundation; either
012: * version 2.1 of the License, or (at your option) any later version.
013: *
014: * This library is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017: * Lesser General Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public
020: * License along with this library; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
022: */
023:
024: import java.io.BufferedOutputStream;
025: import java.io.File;
026: import java.io.FileOutputStream;
027: import java.io.IOException;
028: import java.io.InputStream;
029: import java.io.OutputStream;
030: import java.util.ArrayList;
031: import java.util.Collection;
032: import java.util.Enumeration;
033: import java.util.List;
034: import java.util.zip.ZipEntry;
035: import java.util.zip.ZipFile;
036:
037: import org.apache.log4j.Logger;
038: import org.apache.lucene.document.Document;
039: import org.jdom.JDOMException;
040: import org.jdom.Namespace;
041: import org.jdom.input.SAXBuilder;
042:
043: import ca.ulaval.bibl.lius.config.LiusConfig;
044: import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
045: import ca.ulaval.bibl.lius.index.XML.XmlFileIndexer;
046:
047: /**
048: *
049: * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
050: *
051: */
052:
053: public class OOIndexer
054:
055: extends XmlFileIndexer {
056:
057: private List zipFiles = new ArrayList();
058:
059: static Logger logger = Logger.getRootLogger();
060:
061: private final String TMP_UNZIP_DIR = "tmpUnzipOOForLius";
062:
063: private final Namespace NS_OO = Namespace.getNamespace("office",
064:
065: "http://openoffice.org/2000/office");
066:
067: public final Namespace NS_DC = Namespace.getNamespace("dc",
068:
069: "http://purl.org/dc/elements/1.1/");
070:
071: private final Namespace NS_OOMETA = Namespace.getNamespace("meta",
072:
073: "http://openoffice.org/2000/meta");
074:
075: public Object parse(Object file) {
076: org.jdom.Document xmlDocContent = new org.jdom.Document();
077: org.jdom.Document xmlMeta = new org.jdom.Document();
078:
079: try {
080: List files = (List) file;
081: SAXBuilder builder = new SAXBuilder();
082: builder.setEntityResolver(new OpenOfficeEntityResolver());
083: builder.setValidation(false);
084: xmlDocContent = builder.build(new File((String) files
085: .get(0)));
086: xmlMeta = builder.build(new File((String) files.get(1)));
087: xmlDocContent.getRootElement().addContent(
088: xmlMeta.getRootElement().getChild("meta", NS_OO)
089: .detach());
090: xmlDocContent.getRootElement().addNamespaceDeclaration(
091: NS_DC);
092: xmlDocContent.getRootElement().addNamespaceDeclaration(
093: NS_OOMETA);
094: } catch (JDOMException e) {
095: logger.error(e.getMessage());
096: deleteTmp((String) file);
097: } catch (IOException e) {
098: logger.error(e.getMessage());
099: deleteTmp((String) file);
100: }
101:
102: return xmlDocContent;
103:
104: }
105:
106: private List getFilesToIndex(String file) {
107: String dir = (file.substring(0, ((String) file)
108: .lastIndexOf(File.separator)));
109: zipFiles = unzip((String) file, dir + File.separator
110: + TMP_UNZIP_DIR + File.separator);
111: List ls = new ArrayList();
112: ls.add(0, dir + File.separator + TMP_UNZIP_DIR + File.separator
113: + "content.xml");
114: ls.add(1, dir + File.separator + TMP_UNZIP_DIR + File.separator
115: + "meta.xml");
116:
117: return ls;
118: }
119:
120: public Document createLuceneDocument(String file, LiusConfig lc) {
121: Document doc = createLuceneDocument(file, lc.getOOFields());
122: return doc;
123: }
124:
125: public Collection getLiusFields(LiusConfig lc) {
126: return lc.getOOFields();
127: }
128:
129: public Collection getPopulatedCollection(Object file,
130: Collection liusFields) {
131:
132: List toIndex = getFilesToIndex((String) file);
133: org.jdom.Document jdomDoc = (org.jdom.Document) this
134: .parse(toIndex);
135: Collection coll = super .getPopulatedCollection(jdomDoc,
136: liusFields);
137: String dir = (((String) file).substring(0, ((String) file)
138: .lastIndexOf(File.separator)))
139: + File.separator + TMP_UNZIP_DIR;
140: deleteDir(new File(dir));
141: return coll;
142: }
143:
144: public Collection getPopulatedCollection(Object file,
145: String liusConfig) {
146: LiusConfig lc = LiusConfigBuilder.getSingletonInstance()
147: .getLiusConfig(liusConfig);
148: return getPopulatedCollection(file, lc);
149: }
150:
151: public Collection getPopulatedCollection(Object file, LiusConfig lc) {
152: return getPopulatedCollection(file, lc.getOOFields());
153: }
154:
155: private List unzip(String zip, String destination) {
156: List destLs = new ArrayList();
157: Enumeration entries;
158: ZipFile zipFile;
159: File dest = new File(destination);
160: dest.mkdir();
161: if (dest.isDirectory()) {
162:
163: try {
164: zipFile = new ZipFile(zip);
165:
166: entries = zipFile.entries();
167:
168: while (entries.hasMoreElements()) {
169: ZipEntry entry = (ZipEntry) entries.nextElement();
170:
171: if (entry.isDirectory()) {
172:
173: (new File(dest.getAbsolutePath()
174: + File.separator + entry.getName()))
175: .mkdirs();
176: continue;
177: }
178:
179: if (entry.getName().lastIndexOf("/") > 0) {
180: File f = new File(dest.getAbsolutePath()
181: + File.separator
182: + entry.getName().substring(
183: 0,
184: entry.getName()
185: .lastIndexOf("/")));
186: f.mkdirs();
187: }
188: copyInputStream(zipFile.getInputStream(entry),
189: new BufferedOutputStream(
190: new FileOutputStream(dest
191: .getAbsolutePath()
192: + File.separator
193: + entry.getName())));
194: destLs.add(dest.getAbsolutePath() + File.separator
195: + TMP_UNZIP_DIR + File.separator
196: + entry.getName());
197: }
198:
199: zipFile.close();
200: } catch (IOException e) {
201: deleteDir(new File(destination));
202: logger.error(e.getMessage());
203: }
204: } else {
205: logger.info("vous avez spécifié un fichier");
206: }
207: return destLs;
208: }
209:
210: private void copyInputStream(InputStream in, OutputStream out)
211: throws IOException {
212: byte[] buffer = new byte[1024];
213: int len;
214:
215: while ((len = in.read(buffer)) >= 0)
216: out.write(buffer, 0, len);
217:
218: in.close();
219: out.close();
220: }
221:
222: public boolean deleteDir(File dir) {
223: if (dir.isDirectory()) {
224: String[] children = dir.list();
225: for (int i = 0; i < children.length; i++) {
226: boolean success = deleteDir(new File(dir, children[i]));
227: if (!success) {
228: return false;
229: }
230: }
231: }
232: return dir.delete();
233: }
234:
235: protected void deleteTmp(String file) {
236: String dir = (((String) file).substring(0, ((String) file)
237: .lastIndexOf(File.separator)))
238: + File.separator + TMP_UNZIP_DIR;
239: deleteDir(new File(dir));
240: }
241:
242: }
|