001: /*
002:
003: * LIUS - Lucene Index Update and Search
004: * http://sourceforge.net/projects/lius/
005: *
006: * Copyright (c) 2005, Laval University Library. All rights reserved.
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
021: */
022:
023: package ca.ulaval.bibl.lius.index.XML;
024:
025: import java.util.ArrayList;
026: import java.util.Collection;
027: import java.util.HashMap;
028: import java.util.Iterator;
029: import java.util.List;
030: import java.util.Set;
031:
032: import org.apache.log4j.Logger;
033: import org.jdom.Element;
034: import org.jdom.JDOMException;
035: import org.jdom.xpath.XPath;
036:
037: import ca.ulaval.bibl.lius.config.LiusConfig;
038: import ca.ulaval.bibl.lius.config.LiusConfigBuilder;
039:
040: /**
041: *
042: * Classe se basant sur JDOM et XPATH pour indexer des noeuds de documents XML.
043: *
044: * <br/><br/>
045: *
046: * Class based on JDOM and XPATH for indexing nodes in XML documents.
047: *
048: * @author Rida Benjelloun (rida.benjelloun@bibl.ulaval.ca)
049: *
050: */
051:
052: public class XmlNodeIndexer
053:
054: extends XmlFileIndexer {
055:
056: static Logger logger = Logger.getRootLogger();
057:
058: /**
059: *
060: * Méthode spécifique à l'indexation par noeuds. Elle retourne une liste de
061: *
062: * documents Lucene. Actuellement cette méthode est uniquement utilisée pour
063: *
064: * stocker des noeuds d'un même document XML dans plusieurs documents
065: * Lucene.
066: *
067: * Elle prend comme arguments le fichier à indexer et le fichier de
068: * configuration
069: *
070: * de Lius.
071: *
072: * <br/><br/>
073: *
074: * Method specific for indexing nodes. It returns a list of Lucene
075: * documents.
076: *
077: * Actually, the method is only used to save nodes of the same XML document
078: * in
079: *
080: * many Lucene documents. Its parameters are the XML file to index and the
081: * Lius
082: *
083: * configuration file.
084: *
085: */
086:
087: public List createLuceneDocForEachNodeOfDocument(String xmlFile,
088:
089: String liusXmlConfigFilePath) {
090:
091: LiusConfig lc = LiusConfigBuilder.getSingletonInstance()
092: .getLiusConfig(
093:
094: liusXmlConfigFilePath);
095:
096: HashMap nodesAndLuceneFields = lc.getXmlNodesFields();
097:
098: List listOfLuceneDocuments = createLuceneDocForEachNodeOfDocument(
099: xmlFile,
100:
101: nodesAndLuceneFields);
102:
103: return listOfLuceneDocuments;
104:
105: }
106:
107: /**
108: * Méthode spécifique à l'indexation par noeuds. Elle retourne une liste de
109: *
110: * documents lucene. Actuellement cette méthode est uniquement utilisée pour
111: *
112: * stocker des noeuds d'un même document XML dans plusieurs documents
113: * Lucene.
114: *
115: * Elle prend comme arguments le fichier à indexer et le fichier de
116: * configuration
117: *
118: * de Lius sous forme d'un objet de type LiusConfig.
119: *
120: * <br/><br/>
121: *
122: * Method specific for indexing nodes. It returns a list of Lucene
123: * documents.
124: *
125: * Actually, the method is only used to save nodes of the same XML document
126: * in
127: *
128: * many Lucene documents. Its parameters are the XML file to index and the
129: * Lius
130: *
131: * configuration file as a LiusConfig object.
132: *
133: */
134:
135: public List createLuceneDocForEachNodeOfDocument(String xmlFile,
136:
137: LiusConfig lc) {
138:
139: HashMap nodesAndLuceneFields = lc.getXmlNodesFields();
140:
141: List listOfLuceneDocuments = createLuceneDocForEachNodeOfDocument(
142: xmlFile,
143:
144: nodesAndLuceneFields);
145:
146: return listOfLuceneDocuments;
147:
148: }
149:
150: /**
151: *
152: * Méthode spécifique à l'indexation par noeuds. Elle retourne une liste de
153: *
154: * documents Lucene. Actuellement cette méthode est uniquement utilisée pour
155: *
156: * stocker des noeuds d'un même document XML dans plusieurs documents
157: * Lucene.
158: *
159: * Elle prend comme arguments le fichier à indexer et un HashMap dont la clé
160: *
161: * est l'expression XPath pour sélectionner le noeud à placer dans le
162: * document
163: *
164: * Lucene, et la valeur une collection d'objets de type LiusFields,
165: * permettant
166: *
167: * de spécifier les champs etc.
168: *
169: * <br/><br/>
170: *
171: * Method specific for indexing nodes. It returns a list of Lucene
172: * documents.
173: *
174: * Actually, the method is only used to save nodes of the same XML document
175: * in
176: *
177: * many Lucene documents. Its parameters are the XML file to index, a
178: * HashMap
179: *
180: * which key is an XPath expression to select the node to place in the
181: * Lucene
182: *
183: * document and the value is a collection of LiusField objects, permitting
184: * to
185: *
186: * specify fields etc.
187: *
188: */
189:
190: public List createLuceneDocForEachNodeOfDocument(
191: String fileToIndex,
192:
193: HashMap nodesAndLuceneFields) {
194:
195: List listOfLuceneDocuments = new ArrayList();
196:
197: org.jdom.Document xmlDoc = (org.jdom.Document) parse(fileToIndex);
198:
199: try {
200:
201: Set s = nodesAndLuceneFields.keySet();
202:
203: Object[] a = s.toArray();
204:
205: for (int i = 0; i < a.length; i++) {
206:
207: String XpathNode = (String) a[i];
208:
209: System.out.println(XpathNode);
210:
211: List ls = XPath.selectNodes(xmlDoc, XpathNode);
212:
213: Iterator it = ls.iterator();
214:
215: while (it.hasNext()) {
216:
217: Element elem = (Element) it.next();
218:
219: org.apache.lucene.document.Document luceneDoc =
220:
221: super .storeNodeInLuceneDocument(elem,
222:
223: (Collection) nodesAndLuceneFields.
224:
225: get(
226:
227: XpathNode));
228:
229: listOfLuceneDocuments.add(luceneDoc);
230:
231: }
232:
233: }
234:
235: }
236:
237: catch (JDOMException e) {
238:
239: logger.error(e.getMessage());
240:
241: }
242:
243: return listOfLuceneDocuments;
244:
245: }
246:
247: }
|