Source Code Cross Referenced for SearchManager.java in » GIS » geonetwork » org » fao » geonet » kernel » search » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » GIS » geonetwork » org.fao.geonet.kernel.search
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        //===	Copyright (C) 2001-2007 Food and Agriculture Organization of the
002:        //===	United Nations (FAO-UN), United Nations World Food Programme (WFP)
003:        //===	and United Nations Environment Programme (UNEP)
004:        //===
005:        //===	This program is free software; you can redistribute it and/or modify
006:        //===	it under the terms of the GNU General Public License as published by
007:        //===	the Free Software Foundation; either version 2 of the License, or (at
008:        //===	your option) any later version.
009:        //===
010:        //===	This program is distributed in the hope that it will be useful, but
011:        //===	WITHOUT ANY WARRANTY; without even the implied warranty of
012:        //===	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013:        //===	General Public License for more details.
014:        //===
015:        //===	You should have received a copy of the GNU General Public License
016:        //===	along with this program; if not, write to the Free Software
017:        //===	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
018:        //===
019:        //===	Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
020:        //===	Rome - Italy. email: geonetwork@osgeo.org
021:        //==============================================================================
022:
023:        package org.fao.geonet.kernel.search;
024:
025:        import com.k_int.IR.Searchable;
026:        import com.k_int.hss.HeterogeneousSetOfSearchable;
027:        import com.k_int.util.LoggingFacade.LogContextFactory;
028:        import com.k_int.util.LoggingFacade.LoggingContext;
029:        import com.k_int.util.Repository.CollectionDirectory;
030:        import java.io.File;
031:        import java.util.Enumeration;
032:        import java.util.Hashtable;
033:        import java.util.Iterator;
034:        import java.util.List;
035:        import java.util.Properties;
036:        import java.util.Vector;
037:        import javax.naming.Context;
038:        import javax.naming.InitialContext;
039:        import jeeves.utils.Log;
040:        import jeeves.utils.Xml;
041:        import org.apache.lucene.analysis.standard.StandardAnalyzer;
042:        import org.apache.lucene.document.Document;
043:        import org.apache.lucene.document.Field;
044:        import org.apache.lucene.index.IndexReader;
045:        import org.apache.lucene.index.IndexWriter;
046:        import org.apache.lucene.index.Term;
047:        import org.apache.lucene.index.TermEnum;
048:        import org.fao.geonet.constants.Geonet;
049:        import org.jdom.Element;
050:
051:        /**
052:         * Indexes metadata using Lucene.
053:         */
054:        public class SearchManager {
055:            public static final int LUCENE = 1;
056:            public static final int Z3950 = 2;
057:            public static final int UNUSED = 3;
058:
059:            private static final String SEARCH_STYLESHEETS_DIR_PATH = "xml/search";
060:            private static final String SCHEMA_STYLESHEETS_DIR_PATH = "xml/schemas";
061:
062:            private File _stylesheetsDir;
063:            private File _schemasDir;
064:            private File _luceneDir;
065:            private LoggingContext _cat;
066:            private Searchable _hssSearchable;
067:
068:            //-----------------------------------------------------------------------------
069:
070:            /**
071:             * @param appPath
072:             * @param luceneDir
073:             * @throws Exception
074:             */
075:            public SearchManager(String appPath, String luceneDir)
076:                    throws Exception {
077:                _stylesheetsDir = new File(appPath, SEARCH_STYLESHEETS_DIR_PATH);
078:                _schemasDir = new File(appPath, SCHEMA_STYLESHEETS_DIR_PATH);
079:
080:                if (!_stylesheetsDir.isDirectory())
081:                    throw new Exception("directory " + _stylesheetsDir
082:                            + " not found");
083:
084:                initLucene(appPath, luceneDir);
085:                initZ3950(appPath);
086:            }
087:
088:            //-----------------------------------------------------------------------------
089:
090:            public void end() throws Exception {
091:                endZ3950();
092:            }
093:
094:            //-----------------------------------------------------------------------------
095:
096:            public MetaSearcher newSearcher(int type, String stylesheetName)
097:                    throws Exception {
098:                switch (type) {
099:                case LUCENE:
100:                    return new LuceneSearcher(this , stylesheetName);
101:                case Z3950:
102:                    return new Z3950Searcher(this , stylesheetName);
103:                case UNUSED:
104:                    return new UnusedSearcher();
105:
106:                default:
107:                    throw new Exception("unknown MetaSearcher type: " + type);
108:                }
109:            }
110:
111:            /**
112:             * Lucene init/end methods. Creates the Lucene index directory.
113:             * @param appPath
114:             * @param luceneDir
115:             * @throws Exception
116:             */
117:            private void initLucene(String appPath, String luceneDir)
118:                    throws Exception {
119:                _luceneDir = new File(luceneDir);
120:
121:                if (!_luceneDir.isAbsolute())
122:                    _luceneDir = new File(appPath + luceneDir);
123:
124:                //--- the lucene dir cannot be inside the CVS so it is better to create it here
125:
126:                _luceneDir.mkdirs();
127:
128:                setupIndex(false); // RGFIX: check if this is correct
129:            }
130:
131:            //-----------------------------------------------------------------------------
132:            // Z39.50 init/end methods
133:
134:            /** 
135:             * Initializes the Z3950 client searcher.
136:             * @param appPath
137:             * @throws Exception
138:             */
139:            private void initZ3950(String appPath) throws Exception {
140:                _cat = LogContextFactory.getContext("GeoNetwork"); // FIXME: maybe it should use the webapp path
141:
142:                String configClass = "com.k_int.util.Repository.XMLDataSource";
143:                String configUrl = "file:///" + appPath
144:                        + jeeves.constants.Jeeves.Path.XML
145:                        + "/repositories.xml";
146:                String directoryNamingLocation = "/Services/IR/Directory"; // RGFIX: change to use servlet context
147:
148:                Properties props = new Properties();
149:                props.setProperty("CollectionDataSourceClassName", configClass);
150:                props.setProperty("RepositoryDataSourceURL", configUrl);
151:                props.setProperty("DirectoryServiceName",
152:                        directoryNamingLocation); // RGFIX: check this
153:                // set up the collection directory and register it with the naming service in the
154:                // default way
155:                // RGFIX: this could not work for different servlet instances, should be changed to use servlet context
156:                CollectionDirectory cd = new CollectionDirectory(configClass,
157:                        configUrl);
158:                Context context = new InitialContext();
159:                Context services_context = context.createSubcontext("Services");
160:                Context ir_context = services_context.createSubcontext("IR");
161:                ir_context.bind("Directory", cd);
162:
163:                // pull in the repository
164:                _hssSearchable = new HeterogeneousSetOfSearchable();
165:                _hssSearchable.init(props);
166:            }
167:
168:            /** deinitializes the Z3950 client searcher
169:             */
170:            private void endZ3950() {
171:                if (_hssSearchable != null) {
172:                    _hssSearchable.destroy();
173:                    _hssSearchable = null;
174:                }
175:            }
176:
177:            //--------------------------------------------------------------------------------
178:            // indexing methods
179:
180:            /**
181:             * Indexes a metadata record.
182:             * @param type
183:             * @param metadata
184:             * @param id
185:             * @param moreFields
186:             * @param isTemplate
187:             * @param title
188:             * @throws Exception
189:             */
190:            public synchronized void index(String type, Element metadata,
191:                    String id, List moreFields, String isTemplate, String title)
192:                    throws Exception {
193:                delete("_id", id);
194:
195:                Element xmlDoc;
196:
197:                // check for subtemplates
198:                if (isTemplate.equals("s")) {
199:                    // create empty document with only title  and "any" fields
200:                    xmlDoc = new Element("Document");
201:
202:                    StringBuffer sb = new StringBuffer();
203:                    allText(metadata, sb);
204:                    addField(xmlDoc, "title", title, true, true, true);
205:                    addField(xmlDoc, "any", sb.toString(), true, true, true);
206:                } else {
207:                    Log.debug(Geonet.INDEX_ENGINE, "Metadata to index:\n"
208:                            + Xml.getString(metadata));
209:
210:                    xmlDoc = getIndexFields(type, metadata);
211:
212:                    Log.debug(Geonet.INDEX_ENGINE, "Indexing fields:\n"
213:                            + Xml.getString(xmlDoc));
214:                }
215:                // add _id field
216:                addField(xmlDoc, "_id", id, true, true, false);
217:
218:                // add more fields
219:                for (Iterator iter = moreFields.iterator(); iter.hasNext();) {
220:                    Element field = (Element) iter.next();
221:                    xmlDoc.addContent(field);
222:                }
223:
224:                Log.debug(Geonet.INDEX_ENGINE, "Lucene document:\n"
225:                        + Xml.getString(xmlDoc));
226:
227:                Document doc = newDocument(xmlDoc);
228:                IndexWriter writer = new IndexWriter(_luceneDir,
229:                        new StandardAnalyzer(new String[] {}), false);
230:                try {
231:                    writer.addDocument(doc);
232:                    lazyOptimize(writer);
233:                } finally {
234:                    writer.close();
235:                }
236:            }
237:
238:            /**
239:             * Creates a new field for the Lucene index.
240:             * @param xmlDoc
241:             * @param name
242:             * @param value
243:             * @param store
244:             * @param index
245:             * @param token
246:             */
247:            private void addField(Element xmlDoc, String name, String value,
248:                    boolean store, boolean index, boolean token) {
249:                Element field = new Element("Field");
250:                field.setAttribute("name", name);
251:                field.setAttribute("string", value);
252:                field.setAttribute("store", store + "");
253:                field.setAttribute("index", index + "");
254:                field.setAttribute("token", token + "");
255:                xmlDoc.addContent(field);
256:            }
257:
258:            /**
259:             * Extracts text from metadata record.
260:             * @param metadata
261:             * @param sb
262:             * @return all text in the metadata elements for indexing
263:             */
264:            private void allText(Element metadata, StringBuffer sb) {
265:                String text = metadata.getText().trim();
266:                if (text.length() > 0) {
267:                    if (sb.length() > 0)
268:                        sb.append(" ");
269:                    sb.append(text);
270:                }
271:                List children = metadata.getChildren();
272:                if (children.size() > 0) {
273:                    for (Iterator i = children.iterator(); i.hasNext();)
274:                        allText((Element) i.next(), sb);
275:                }
276:            }
277:
278:            //--------------------------------------------------------------------------------
279:            //  delete a document
280:
281:            public synchronized void delete(String fld, String txt)
282:                    throws Exception {
283:                // possibly remove old document
284:                IndexReader reader = IndexReader.open(_luceneDir);
285:                try {
286:                    reader.deleteDocuments(new Term(fld, txt));
287:
288:                    // RGFIX: should I optimize here, or at least increase updateCount?
289:                } finally {
290:                    reader.close();
291:                }
292:            }
293:
294:            //--------------------------------------------------------------------------------
295:
296:            public Hashtable getDocs() throws Exception {
297:                IndexReader reader = IndexReader.open(_luceneDir);
298:                try {
299:                    Hashtable docs = new Hashtable();
300:                    for (int i = 0; i < reader.numDocs(); i++) {
301:                        if (reader.isDeleted(i))
302:                            continue; // FIXME: strange lucene hack: sometimes it tries to load a deleted document
303:
304:                        Hashtable record = new Hashtable();
305:                        Document doc = reader.document(i);
306:                        String id = doc.get("_id");
307:                        for (Enumeration j = doc.fields(); j.hasMoreElements();) {
308:                            Field field = (Field) j.nextElement();
309:                            record.put(field.name(), field.stringValue());
310:                        }
311:                        docs.put(id, record);
312:                    }
313:                    return docs;
314:                } finally {
315:                    reader.close();
316:                }
317:            }
318:
319:            //--------------------------------------------------------------------------------
320:
321:            public Vector getTerms(String fld) throws Exception {
322:                Vector terms = new Vector();
323:
324:                IndexReader reader = IndexReader.open(_luceneDir);
325:                try {
326:                    TermEnum enu = reader.terms(new Term(fld, ""));
327:                    while (enu.next()) {
328:                        Term term = enu.term();
329:                        if (term.field().equals(fld))
330:                            terms.add(enu.term().text());
331:                    }
332:                } finally {
333:                    reader.close();
334:                }
335:                return terms;
336:            }
337:
338:            //-----------------------------------------------------------------------------
339:            // utilities
340:
341:            Element getIndexFields(String schema, Element xml) throws Exception {
342:                File schemaDir = new File(_schemasDir, schema);
343:
344:                try {
345:                    String styleSheet = new File(schemaDir, "index-fields.xsl")
346:                            .getAbsolutePath();
347:                    return Xml.transform(xml, styleSheet);
348:                } catch (Exception e) {
349:                    Log.error(Geonet.SEARCH_ENGINE,
350:                            "Indexing stylesheet contains errors : "
351:                                    + e.getMessage());
352:                    throw e;
353:                }
354:            }
355:
356:            //-----------------------------------------------------------------------------
357:            // utilities
358:
359:            Element transform(String styleSheetName, Element xml)
360:                    throws Exception {
361:                try {
362:                    String styleSheetPath = new File(_stylesheetsDir,
363:                            styleSheetName).getAbsolutePath();
364:                    return Xml.transform(xml, styleSheetPath);
365:                } catch (Exception e) {
366:                    Log.error(Geonet.SEARCH_ENGINE,
367:                            "Search stylesheet contains errors : "
368:                                    + e.getMessage());
369:                    throw e;
370:                }
371:            }
372:
373:            public File getLuceneDir() {
374:                return _luceneDir;
375:            }
376:
377:            Searchable getSearchable() {
378:                return _hssSearchable;
379:            }
380:
381:            //-----------------------------------------------------------------------------
382:            // private methods
383:
384:            // creates an index in directory luceneDir with StandardAnalyzer if not present
385:            private void setupIndex(boolean rebuild) throws Exception {
386:                // if rebuild forced don't check
387:                boolean badIndex = true;
388:                if (!rebuild) {
389:                    try {
390:                        IndexReader reader = IndexReader.open(_luceneDir);
391:                        reader.close();
392:                        badIndex = false;
393:                    } catch (Exception e) {
394:                        System.err
395:                                .println("exception while opening lucene index, going to rebuild it: "
396:                                        + e.getMessage());
397:                    }
398:                }
399:                // if rebuild forced or bad index then rebuild index
400:                if (rebuild || badIndex) {
401:                    System.err.println("rebuilding lucene index");
402:
403:                    IndexWriter writer = new IndexWriter(_luceneDir,
404:                            new StandardAnalyzer(new String[] {}), true);
405:                    writer.close();
406:                }
407:            }
408:
409:            // creates a new document
410:            private Document newDocument(Element xml) {
411:                Document doc = new Document();
412:                for (Iterator iter = xml.getChildren().iterator(); iter
413:                        .hasNext();) {
414:                    Element field = (Element) iter.next();
415:                    String name = field.getAttributeValue("name");
416:                    String string = field.getAttributeValue("string")
417:                            .toLowerCase(); // RGFIX: should be only needed for non-tokenized fields
418:                    if (string.trim().length() > 0) {
419:                        String sStore = field.getAttributeValue("store");
420:                        String sIndex = field.getAttributeValue("index");
421:                        String sToken = field.getAttributeValue("token");
422:                        boolean bStore = sStore != null
423:                                && sStore.equals("true");
424:                        boolean bIndex = sIndex != null
425:                                && sIndex.equals("true");
426:                        boolean token = sToken != null && sToken.equals("true");
427:                        Field.Store store = null;
428:                        if (bStore) {
429:                            store = Field.Store.YES;
430:                        } else {
431:                            store = Field.Store.NO;
432:                        }
433:                        Field.Index index = null;
434:                        if (bIndex && token) {
435:                            index = Field.Index.TOKENIZED;
436:                        }
437:                        if (bIndex && !token) {
438:                            index = Field.Index.UN_TOKENIZED;
439:                        }
440:                        if (!bIndex) {
441:                            index = Field.Index.NO;
442:                        }
443:                        doc.add(new Field(name, string, store, index));
444:                    }
445:                }
446:                return doc;
447:            }
448:
449:            //--------------------------------------------------------------------------------
450:
451:            private static final long TIME_BETWEEN_OPTS = 1000; // time between two optimizations in ms
452:            private static final int UPDTATES_BETWEEN_OPTS = 10; // number of updates between two optimizations
453:
454:            private long lastOptTime = 0; // time since last optimization
455:            private int updateCount = UPDTATES_BETWEEN_OPTS - 1; // number of updates since last uptimization
456:            private boolean optimizing = false; // true iff optimization is in progress
457:            private Object mutex = new Object(); // RGFIX: check concurrent access from multiple servlets
458:
459:            /**
460:             * lazy optimization: optimize index if
461:             * at least TIME_BETWEEN_OPTS time passed or
462:             * at least UPDTATES_BETWEEN_OPTS updates were performed
463:             * since last optimization
464:             * @param writer
465:             * @throws Exception
466:             */
467:            private void lazyOptimize(IndexWriter writer) throws Exception {
468:                if (optimizing)
469:                    return;
470:
471:                boolean doOptimize;
472:                synchronized (mutex) {
473:                    if (System.currentTimeMillis() - lastOptTime < TIME_BETWEEN_OPTS
474:                            && ++updateCount < UPDTATES_BETWEEN_OPTS)
475:                        doOptimize = false;
476:                    else {
477:                        doOptimize = true;
478:                        optimizing = true;
479:                        updateCount = 0;
480:                    }
481:                }
482:                if (doOptimize) {
483:                    // System.out.println("**** OPTIMIZING"); // DEBUG
484:
485:                    writer.optimize();
486:                    lastOptTime = System.currentTimeMillis();
487:                    optimizing = false;
488:                }
489:            }
490:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.