001: /*
002: * Piscator: a small SQL/XML search engine
003: * Copyright (C) 2007 Luk Morbee
004: *
005: * This program is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU General Public License as published by
007: * the Free Software Foundation; either version 2 of the License, or
008: * (at your option) any later version.
009: *
010: * This program is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU General Public License for more details.
014: *
015: * You should have received a copy of the GNU General Public License
016: * along with this program; if not, write to the Free Software
017: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
018: */
019: package piscator;
020:
021: import java.io.BufferedReader;
022: import java.io.File;
023: import java.io.IOException;
024: import java.io.InputStreamReader;
025:
026: import javax.xml.parsers.DocumentBuilderFactory;
027: import javax.xml.parsers.ParserConfigurationException;
028: import javax.xml.transform.TransformerException;
029:
030: import org.springframework.context.ApplicationContext;
031: import org.springframework.context.support.ClassPathXmlApplicationContext;
032: import org.w3c.dom.Document;
033: import org.w3c.dom.Element;
034: import org.xml.sax.SAXException;
035:
036: import com.sun.org.apache.xpath.internal.XPathAPI;
037:
038: import piscator.service.Indexer;
039: import piscator.service.Searcher;
040: import piscator.service.StoreServer;
041:
042: /**
043: * <p>
044: * Piscator is a small SQL/XML search engine.<br/>
045: * Once an XML feed is loaded, it can be queried using plain SQL.<br/>
046: * </p>
047: * <p>
048: * It can be usefull in portal sites that require querying capabilities that exceed full text searching.
049: * </p>
050: * <p>
051: * The setup is almost identical to the DB2 side tables approach: an xml file is plit in separate
052: * documents. These documents are stored in a document table composed of only two columns (doc_id, xml).
053: * To make the xml SQL-queryable, the search fields of interest are extracted from each document and
054: * stored in side tables.<br/>
055: * All of this is configurable through a config file which is very similar to a DB2 DAD file.
056: * </p>
057: * <p>
058: * To test it you can run the SearchEngine stand-alone:<br/>
059: * <b>java -classpath piscator.jar;../lib/hsqldb.jar;../lib/junit.jar;../lib/spring.jar;../lib/commons-logging.jar piscator.SearchEngine -d ../xml/test-documents.xml -c ../xml/test-config.xml</b><br/>
060: * </p>
061: */
062: public class SearchEngine {
063:
064: private ApplicationContext context = null;
065:
066: public SearchEngine() {
067: context = new ClassPathXmlApplicationContext(
068: "/applicationContext.xml");
069: }
070:
071: public static void main(String[] args) {
072: String documentsFilePath = null;
073: String configFilePath = null;
074: for (int i = 0; i < args.length; i += 1) {
075: String arg = args[i];
076: if (args[i].equals("-d") && i < args.length - 1) {
077: i += 1;
078: documentsFilePath = args[i];
079: } else if (args[i].equals("-c") && i < args.length - 1) {
080: i += 1;
081: configFilePath = args[i];
082: }
083: }
084: if (documentsFilePath == null || configFilePath == null) {
085: System.out
086: .println("Usage: SearchEngine -d documentsfile -c configfile");
087: System.exit(1);
088: }
089:
090: SearchEngine searchEngine = new SearchEngine();
091: File documentsFile = new File(documentsFilePath);
092: File configFile = new File(configFilePath);
093: try {
094: searchEngine.load(documentsFile, configFile);
095:
096: BufferedReader br = new BufferedReader(
097: new InputStreamReader(System.in));
098: while (true) {
099: System.out.print("Enter your query (SQL): ");
100: String query = br.readLine();
101: if (query == null || query.length() == 0) {
102: System.out.println("No query specified!");
103: } else if ("exit".equals(query) || "quit".equals(query)) {
104: System.exit(0);
105: } else {
106: try {
107: System.out.println(searchEngine.getXML(query));
108: } catch (Exception e) {
109: e.printStackTrace();
110: }
111: }
112: }
113:
114: } catch (Exception e) {
115: e.printStackTrace();
116: System.exit(1);
117: }
118: }
119:
120: /**
121: * Splits the documents file in seperate documents that are stored in a document table.
122: * Then it extracts, stores and indexes the search fields in side tables.
123: */
124: public void load(File documentsFile, File configFile)
125: throws SAXException, IOException,
126: ParserConfigurationException, TransformerException {
127: Document document = DocumentBuilderFactory.newInstance()
128: .newDocumentBuilder().parse(configFile);
129: Element config = (Element) XPathAPI.selectSingleNode(document,
130: "config");
131: StoreServer storeServer = (StoreServer) context
132: .getBean("storeServer");
133: storeServer.storeDocuments(documentsFile, config);
134: Indexer indexer = (Indexer) context.getBean("indexer");
135: indexer.index(config);
136: }
137:
138: /**
139: * Appends and returns the documents found.
140: * The sql statement however is suposed to return at least the document xml column!.
141: */
142: public String getDocuments(String sql) {
143: Searcher searcher = (Searcher) context.getBean("searcher");
144: return searcher.getDocuments(sql);
145: }
146:
147: /**
148: * Returns the result of the sql statement as xml.
149: */
150: public String getXML(String sql) {
151: Searcher searcher = (Searcher) context.getBean("searcher");
152: return searcher.getXML(sql);
153: }
154:
155: }
|