001: /*
002: * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
003: * (http://h2database.com/html/license.html).
004: * Initial Developer: H2 Group
005: */
006: package org.h2.fulltext;
007:
008: //#ifdef JDK14
009: import java.io.IOException;
010: import java.sql.Connection;
011: import java.sql.DatabaseMetaData;
012: import java.sql.PreparedStatement;
013: import java.sql.ResultSet;
014: import java.sql.SQLException;
015: import java.sql.Statement;
016: import java.sql.Types;
017: import java.util.ArrayList;
018: import java.util.HashMap;
019:
020: import org.apache.lucene.analysis.Analyzer;
021: import org.apache.lucene.analysis.standard.StandardAnalyzer;
022: import org.apache.lucene.document.DateTools;
023: import org.apache.lucene.document.Document;
024: import org.apache.lucene.document.Field;
025: import org.apache.lucene.index.IndexModifier;
026: import org.apache.lucene.index.IndexReader;
027: import org.apache.lucene.index.Term;
028: import org.apache.lucene.queryParser.QueryParser;
029: import org.apache.lucene.search.Hits;
030: import org.apache.lucene.search.IndexSearcher;
031: import org.apache.lucene.search.Query;
032: import org.apache.lucene.search.Searcher;
033: import org.h2.api.Trigger;
034: import org.h2.command.Parser;
035: import org.h2.engine.Session;
036: import org.h2.expression.ExpressionColumn;
037: import org.h2.jdbc.JdbcConnection;
038: import org.h2.store.fs.FileSystem;
039: import org.h2.tools.SimpleResultSet;
040: import org.h2.util.ByteUtils;
041: import org.h2.util.StringUtils;
042:
043: //#endif
044:
045: /**
046: * This class implements the full text search based on Apache Lucene.
047: */
048: public class FullTextLucene extends FullText
049: //#ifdef JDK14
050: implements Trigger
051: //#endif
052: {
053:
054: //#ifdef JDK14
055: private static HashMap indexers = new HashMap();
056: private static final String FIELD_DATA = "DATA";
057: private static final String FIELD_QUERY = "QUERY";
058: private static final String FIELD_COLUMN_PREFIX = "_";
059: private static final String TRIGGER_PREFIX = "FTL_";
060: private static final String SCHEMA = "FTL";
061: private String schemaName;
062: private String tableName;
063: private int[] keys;
064: private int[] indexColumns;
065: private String[] columnNames;
066: private int[] dataTypes;
067: private IndexModifier indexer;
068:
069: //#endif
070:
071: /**
072: * Create a new full text index for a table and column list. Each table may
073: * only have one index at any time.
074: *
075: * @param conn the connection
076: * @param schema the schema name of the table
077: * @param table the table name
078: * @param columnList the column list (null for all columns)
079: */
080: //#ifdef JDK14
081: public static void createIndex(Connection conn, String schema,
082: String table, String columnList) throws SQLException {
083: init(conn);
084: PreparedStatement prep = conn.prepareStatement("INSERT INTO "
085: + SCHEMA
086: + ".INDEXES(SCHEMA, TABLE, COLUMNS) VALUES(?, ?, ?)");
087: prep.setString(1, schema);
088: prep.setString(2, table);
089: prep.setString(3, columnList);
090: prep.execute();
091: createTrigger(conn, schema, table);
092: indexExistingRows(conn, schema, table);
093: }
094:
095: //#endif
096:
097: /**
098: * Re-creates the full text index for this database
099: *
100: * @param conn the connection
101: */
102: //#ifdef JDK14
103: public static void reindex(Connection conn) throws SQLException {
104: init(conn);
105: removeAllTriggers(conn);
106: removeIndexFiles(conn);
107: Statement stat = conn.createStatement();
108: ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA
109: + ".INDEXES");
110: while (rs.next()) {
111: String schema = rs.getString("SCHEMA");
112: String table = rs.getString("TABLE");
113: createTrigger(conn, schema, table);
114: indexExistingRows(conn, schema, table);
115: }
116: }
117:
118: //#endif
119:
120: /**
121: * Drops all full text indexes from the database.
122: *
123: * @param conn the connection
124: */
125: //#ifdef JDK14
126: public static void dropAll(Connection conn) throws SQLException {
127: Statement stat = conn.createStatement();
128: stat.execute("DROP SCHEMA IF EXISTS " + SCHEMA);
129: removeAllTriggers(conn);
130: removeIndexFiles(conn);
131: }
132:
133: //#endif
134:
135: /**
136: * Initializes full text search functionality for this database. This adds
137: * the following Java functions to the database:
138: * <ul>
139: * <li>FTL_CREATE_INDEX(schemaNameString, tableNameString,
140: * columnListString) </li>
141: * <li>FTL_SEARCH(queryString, limitInt, offsetInt): result set </li>
142: * <li>FTL_REINDEX() </li>
143: * <li>FTL_DROP_ALL() </li>
144: * </ul>
145: * It also adds a schema FTL to the database where bookkeeping information
146: * is stored. This function may be called from a Java application, or by
147: * using the SQL statements:
148: *
149: * <pre>
150: * CREATE ALIAS IF NOT EXISTS FTL_INIT FOR
151: * "org.h2.fulltext.FullTextLucene.init";
152: * CALL FTL_INIT();
153: * </pre>
154: *
155: * @param conn
156: */
157: //#ifdef JDK14
158: public static void init(Connection conn) throws SQLException {
159: Statement stat = conn.createStatement();
160: stat.execute("CREATE SCHEMA IF NOT EXISTS " + SCHEMA);
161: stat
162: .execute("CREATE TABLE IF NOT EXISTS "
163: + SCHEMA
164: + ".INDEXES(SCHEMA VARCHAR, TABLE VARCHAR, COLUMNS VARCHAR, PRIMARY KEY(SCHEMA, TABLE))");
165: stat
166: .execute("CREATE ALIAS IF NOT EXISTS FTL_CREATE_INDEX FOR \""
167: + FullTextLucene.class.getName()
168: + ".createIndex\"");
169: stat.execute("CREATE ALIAS IF NOT EXISTS FTL_SEARCH FOR \""
170: + FullTextLucene.class.getName() + ".search\"");
171: stat
172: .execute("CREATE ALIAS IF NOT EXISTS FTL_SEARCH_DATA FOR \""
173: + FullTextLucene.class.getName()
174: + ".searchData\"");
175: stat.execute("CREATE ALIAS IF NOT EXISTS FTL_REINDEX FOR \""
176: + FullTextLucene.class.getName() + ".reindex\"");
177: stat.execute("CREATE ALIAS IF NOT EXISTS FTL_DROP_ALL FOR \""
178: + FullTextLucene.class.getName() + ".dropAll\"");
179: }
180:
181: //#endif
182:
183: /**
184: * INTERNAL
185: */
186: //#ifdef JDK14
187: public void init(Connection conn, String schemaName,
188: String triggerName, String tableName, boolean before,
189: int type) throws SQLException {
190: init(conn);
191: this .schemaName = schemaName;
192: this .tableName = tableName;
193: this .indexer = getIndexModifier(conn);
194: ArrayList keyList = new ArrayList();
195: DatabaseMetaData meta = conn.getMetaData();
196: ResultSet rs = meta.getColumns(null, schemaName, tableName,
197: null);
198: ArrayList columnList = new ArrayList();
199: while (rs.next()) {
200: columnList.add(rs.getString("COLUMN_NAME"));
201: }
202: dataTypes = new int[columnList.size()];
203: columnNames = new String[columnList.size()];
204: columnList.toArray(columnNames);
205: rs = meta.getColumns(null, schemaName, tableName, null);
206: for (int i = 0; rs.next(); i++) {
207: dataTypes[i] = rs.getInt("DATA_TYPE");
208: }
209: if (keyList.size() == 0) {
210: rs = meta.getPrimaryKeys(null, schemaName, tableName);
211: while (rs.next()) {
212: keyList.add(rs.getString("COLUMN_NAME"));
213: }
214: }
215: if (keyList.size() == 0) {
216: throw new SQLException("No primary key for table "
217: + tableName);
218: }
219: ArrayList indexList = new ArrayList();
220: PreparedStatement prep = conn
221: .prepareStatement("SELECT COLUMNS FROM " + SCHEMA
222: + ".INDEXES WHERE SCHEMA=? AND TABLE=?");
223: prep.setString(1, schemaName);
224: prep.setString(2, tableName);
225: rs = prep.executeQuery();
226: if (rs.next()) {
227: String columns = rs.getString(1);
228: if (columns != null) {
229: String[] list = StringUtils.arraySplit(columns, ',',
230: true);
231: for (int i = 0; i < list.length; i++) {
232: indexList.add(list[i]);
233: }
234: }
235: }
236: if (indexList.size() == 0) {
237: indexList.addAll(columnList);
238: }
239: keys = new int[keyList.size()];
240: setColumns(keys, keyList, columnList);
241: indexColumns = new int[indexList.size()];
242: setColumns(indexColumns, indexList, columnList);
243: }
244:
245: //#endif
246:
247: /**
248: * INTERNAL
249: */
250: //#ifdef JDK14
251: public void fire(Connection conn, Object[] oldRow, Object[] newRow)
252: throws SQLException {
253: if (oldRow != null) {
254: delete(oldRow);
255: }
256: if (newRow != null) {
257: insert(newRow);
258: }
259: }
260:
261: //#endif
262:
263: /**
264: * Searches from the full text index for this database. The result contains
265: * the primary key data as an array. The returned result set has the
266: * following columns:
267: * <ul>
268: * <li>SCHEMA (varchar): The schema name. Example: PUBLIC </li>
269: * <li>TABLE (varchar): The table name. Example: TEST </li>
270: * <li>COLUMNS (array of varchar): Comma separated list of quoted column
271: * names. The column names are quoted if necessary. Example: (ID) </li>
272: * <li>KEYS (array of values): Comma separated list of values. Example: (1)
273: * </li>
274: * </ul>
275: *
276: * @param conn the connection
277: * @param text the search query
278: * @param limit the maximum number of rows or 0 for no limit
279: * @param offset the offset or 0 for no offset
280: * @return the result set
281: */
282: //#ifdef JDK14
283: public static ResultSet searchData(Connection conn, String text,
284: int limit, int offset) throws SQLException {
285: return search(conn, text, limit, offset, true);
286: }
287:
288: //#endif
289:
290: /**
291: * Searches from the full text index for this database.
292: * The returned result set has the following column:
293: * <ul><li>QUERY (varchar): The query to use to get the data.
294: * The query does not include 'SELECT * FROM '. Example:
295: * PUBLIC.TEST WHERE ID = 1
296: * </li></ul>
297: *
298: * @param conn the connection
299: * @param text the search query
300: * @param limit the maximum number of rows or 0 for no limit
301: * @param offset the offset or 0 for no offset
302: * @return the result set
303: */
304: //#ifdef JDK14
305: public static ResultSet search(Connection conn, String text,
306: int limit, int offset) throws SQLException {
307: return search(conn, text, limit, offset, false);
308: }
309:
310: private static ResultSet search(Connection conn, String text,
311: int limit, int offset, boolean data) throws SQLException {
312: SimpleResultSet result = createResultSet(data);
313: if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) {
314: // this is just to query the result set columns
315: return result;
316: }
317: String path = getIndexPath(conn);
318: try {
319: IndexModifier indexer = getIndexModifier(conn);
320: indexer.flush();
321: IndexReader reader = IndexReader.open(path);
322: Analyzer analyzer = new StandardAnalyzer();
323: Searcher searcher = new IndexSearcher(reader);
324: QueryParser parser = new QueryParser(FIELD_DATA, analyzer);
325: Query query = parser.parse(text);
326: Hits hits = searcher.search(query);
327: int max = hits.length();
328: if (limit == 0) {
329: limit = max;
330: }
331: for (int i = 0; i < limit && i + offset < max; i++) {
332: Document doc = hits.doc(i + offset);
333: String q = doc.get(FIELD_QUERY);
334: if (data) {
335: int idx = q.indexOf(" WHERE ");
336: JdbcConnection c = (JdbcConnection) conn;
337: Session session = (Session) c.getSession();
338: Parser p = new Parser(session);
339: String tab = q.substring(0, idx);
340: ExpressionColumn expr = (ExpressionColumn) p
341: .parseExpression(tab);
342: String schemaName = expr.getOriginalAliasName();
343: String tableName = expr.getColumnName();
344: q = q.substring(idx + " WHERE ".length());
345: Object[][] columnData = parseKey(conn, q);
346: Object[] row = new Object[] { schemaName,
347: tableName, columnData[0], columnData[1] };
348: result.addRow(row);
349: } else {
350: result.addRow(new Object[] { q });
351: }
352: }
353: // TODO keep it open if possible
354: reader.close();
355: } catch (Exception e) {
356: throw convertException(e);
357: }
358: return result;
359: }
360:
361: private static void removeAllTriggers(Connection conn)
362: throws SQLException {
363: Statement stat = conn.createStatement();
364: ResultSet rs = stat
365: .executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS");
366: Statement stat2 = conn.createStatement();
367: while (rs.next()) {
368: String schema = rs.getString("TRIGGER_SCHEMA");
369: String name = rs.getString("TRIGGER_NAME");
370: if (name.startsWith(TRIGGER_PREFIX)) {
371: name = StringUtils.quoteIdentifier(schema) + "."
372: + StringUtils.quoteIdentifier(name);
373: stat2.execute("DROP TRIGGER " + name);
374: }
375: }
376: }
377:
378: private static void removeIndexFiles(Connection conn)
379: throws SQLException {
380: String path = getIndexPath(conn);
381: IndexModifier index = (IndexModifier) indexers.get(path);
382: if (index != null) {
383: indexers.remove(path);
384: try {
385: index.flush();
386: index.close();
387: } catch (IOException e) {
388: throw convertException(e);
389: }
390: }
391: FileSystem.getInstance(path).deleteRecursive(path);
392: }
393:
394: private String getQuery(Object[] row) throws SQLException {
395: StringBuffer buff = new StringBuffer();
396: if (schemaName != null) {
397: buff.append(StringUtils.quoteIdentifier(schemaName));
398: buff.append(".");
399: }
400: buff.append(StringUtils.quoteIdentifier(tableName));
401: buff.append(" WHERE ");
402: for (int i = 0; i < keys.length; i++) {
403: if (i > 0) {
404: buff.append(" AND ");
405: }
406: int columnIndex = keys[i];
407: buff.append(StringUtils
408: .quoteIdentifier(columnNames[columnIndex]));
409: Object o = row[columnIndex];
410: if (o == null) {
411: buff.append(" IS NULL");
412: } else {
413: buff.append("=");
414: buff.append(quoteSQL(o, dataTypes[columnIndex]));
415: }
416: }
417: String key = buff.toString();
418: return key;
419: }
420:
421: private String quoteString(String data) {
422: if (data.indexOf('\'') < 0) {
423: return "'" + data + "'";
424: }
425: StringBuffer buff = new StringBuffer(data.length() + 2);
426: buff.append('\'');
427: for (int i = 0; i < data.length(); i++) {
428: char ch = data.charAt(i);
429: if (ch == '\'') {
430: buff.append(ch);
431: }
432: buff.append(ch);
433: }
434: buff.append('\'');
435: return buff.toString();
436: }
437:
438: private String quoteBinary(byte[] data) {
439: return "'" + ByteUtils.convertBytesToString(data) + "'";
440: }
441:
442: private String asString(Object data, int type) throws SQLException {
443: if (data == null) {
444: return "NULL";
445: }
446: switch (type) {
447: case Types.BIT:
448: case Types.BOOLEAN:
449: case Types.INTEGER:
450: case Types.BIGINT:
451: case Types.DECIMAL:
452: case Types.DOUBLE:
453: case Types.FLOAT:
454: case Types.NUMERIC:
455: case Types.REAL:
456: case Types.SMALLINT:
457: case Types.TINYINT:
458: case Types.DATE:
459: case Types.TIME:
460: case Types.TIMESTAMP:
461: case Types.LONGVARCHAR:
462: case Types.CHAR:
463: case Types.VARCHAR:
464: return data.toString();
465: case Types.VARBINARY:
466: case Types.LONGVARBINARY:
467: case Types.BINARY:
468: case Types.JAVA_OBJECT:
469: case Types.CLOB:
470: case Types.OTHER:
471: case Types.BLOB:
472: case Types.STRUCT:
473: case Types.REF:
474: case Types.NULL:
475: case Types.ARRAY:
476: case Types.DATALINK:
477: case Types.DISTINCT:
478: throw new SQLException("FULLTEXT",
479: "Unsupported column data type: " + type);
480: }
481: return "";
482: }
483:
484: private String quoteSQL(Object data, int type) throws SQLException {
485: if (data == null) {
486: return "NULL";
487: }
488: switch (type) {
489: case Types.BIT:
490: case Types.BOOLEAN:
491: case Types.INTEGER:
492: case Types.BIGINT:
493: case Types.DECIMAL:
494: case Types.DOUBLE:
495: case Types.FLOAT:
496: case Types.NUMERIC:
497: case Types.REAL:
498: case Types.SMALLINT:
499: case Types.TINYINT:
500: return data.toString();
501: case Types.DATE:
502: case Types.TIME:
503: case Types.TIMESTAMP:
504: case Types.LONGVARCHAR:
505: case Types.CHAR:
506: case Types.VARCHAR:
507: return quoteString(data.toString());
508: case Types.VARBINARY:
509: case Types.LONGVARBINARY:
510: case Types.BINARY:
511: return quoteBinary((byte[]) data);
512: case Types.JAVA_OBJECT:
513: case Types.CLOB:
514: case Types.OTHER:
515: case Types.BLOB:
516: case Types.STRUCT:
517: case Types.REF:
518: case Types.NULL:
519: case Types.ARRAY:
520: case Types.DATALINK:
521: case Types.DISTINCT:
522: throw new SQLException("FULLTEXT",
523: "Unsupported key data type: " + type);
524: }
525: return "";
526: }
527:
528: private void insert(Object[] row) throws SQLException {
529: String query = getQuery(row);
530: Document doc = new Document();
531: doc.add(new Field(FIELD_QUERY, query, Field.Store.YES,
532: Field.Index.UN_TOKENIZED));
533: long time = System.currentTimeMillis();
534: doc.add(new Field("modified", DateTools.timeToString(time,
535: DateTools.Resolution.SECOND), Field.Store.YES,
536: Field.Index.UN_TOKENIZED));
537: StringBuffer allData = new StringBuffer();
538: for (int i = 0; i < indexColumns.length; i++) {
539: int index = indexColumns[i];
540: String columnName = columnNames[index];
541: String data = asString(row[index], dataTypes[index]);
542: doc.add(new Field(FIELD_COLUMN_PREFIX + columnName, data,
543: Field.Store.NO, Field.Index.TOKENIZED));
544: if (i > 0) {
545: allData.append(" ");
546: }
547: allData.append(data);
548: }
549: doc.add(new Field(FIELD_DATA, allData.toString(),
550: Field.Store.NO, Field.Index.TOKENIZED));
551: try {
552: indexer.addDocument(doc);
553: } catch (IOException e) {
554: throw convertException(e);
555: }
556: }
557:
558: private void delete(Object[] row) throws SQLException {
559: String query = getQuery(row);
560: try {
561: Term term = new Term(FIELD_QUERY, query);
562: indexer.deleteDocuments(term);
563: } catch (IOException e) {
564: throw convertException(e);
565: }
566: }
567:
568: private static SQLException convertException(Exception e) {
569: SQLException e2 = new SQLException("FULLTEXT",
570: "Error while indexing document");
571: e2.initCause(e);
572: return e2;
573: }
574:
575: private static void createTrigger(Connection conn, String schema,
576: String table) throws SQLException {
577: Statement stat = conn.createStatement();
578: String trigger = StringUtils.quoteIdentifier(schema) + "."
579: + StringUtils.quoteIdentifier(TRIGGER_PREFIX + table);
580: stat.execute("DROP TRIGGER IF EXISTS " + trigger);
581: StringBuffer buff = new StringBuffer(
582: "CREATE TRIGGER IF NOT EXISTS ");
583: buff.append(trigger);
584: buff.append(" AFTER INSERT, UPDATE, DELETE ON ");
585: buff.append(StringUtils.quoteIdentifier(schema) + "."
586: + StringUtils.quoteIdentifier(table));
587: buff.append(" FOR EACH ROW CALL \"");
588: buff.append(FullTextLucene.class.getName());
589: buff.append("\"");
590: stat.execute(buff.toString());
591: }
592:
593: private static void indexExistingRows(Connection conn,
594: String schema, String table) throws SQLException {
595: FullTextLucene existing = new FullTextLucene();
596: existing.init(conn, schema, null, table, false, INSERT);
597: StringBuffer buff = new StringBuffer("SELECT * FROM ");
598: buff.append(StringUtils.quoteIdentifier(schema) + "."
599: + StringUtils.quoteIdentifier(table));
600: ResultSet rs = conn.createStatement().executeQuery(
601: buff.toString());
602: int columnCount = rs.getMetaData().getColumnCount();
603: while (rs.next()) {
604: Object[] row = new Object[columnCount];
605: for (int i = 0; i < columnCount; i++) {
606: row[i] = rs.getObject(i + 1);
607: }
608: existing.fire(conn, null, row);
609: }
610: }
611:
612: private static IndexModifier getIndexModifier(Connection conn)
613: throws SQLException {
614: try {
615: String path = getIndexPath(conn);
616: IndexModifier indexer;
617: synchronized (indexers) {
618: indexer = (IndexModifier) indexers.get(path);
619: if (indexer == null) {
620: // TODO: create flag = true means re-create
621: indexer = new IndexModifier(path,
622: new StandardAnalyzer(), true);
623: indexers.put(path, indexer);
624: }
625: }
626: return indexer;
627: } catch (IOException e) {
628: throw convertException(e);
629: }
630: }
631:
632: private static String getIndexPath(Connection conn)
633: throws SQLException {
634: Statement stat = conn.createStatement();
635: ResultSet rs = stat.executeQuery("CALL DATABASE_PATH()");
636: rs.next();
637: String path = rs.getString(1);
638: if (path == null) {
639: throw new SQLException("FULLTEXT",
640: "Fulltext search for in-memory databases is not supported.");
641: }
642: rs.close();
643: return path;
644: }
645:
646: private void setColumns(int[] index, ArrayList keys,
647: ArrayList columns) throws SQLException {
648: for (int i = 0; i < keys.size(); i++) {
649: String key = (String) keys.get(i);
650: int found = -1;
651: for (int j = 0; found == -1 && j < columns.size(); j++) {
652: String column = (String) columns.get(j);
653: if (column.equals(key)) {
654: found = j;
655: }
656: }
657: if (found < 0) {
658: throw new SQLException("FULLTEXT", "Column not found: "
659: + key);
660: }
661: index[i] = found;
662: }
663: }
664: //#endif
665:
666: }
|