0001: /**********************************************************************************
0002: * $URL: https://source.sakaiproject.org/svn/search/tags/sakai_2-4-1/search-impl/impl/src/java/org/sakaiproject/search/component/dao/impl/SearchIndexBuilderWorkerDaoJdbcImpl.java $
0003: * $Id: SearchIndexBuilderWorkerDaoJdbcImpl.java 29635 2007-04-26 14:44:09Z ajpoland@iupui.edu $
0004: ***********************************************************************************
0005: *
0006: * Copyright (c) 2003, 2004, 2005, 2006 The Sakai Foundation.
0007: *
0008: * Licensed under the Educational Community License, Version 1.0 (the "License");
0009: * you may not use this file except in compliance with the License.
0010: * You may obtain a copy of the License at
0011: *
0012: * http://www.opensource.org/licenses/ecl1.php
0013: *
0014: * Unless required by applicable law or agreed to in writing, software
0015: * distributed under the License is distributed on an "AS IS" BASIS,
0016: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0017: * See the License for the specific language governing permissions and
0018: * limitations under the License.
0019: *
0020: **********************************************************************************/package org.sakaiproject.search.component.dao.impl;
0021:
0022: import java.io.File;
0023: import java.io.IOException;
0024: import java.io.Reader;
0025: import java.sql.Connection;
0026: import java.sql.Date;
0027: import java.sql.PreparedStatement;
0028: import java.sql.ResultSet;
0029: import java.sql.SQLException;
0030: import java.sql.Statement;
0031: import java.util.ArrayList;
0032: import java.util.Iterator;
0033: import java.util.List;
0034: import java.util.Map;
0035:
0036: import javax.sql.DataSource;
0037:
0038: import org.apache.commons.id.IdentifierGenerator;
0039: import org.apache.commons.id.uuid.VersionFourGenerator;
0040: import org.apache.commons.logging.Log;
0041: import org.apache.commons.logging.LogFactory;
0042: import org.apache.lucene.document.Document;
0043: import org.apache.lucene.document.Field;
0044: import org.apache.lucene.index.IndexReader;
0045: import org.apache.lucene.index.IndexWriter;
0046: import org.apache.lucene.index.Term;
0047: import org.apache.lucene.store.Directory;
0048: import org.hibernate.HibernateException;
0049: import org.sakaiproject.component.api.ComponentManager;
0050: import org.sakaiproject.component.cover.ServerConfigurationService;
0051: import org.sakaiproject.entity.api.Reference; // import org.sakaiproject.entity.api.Entity;
0052: // import org.sakaiproject.entity.api.EntityManager;
0053: // import org.sakaiproject.entity.api.Reference;
0054: import org.sakaiproject.search.api.EntityContentProducer;
0055: import org.sakaiproject.search.api.SearchIndexBuilder;
0056: import org.sakaiproject.search.api.SearchIndexBuilderWorker;
0057: import org.sakaiproject.search.api.SearchService;
0058: import org.sakaiproject.search.api.rdf.RDFIndexException;
0059: import org.sakaiproject.search.api.rdf.RDFSearchService;
0060: import org.sakaiproject.search.component.Messages;
0061: import org.sakaiproject.search.dao.SearchIndexBuilderWorkerDao;
0062: import org.sakaiproject.search.index.IndexStorage;
0063: import org.sakaiproject.search.model.SearchBuilderItem;
0064: import org.sakaiproject.search.model.impl.SearchBuilderItemImpl;
0065: import org.sakaiproject.site.api.Site;
0066: import org.sakaiproject.site.api.ToolConfiguration;
0067: import org.sakaiproject.site.api.SiteService.SelectionType;
0068: import org.sakaiproject.site.api.SiteService.SortType;
0069: import org.sakaiproject.site.cover.SiteService;
0070:
0071: public class SearchIndexBuilderWorkerDaoJdbcImpl implements
0072: SearchIndexBuilderWorkerDao {
0073:
0074: private static final String SEARCH_BUILDER_ITEM_FIELDS = " name, context, searchaction, searchstate, version, id "; //$NON-NLS-1$
0075:
0076: private static final String SEARCH_BUILDER_ITEM_T = "searchbuilderitem"; //$NON-NLS-1$
0077:
0078: private static final String SEARCH_BUILDER_ITEM_FIELDS_PARAMS = " ?, ?, ?, ?, ?, ? "; //$NON-NLS-1$
0079:
0080: private static final String SEARCH_BUILDER_ITEM_FIELDS_UPDATE = " name = ?, context = ?, searchaction = ?, searchstate = ?, version = ? where id = ? "; //$NON-NLS-1$
0081:
0082: private static Log log = LogFactory
0083: .getLog(SearchIndexBuilderWorkerDaoJdbcImpl.class);
0084:
0085: /**
0086: * sync object
0087: */
0088: // private Object threadStartLock = new Object();
0089: /**
0090: * dependency: the search index builder that is accepting new items
0091: */
0092: private SearchIndexBuilder searchIndexBuilder = null;
0093:
0094: private boolean enabled = false;
0095:
0096: // private EntityManager entityManager;
0097:
0098: private RDFSearchService rdfSearchService = null;
0099:
0100: private IdentifierGenerator idgenerator = new VersionFourGenerator();
0101:
0102: /**
0103: * injected to abstract the storage impl
0104: */
0105: private IndexStorage indexStorage = null;
0106:
0107: private DataSource dataSource = null;
0108:
0109: public void init() {
0110: ComponentManager cm = org.sakaiproject.component.cover.ComponentManager
0111: .getInstance();
0112: // entityManager = (EntityManager) load(cm,
0113: // EntityManager.class.getName(),
0114: // true);
0115: searchIndexBuilder = (SearchIndexBuilder) load(cm,
0116: SearchIndexBuilder.class.getName(), true);
0117: rdfSearchService = (RDFSearchService) load(cm,
0118: RDFSearchService.class.getName(), false);
0119:
0120: enabled = "true".equals(ServerConfigurationService.getString(
0121: "search.enable", "false"));
0122:
0123: try {
0124: if (searchIndexBuilder == null) {
0125: log
0126: .error("Search Index Worker needs searchIndexBuilder "); //$NON-NLS-1$
0127: }
0128: // if (entityManager == null)
0129: // {
0130: // log.error("Search Index Worker needs EntityManager ");
0131: // //$NON-NLS-1$
0132: // }
0133: if (indexStorage == null) {
0134: log.error("Search Index Worker needs indexStorage "); //$NON-NLS-1$
0135: }
0136: if (rdfSearchService == null) {
0137: log
0138: .info("No RDFSearchService has been defined, RDF Indexing not enabled"); //$NON-NLS-1$
0139: } else {
0140: log
0141: .warn("Experimental RDF Search Service is enabled using implementation " //$NON-NLS-1$
0142: + rdfSearchService);
0143: }
0144:
0145: } catch (Throwable t) {
0146: log.error("Failed to init ", t); //$NON-NLS-1$
0147: }
0148: }
0149:
0150: private Object load(ComponentManager cm, String name,
0151: boolean aserror) {
0152: Object o = cm.get(name);
0153: if (o == null) {
0154: if (aserror) {
0155: log.error("Cant find Spring component named " + name); //$NON-NLS-1$
0156: }
0157: }
0158: return o;
0159: }
0160:
0161: private void processDeletes(SearchIndexBuilderWorker worker,
0162: Connection connection, List runtimeToDo)
0163: throws SQLException, IOException {
0164:
0165: if (indexStorage.indexExists()) {
0166: IndexReader indexReader = null;
0167: try {
0168: indexReader = indexStorage.getIndexReader();
0169:
0170: // Open the index
0171: for (Iterator tditer = runtimeToDo.iterator(); worker
0172: .isRunning()
0173: && tditer.hasNext();) {
0174: SearchBuilderItem sbi = (SearchBuilderItem) tditer
0175: .next();
0176: if (!SearchBuilderItem.STATE_LOCKED.equals(sbi
0177: .getSearchstate())) {
0178: // should only be getting pending
0179: // items
0180: log.warn(" Found Item that was not pending " //$NON-NLS-1$
0181: + sbi.getName());
0182: continue;
0183: }
0184: if (SearchBuilderItem.ACTION_UNKNOWN.equals(sbi
0185: .getSearchaction())) {
0186: sbi
0187: .setSearchstate(SearchBuilderItem.STATE_COMPLETED);
0188: updateOrSave(connection, sbi);
0189: connection.commit();
0190:
0191: continue;
0192: }
0193: // remove document
0194: // if this is mult segment it might not work.
0195: try {
0196: indexReader.deleteDocuments(new Term(
0197: SearchService.FIELD_REFERENCE, sbi
0198: .getName()));
0199: if (SearchBuilderItem.ACTION_DELETE.equals(sbi
0200: .getSearchaction())) {
0201: sbi
0202: .setSearchstate(SearchBuilderItem.STATE_COMPLETED);
0203: updateOrSave(connection, sbi);
0204: connection.commit();
0205: } else {
0206: sbi
0207: .setSearchstate(SearchBuilderItem.STATE_PENDING_2);
0208: }
0209:
0210: } catch (IOException ex) {
0211: log.warn("Failed to delete Page ", ex); //$NON-NLS-1$
0212: }
0213: }
0214: } finally {
0215: if (indexReader != null) {
0216: indexStorage.closeIndexReader(indexReader);
0217: indexReader = null;
0218: }
0219: }
0220: }
0221:
0222: }
0223:
0224: private void processAdd(SearchIndexBuilderWorker worker,
0225: Connection connection, List runtimeToDo) throws Exception {
0226: IndexWriter indexWrite = null;
0227: try {
0228: if (worker.isRunning()) {
0229: indexWrite = indexStorage.getIndexWriter(false);
0230: }
0231: long last = System.currentTimeMillis();
0232:
0233: for (Iterator tditer = runtimeToDo.iterator(); worker
0234: .isRunning()
0235: && tditer.hasNext();) {
0236:
0237: Reader contentReader = null;
0238: try {
0239: SearchBuilderItem sbi = (SearchBuilderItem) tditer
0240: .next();
0241: // only add adds, that have been deleted or are locked
0242: // sucessfully
0243: if (!SearchBuilderItem.STATE_PENDING_2.equals(sbi
0244: .getSearchstate())
0245: && !SearchBuilderItem.STATE_LOCKED
0246: .equals(sbi.getSearchstate())) {
0247: continue;
0248: }
0249: // Reference ref =
0250: // entityManager.newReference(sbi.getName());
0251: String ref = sbi.getName();
0252: if (ref == null) {
0253: log
0254: .error("Unrecognised trigger object presented to index builder " //$NON-NLS-1$
0255: + sbi);
0256: }
0257:
0258: long startDocIndex = System.currentTimeMillis();
0259: worker.setStartDocIndex(startDocIndex);
0260: worker.setNowIndexing(ref);
0261:
0262: try {
0263: try {
0264: // Entity entity = ref.getEntity();
0265: EntityContentProducer sep = searchIndexBuilder
0266: .newEntityContentProducer(ref);
0267: boolean indexDoc = true;
0268: if (searchIndexBuilder
0269: .isOnlyIndexSearchToolSites()) {
0270: try {
0271: String siteId = sep.getSiteId(sbi
0272: .getName());
0273: Site s = SiteService
0274: .getSite(siteId);
0275: ToolConfiguration t = s
0276: .getToolForCommonId("sakai.search"); //$NON-NLS-1$
0277: if (t == null) {
0278: indexDoc = false;
0279: log
0280: .debug("Not indexing " //$NON-NLS-1$
0281: + sbi.getName()
0282: + " as it has no search tool"); //$NON-NLS-1$
0283: }
0284: } catch (Exception ex) {
0285: indexDoc = false;
0286: log
0287: .debug(
0288: "Not indexing " + sbi.getName() //$NON-NLS-1$
0289: + " as it has no site", ex); //$NON-NLS-1$
0290:
0291: }
0292: }
0293: if (indexDoc && sep != null
0294: && sep.isForIndex(ref)
0295: && sep.getSiteId(ref) != null) {
0296:
0297: Document doc = new Document();
0298: Reference r;
0299: String container = sep
0300: .getContainer(ref);
0301: if (container == null)
0302: container = ""; //$NON-NLS-1$
0303: doc.add(new Field(
0304: SearchService.DATE_STAMP,
0305: String.valueOf(System
0306: .currentTimeMillis()),
0307: Field.Store.COMPRESS,
0308: Field.Index.UN_TOKENIZED));
0309: doc.add(new Field(
0310: SearchService.FIELD_CONTAINER,
0311: filterNull(container),
0312: Field.Store.COMPRESS,
0313: Field.Index.UN_TOKENIZED));
0314: doc.add(new Field(
0315: SearchService.FIELD_ID,
0316: filterNull(sep.getId(ref)),
0317: Field.Store.COMPRESS,
0318: Field.Index.NO));
0319: doc.add(new Field(
0320: SearchService.FIELD_TYPE,
0321: filterNull(sep.getType(ref)),
0322: Field.Store.COMPRESS,
0323: Field.Index.UN_TOKENIZED));
0324: doc
0325: .add(new Field(
0326: SearchService.FIELD_SUBTYPE,
0327: filterNull(sep
0328: .getSubType(ref)),
0329: Field.Store.COMPRESS,
0330: Field.Index.UN_TOKENIZED));
0331: doc.add(new Field(
0332: SearchService.FIELD_REFERENCE,
0333: filterNull(ref),
0334: Field.Store.COMPRESS,
0335: Field.Index.UN_TOKENIZED));
0336:
0337: doc.add(new Field(
0338: SearchService.FIELD_CONTEXT,
0339: filterNull(sep.getSiteId(ref)),
0340: Field.Store.COMPRESS,
0341: Field.Index.UN_TOKENIZED));
0342: if (sep.isContentFromReader(ref)) {
0343: contentReader = sep
0344: .getContentReader(ref);
0345: doc
0346: .add(new Field(
0347: SearchService.FIELD_CONTENTS,
0348: contentReader,
0349: Field.TermVector.YES));
0350: } else {
0351: doc
0352: .add(new Field(
0353: SearchService.FIELD_CONTENTS,
0354: filterNull(sep
0355: .getContent(ref)),
0356: Field.Store.NO,
0357: Field.Index.TOKENIZED,
0358: Field.TermVector.YES));
0359: }
0360:
0361: doc.add(new Field(
0362: SearchService.FIELD_TITLE,
0363: filterNull(sep.getTitle(ref)),
0364: Field.Store.COMPRESS,
0365: Field.Index.TOKENIZED,
0366: Field.TermVector.YES));
0367: doc.add(new Field(
0368: SearchService.FIELD_TOOL,
0369: filterNull(sep.getTool()),
0370: Field.Store.COMPRESS,
0371: Field.Index.UN_TOKENIZED));
0372: doc.add(new Field(
0373: SearchService.FIELD_URL,
0374: filterNull(sep.getUrl(ref)),
0375: Field.Store.COMPRESS,
0376: Field.Index.UN_TOKENIZED));
0377: doc.add(new Field(
0378: SearchService.FIELD_SITEID,
0379: filterNull(sep.getSiteId(ref)),
0380: Field.Store.COMPRESS,
0381: Field.Index.UN_TOKENIZED));
0382:
0383: // add the custom properties
0384:
0385: Map m = sep.getCustomProperties();
0386: if (m != null) {
0387: for (Iterator cprops = m.keySet()
0388: .iterator(); cprops
0389: .hasNext();) {
0390: String key = (String) cprops
0391: .next();
0392: Object value = m.get(key);
0393: String[] values = null;
0394: if (value instanceof String) {
0395: values = new String[1];
0396: values[0] = (String) value;
0397: }
0398: if (value instanceof String[]) {
0399: values = (String[]) value;
0400: }
0401: if (values == null) {
0402: log
0403: .info("Null Custom Properties value has been suppled by " //$NON-NLS-1$
0404: + sep
0405: + " in index " //$NON-NLS-1$
0406: + key);
0407: } else {
0408: for (int i = 0; i < values.length; i++) {
0409: doc
0410: .add(new Field(
0411: key,
0412: filterNull(values[i]),
0413: Field.Store.COMPRESS,
0414: Field.Index.UN_TOKENIZED));
0415: }
0416: }
0417: }
0418: }
0419:
0420: log.debug("Indexing Document " + doc); //$NON-NLS-1$
0421:
0422: indexWrite.addDocument(doc);
0423:
0424: log
0425: .debug("Done Indexing Document " + doc); //$NON-NLS-1$
0426:
0427: processRDF(sep);
0428:
0429: } else {
0430: if (log.isDebugEnabled()) {
0431: if (!indexDoc) {
0432: log
0433: .debug("Ignored Document: Fileteed out by site " + ref); //$NON-NLS-1$
0434: } else if (sep == null) {
0435: log
0436: .debug("Ignored Document: No EntityContentProducer " + ref); //$NON-NLS-1$
0437:
0438: } else if (!sep.isForIndex(ref)) {
0439: log
0440: .debug("Ignored Document: Marked as Ignore " + ref); //$NON-NLS-1$
0441:
0442: } else if (sep.getSiteId(ref) == null) {
0443: log
0444: .debug("Ignored Document: No Site ID " + ref); //$NON-NLS-1$
0445:
0446: } else {
0447: log
0448: .debug("Ignored Document: Reason Unknown " + ref); //$NON-NLS-1$
0449:
0450: }
0451: }
0452: }
0453: } catch (Exception e1) {
0454: log
0455: .debug(
0456: " Failed to index document for " + ref + " cause: " //$NON-NLS-1$
0457: + e1.getMessage(),
0458: e1);
0459: }
0460: sbi
0461: .setSearchstate(SearchBuilderItem.STATE_COMPLETED);
0462: updateOrSave(connection, sbi);
0463: connection.commit();
0464: } catch (Exception e1) {
0465: log.debug(" Failed to index document cause: " //$NON-NLS-1$
0466: + e1.getMessage());
0467: }
0468: long endDocIndex = System.currentTimeMillis();
0469: worker.setLastIndex(endDocIndex - startDocIndex);
0470: if ((endDocIndex - startDocIndex) > 60000L) {
0471: log
0472: .warn("Slow index operation " //$NON-NLS-1$
0473: + String
0474: .valueOf((endDocIndex - startDocIndex) / 1000)
0475: + " seconds to index " //$NON-NLS-1$
0476: + ref);
0477: }
0478: // update this node lock to indicate its
0479: // still alove, no document should
0480: // take more than 2 mins to process
0481: // ony do this check once every minute
0482: long now = System.currentTimeMillis();
0483: if ((now - last) > (60L * 1000L)) {
0484: last = System.currentTimeMillis();
0485: if (!worker.getLockTransaction(
0486: 15L * 60L * 1000L, true)) {
0487: throw new Exception(
0488: "Transaction Lock Expired while indexing " //$NON-NLS-1$
0489: + ref);
0490: }
0491: }
0492:
0493: } finally {
0494: if (contentReader != null) {
0495: try {
0496: contentReader.close();
0497: } catch (IOException ioex) {
0498: }
0499: }
0500: }
0501:
0502: }
0503: worker.setStartDocIndex(System.currentTimeMillis());
0504: worker
0505: .setNowIndexing(Messages
0506: .getString("SearchIndexBuilderWorkerDaoJdbcImpl.33")); //$NON-NLS-1$
0507: } catch (Exception ex) {
0508: log.error("Failed to Add Documents ", ex);
0509: throw new Exception(ex);
0510: } finally {
0511: if (indexWrite != null) {
0512: if (log.isDebugEnabled()) {
0513: log.debug("Closing Index Writer With "
0514: + indexWrite.docCount() + " documents");
0515: Directory d = indexWrite.getDirectory();
0516: String[] s = d.list();
0517: log.debug("Directory Contains ");
0518: for (int i = 0; i < s.length; i++) {
0519: File f = new File(s[i]);
0520: log.debug("\t" + String.valueOf(f.length())
0521: + "\t" + new Date(f.lastModified())
0522: + "\t" + s[i]);
0523: }
0524: }
0525: indexStorage.closeIndexWriter(indexWrite);
0526: }
0527: }
0528:
0529: }
0530:
0531: /**
0532: * @param title
0533: * @return
0534: */
0535: private String filterNull(String s) {
0536: if (s == null) {
0537: return "";
0538: }
0539: return s;
0540: }
0541:
0542: private int completeUpdate(SearchIndexBuilderWorker worker,
0543: Connection connection, List runtimeToDo) throws Exception {
0544: try {
0545:
0546: for (Iterator tditer = runtimeToDo.iterator(); worker
0547: .isRunning()
0548: && tditer.hasNext();) {
0549: SearchBuilderItem sbi = (SearchBuilderItem) tditer
0550: .next();
0551: if (SearchBuilderItem.STATE_COMPLETED.equals(sbi
0552: .getSearchstate())) {
0553: if (SearchBuilderItem.ACTION_DELETE.equals(sbi
0554: .getSearchaction())) {
0555: delete(connection, sbi);
0556: connection.commit();
0557: } else {
0558: updateOrSave(connection, sbi);
0559: connection.commit();
0560: }
0561:
0562: }
0563: }
0564: return runtimeToDo.size();
0565: } catch (Exception ex) {
0566: log
0567: .warn("Failed to update state in database due to " //$NON-NLS-1$
0568: + ex.getMessage()
0569: + " this will be corrected on the next run of the IndexBuilder, no cause for alarm"); //$NON-NLS-1$
0570: }
0571: return 0;
0572:
0573: }
0574:
0575: /*
0576: * (non-Javadoc)
0577: *
0578: * @see org.sakaiproject.search.component.dao.impl.SearchIndexBuilderWorkerDao#processToDoListTransaction()
0579: */
0580: public void processToDoListTransaction(
0581: SearchIndexBuilderWorker worker, int indexBatchSize) {
0582: Connection connection = null;
0583: try {
0584: connection = dataSource.getConnection();
0585: long startTime = System.currentTimeMillis();
0586: int totalDocs = 0;
0587:
0588: // Load the list
0589:
0590: List runtimeToDo = findPending(indexBatchSize, connection,
0591: worker);
0592:
0593: totalDocs = runtimeToDo.size();
0594:
0595: log.debug("Processing " + totalDocs + " documents"); //$NON-NLS-1$ //$NON-NLS-2$
0596:
0597: if (totalDocs > 0) {
0598: log.debug("Preupdate Start");
0599: indexStorage.doPreIndexUpdate();
0600: log.debug("Preupdate End");
0601:
0602: // get lock
0603:
0604: // this needs to be exclusive
0605: log.debug("Process Deletes Start");
0606:
0607: processDeletes(worker, connection, runtimeToDo);
0608: log.debug("Process Deletes End");
0609:
0610: // upate and release lock
0611: // after a process Deletes the index needs to updated
0612:
0613: // can be parallel
0614: log.debug("Process Add Start");
0615:
0616: processAdd(worker, connection, runtimeToDo);
0617: log.debug("Process Add End");
0618: log.debug("Complete Update Start");
0619:
0620: completeUpdate(worker, connection, runtimeToDo);
0621: log.debug("Complete Update End");
0622:
0623: // get lock
0624: try {
0625: log.debug("Post update Start");
0626: indexStorage.doPostIndexUpdate();
0627: log.debug("Post update End");
0628: } catch (IOException e) {
0629: log.error("Failed to do Post Index Update", e); //$NON-NLS-1$
0630: }
0631: // release lock
0632:
0633: }
0634:
0635: if (worker.isRunning()) {
0636: long endTime = System.currentTimeMillis();
0637: float totalTime = endTime - startTime;
0638: float ndocs = totalDocs;
0639: if (totalDocs > 0) {
0640: float docspersec = 1000 * ndocs / totalTime;
0641: log
0642: .info("Completed Process List of " + totalDocs + " at " //$NON-NLS-1$ //$NON-NLS-2$
0643: + docspersec
0644: + " documents/per second "); //$NON-NLS-1$
0645: }
0646: }
0647: } catch (Exception ex) {
0648: log
0649: .warn("Failed to perform index cycle " + ex.getMessage()); //$NON-NLS-1$
0650: log.debug("Traceback is ", ex); //$NON-NLS-1$
0651: } finally {
0652: try {
0653: connection.close();
0654: } catch (Exception ex) {
0655: }
0656: }
0657:
0658: }
0659:
0660: /*
0661: * (non-Javadoc)
0662: *
0663: * @see org.sakaiproject.search.dao.SearchIndexBuilderWorkerDao#createIndexTransaction(org.sakaiproject.search.api.SearchIndexBuilderWorker)
0664: */
0665: public void createIndexTransaction(SearchIndexBuilderWorker worker) {
0666: Connection connection = null;
0667: try {
0668: connection = dataSource.getConnection();
0669: long startTime = System.currentTimeMillis();
0670: int totalDocs = 0;
0671:
0672: log.debug("Preupdate Start");
0673: indexStorage.doPreIndexUpdate();
0674: log.debug("Preupdate End");
0675:
0676: createIndex(worker, connection);
0677:
0678: // get lock
0679: try {
0680: log.debug("Post update Start");
0681: indexStorage.doPostIndexUpdate();
0682: log.debug("Post update End");
0683: } catch (IOException e) {
0684: log.error("Failed to do Post Index Update", e); //$NON-NLS-1$
0685: }
0686: log.info("Created Index"); //$NON-NLS-1$
0687: } catch (Exception ex) {
0688: log.warn("Failed to create Index " + ex.getMessage()); //$NON-NLS-1$
0689: log.debug("Traceback is ", ex); //$NON-NLS-1$
0690: } finally {
0691: try {
0692: connection.close();
0693: } catch (Exception ex) {
0694: }
0695: }
0696: }
0697:
0698: /**
0699: * @param worker
0700: * @param connection
0701: * @throws Exception
0702: */
0703: private void createIndex(SearchIndexBuilderWorker worker,
0704: Connection connection) throws Exception {
0705: IndexWriter indexWrite = null;
0706: try {
0707: if (worker.isRunning()) {
0708: indexWrite = indexStorage.getIndexWriter(false);
0709: }
0710: long last = System.currentTimeMillis();
0711:
0712: Document doc = new Document();
0713: doc.add(new Field(SearchService.DATE_STAMP, String
0714: .valueOf(System.currentTimeMillis()),
0715: Field.Store.COMPRESS, Field.Index.UN_TOKENIZED));
0716: doc.add(new Field(SearchService.FIELD_ID,
0717: "---INDEX-CREATED---", Field.Store.COMPRESS,
0718: Field.Index.UN_TOKENIZED));
0719: indexWrite.addDocument(doc);
0720:
0721: } catch (Exception ex) {
0722: log.error("Failed to Add Documents ", ex);
0723: throw new Exception(ex);
0724: } finally {
0725: if (indexWrite != null) {
0726: if (log.isDebugEnabled()) {
0727: log.debug("Closing Index Writer With "
0728: + indexWrite.docCount() + " documents");
0729: Directory d = indexWrite.getDirectory();
0730: String[] s = d.list();
0731: log.debug("Directory Contains ");
0732: for (int i = 0; i < s.length; i++) {
0733: File f = new File(s[i]);
0734: log.debug("\t" + String.valueOf(f.length())
0735: + "\t" + new Date(f.lastModified())
0736: + "\t" + s[i]);
0737: }
0738: }
0739: indexStorage.closeIndexWriter(indexWrite);
0740: }
0741: }
0742: }
0743:
0744: private void processRDF(EntityContentProducer sep)
0745: throws RDFIndexException {
0746: if (rdfSearchService != null) {
0747: String s = sep.getCustomRDF();
0748: if (s != null) {
0749: rdfSearchService.addData(s);
0750: }
0751: }
0752: }
0753:
0754: private List getSiteMasterItems(Connection connection)
0755: throws SQLException {
0756: PreparedStatement pst = null;
0757: ResultSet rst = null;
0758: try {
0759: pst = connection.prepareStatement("select " //$NON-NLS-1$
0760: + SEARCH_BUILDER_ITEM_FIELDS + " from " //$NON-NLS-1$
0761: + SEARCH_BUILDER_ITEM_T + " where name like ? " //$NON-NLS-1$
0762: + " and context <> ? "); //$NON-NLS-1$
0763: pst.clearParameters();
0764: pst.setString(1, SearchBuilderItem.SITE_MASTER_PATTERN);
0765: pst.setString(2, SearchBuilderItem.GLOBAL_CONTEXT);
0766: rst = pst.executeQuery();
0767: ArrayList a = new ArrayList();
0768: while (rst.next()) {
0769: SearchBuilderItemImpl sbi = new SearchBuilderItemImpl();
0770: populateSearchBuilderItem(rst, sbi);
0771: a.add(sbi);
0772: }
0773: return a;
0774: } finally {
0775: try {
0776: rst.close();
0777: } catch (Exception ex) {
0778: }
0779: try {
0780: pst.close();
0781: } catch (Exception ex) {
0782: }
0783: }
0784: }
0785:
0786: /**
0787: * get the Instance Master
0788: *
0789: * @return
0790: * @throws HibernateException
0791: */
0792: private SearchBuilderItem getMasterItem(Connection connection)
0793: throws SQLException {
0794: log.debug("get Master Items with " + connection); //$NON-NLS-1$
0795:
0796: PreparedStatement pst = null;
0797: ResultSet rst = null;
0798: try {
0799: pst = connection.prepareStatement("select " //$NON-NLS-1$
0800: + SEARCH_BUILDER_ITEM_FIELDS + " from " //$NON-NLS-1$
0801: + SEARCH_BUILDER_ITEM_T + " where name = ? "); //$NON-NLS-1$
0802: pst.clearParameters();
0803: pst.setString(1, SearchBuilderItem.GLOBAL_MASTER);
0804: rst = pst.executeQuery();
0805: SearchBuilderItemImpl sbi = new SearchBuilderItemImpl();
0806: if (rst.next()) {
0807: populateSearchBuilderItem(rst, sbi);
0808: } else {
0809: sbi.setName(SearchBuilderItem.INDEX_MASTER);
0810: sbi.setContext(SearchBuilderItem.GLOBAL_CONTEXT);
0811: sbi.setSearchaction(SearchBuilderItem.ACTION_UNKNOWN);
0812: sbi.setSearchstate(SearchBuilderItem.STATE_UNKNOWN);
0813: }
0814: return sbi;
0815: } finally {
0816: try {
0817: rst.close();
0818: } catch (Exception ex) {
0819: }
0820: try {
0821: pst.close();
0822: } catch (Exception ex) {
0823: }
0824: }
0825: }
0826:
0827: private void populateSearchBuilderItem(ResultSet rst,
0828: SearchBuilderItemImpl sbi) throws SQLException {
0829: sbi.setName(rst.getString(1));
0830: sbi.setContext(rst.getString(2));
0831: sbi.setSearchaction(new Integer(rst.getInt(3)));
0832: sbi.setSearchstate(new Integer(rst.getInt(4)));
0833: sbi.setVersion(rst.getDate(5));
0834: sbi.setId(rst.getString(6));
0835: }
0836:
0837: private int populateStatement(PreparedStatement pst,
0838: SearchBuilderItem sbi) throws SQLException {
0839: pst.setString(1, sbi.getName());
0840: pst.setString(2, sbi.getContext());
0841: pst.setInt(3, sbi.getSearchaction().intValue());
0842: pst.setInt(4, sbi.getSearchstate().intValue());
0843: pst.setDate(5, new Date(sbi.getVersion().getTime()));
0844: pst.setString(6, sbi.getId());
0845: return 6;
0846:
0847: }
0848:
0849: private void updateOrSave(Connection connection,
0850: SearchBuilderItem sbi) throws SQLException {
0851: PreparedStatement pst = null;
0852: try {
0853: try {
0854: save(connection, sbi);
0855: } catch (SQLException sqlex) {
0856:
0857: pst = connection.prepareStatement("update " //$NON-NLS-1$
0858: + SEARCH_BUILDER_ITEM_T + " set " //$NON-NLS-1$
0859: + SEARCH_BUILDER_ITEM_FIELDS_UPDATE);
0860: populateStatement(pst, sbi);
0861: pst.executeUpdate();
0862: }
0863: } catch (SQLException ex) {
0864: log.warn("Failed ", ex); //$NON-NLS-1$
0865: throw ex;
0866: } finally {
0867: try {
0868: pst.close();
0869: } catch (Exception ex) {
0870: }
0871: }
0872: }
0873:
0874: private void save(Connection connection, SearchBuilderItem sbi)
0875: throws SQLException {
0876: PreparedStatement pst = null;
0877: try {
0878: pst = connection.prepareStatement(" insert into " //$NON-NLS-1$
0879: + SEARCH_BUILDER_ITEM_T + " ( " //$NON-NLS-1$
0880: + SEARCH_BUILDER_ITEM_FIELDS + " ) values ( " //$NON-NLS-1$
0881: + SEARCH_BUILDER_ITEM_FIELDS_PARAMS + " ) "); //$NON-NLS-1$
0882: pst.clearParameters();
0883: populateStatement(pst, sbi);
0884: pst.executeUpdate();
0885: } finally {
0886: try {
0887: pst.close();
0888: } catch (Exception ex) {
0889: }
0890: }
0891:
0892: }
0893:
0894: private void delete(Connection connection, SearchBuilderItem sbi)
0895: throws SQLException {
0896: PreparedStatement pst = null;
0897: try {
0898: pst = connection.prepareStatement(" delete from " //$NON-NLS-1$
0899: + SEARCH_BUILDER_ITEM_T + " where id = ? "); //$NON-NLS-1$
0900: pst.clearParameters();
0901: pst.setString(1, sbi.getId());
0902: pst.execute();
0903: } catch (SQLException ex) {
0904: log.warn("Failed ", ex); //$NON-NLS-1$
0905: throw ex;
0906: } finally {
0907: try {
0908: pst.close();
0909: } catch (Exception ex) {
0910: }
0911: }
0912:
0913: }
0914:
0915: /**
0916: * get the action for the site master
0917: *
0918: * @param siteMaster
0919: * @return
0920: */
0921: private Integer getSiteMasterAction(SearchBuilderItem siteMaster) {
0922: if (siteMaster.getName().startsWith(
0923: SearchBuilderItem.INDEX_MASTER)
0924: && !SearchBuilderItem.GLOBAL_CONTEXT.equals(siteMaster
0925: .getContext())) {
0926: if (SearchBuilderItem.STATE_PENDING.equals(siteMaster
0927: .getSearchstate())) {
0928: return siteMaster.getSearchaction();
0929: }
0930: }
0931: return SearchBuilderItem.STATE_UNKNOWN;
0932: }
0933:
0934: /**
0935: * Get the site that the siteMaster references
0936: *
0937: * @param siteMaster
0938: * @return
0939: */
0940: private String getSiteMasterSite(SearchBuilderItem siteMaster) {
0941: if (siteMaster.getName().startsWith(
0942: SearchBuilderItem.INDEX_MASTER)
0943: && !SearchBuilderItem.GLOBAL_CONTEXT.equals(siteMaster
0944: .getContext())) {
0945: // this depends on the pattern, perhapse it should be a parse
0946: return siteMaster.getName().substring(
0947: SearchBuilderItem.INDEX_MASTER.length() + 1);
0948: }
0949: return null;
0950:
0951: }
0952:
0953: private Integer getMasterAction(Connection connection)
0954: throws SQLException {
0955: return getMasterAction(getMasterItem(connection));
0956: }
0957:
0958: /**
0959: * get the master action of known master item
0960: *
0961: * @param master
0962: * @return
0963: */
0964: private Integer getMasterAction(SearchBuilderItem master) {
0965: if (master.getName().equals(SearchBuilderItem.GLOBAL_MASTER)) {
0966: if (SearchBuilderItem.STATE_PENDING.equals(master
0967: .getSearchstate())) {
0968: return master.getSearchaction();
0969: }
0970: }
0971: return SearchBuilderItem.STATE_UNKNOWN;
0972: }
0973:
0974: private List findPending(int batchSize, Connection connection,
0975: SearchIndexBuilderWorker worker) throws SQLException {
0976: // Pending is the first 100 items
0977: // State == PENDING
0978: // Action != Unknown
0979: long start = System.currentTimeMillis();
0980: try {
0981: log.debug("TXFind pending with " + connection); //$NON-NLS-1$
0982:
0983: SearchBuilderItem masterItem = getMasterItem(connection);
0984: Integer masterAction = getMasterAction(masterItem);
0985: log.debug(" Master Item is " + masterItem.getName() + ":" //$NON-NLS-1$ //$NON-NLS-2$
0986: + masterItem.getSearchaction() + ":" //$NON-NLS-1$
0987: + masterItem.getSearchstate() + "::" //$NON-NLS-1$
0988: + masterItem.getVersion());
0989: if (SearchBuilderItem.ACTION_REFRESH.equals(masterAction)) {
0990: log.debug(" Master Action is " + masterAction); //$NON-NLS-1$
0991: log
0992: .debug(" REFRESH = " + SearchBuilderItem.ACTION_REFRESH); //$NON-NLS-1$
0993: log
0994: .debug(" RELOAD = " + SearchBuilderItem.ACTION_REBUILD); //$NON-NLS-1$
0995: // get a complete list of all items, before the master
0996: // action version
0997: // if there are none, update the master action action to
0998: // completed
0999: // and return a blank list
1000:
1001: refreshIndex(connection, masterItem);
1002:
1003: } else if (SearchBuilderItem.ACTION_REBUILD
1004: .equals(masterAction)) {
1005: rebuildIndex(connection, masterItem, worker);
1006: } else {
1007: // get all site masters and perform the required action.
1008: List siteMasters = getSiteMasterItems(connection);
1009: for (Iterator i = siteMasters.iterator(); i.hasNext();) {
1010: SearchBuilderItem siteMaster = (SearchBuilderItem) i
1011: .next();
1012: Integer action = getSiteMasterAction(siteMaster);
1013: if (SearchBuilderItem.ACTION_REBUILD.equals(action)) {
1014: rebuildIndex(connection, siteMaster, worker);
1015: } else if (SearchBuilderItem.ACTION_REFRESH
1016: .equals(action)) {
1017: refreshIndex(connection, siteMaster);
1018: }
1019: }
1020: }
1021: PreparedStatement pst = null;
1022: PreparedStatement lockedPst = null;
1023: ResultSet rst = null;
1024: try {
1025: pst = connection
1026: .prepareStatement("select " //$NON-NLS-1$
1027: + SEARCH_BUILDER_ITEM_FIELDS
1028: + " from " //$NON-NLS-1$
1029: + SEARCH_BUILDER_ITEM_T
1030: + " where searchstate = ? and searchaction <> ? and " //$NON-NLS-1$
1031: + " not ( name like ? ) order by version "); //$NON-NLS-1$
1032: lockedPst = connection.prepareStatement("update " //$NON-NLS-1$
1033: + SEARCH_BUILDER_ITEM_T
1034: + " set searchstate = ? " //$NON-NLS-1$
1035: + " where id = ? and searchstate = ? "); //$NON-NLS-1$
1036: pst.clearParameters();
1037: pst.setInt(1, SearchBuilderItem.STATE_PENDING
1038: .intValue());
1039: pst.setInt(2, SearchBuilderItem.ACTION_UNKNOWN
1040: .intValue());
1041: pst.setString(3, SearchBuilderItem.SITE_MASTER_PATTERN);
1042: rst = pst.executeQuery();
1043: ArrayList a = new ArrayList();
1044: while (rst.next() && a.size() < batchSize) {
1045:
1046: SearchBuilderItemImpl sbi = new SearchBuilderItemImpl();
1047: populateSearchBuilderItem(rst, sbi);
1048: lockedPst.clearParameters();
1049: lockedPst.setInt(1, SearchBuilderItem.STATE_LOCKED
1050: .intValue());
1051: lockedPst.setString(2, sbi.getId());
1052: lockedPst.setInt(3, SearchBuilderItem.STATE_PENDING
1053: .intValue());
1054: if (lockedPst.executeUpdate() == 1) {
1055: sbi
1056: .setSearchstate(SearchBuilderItem.STATE_LOCKED);
1057: a.add(sbi);
1058: }
1059: connection.commit();
1060:
1061: }
1062: return a;
1063: } finally {
1064: try {
1065: rst.close();
1066: } catch (Exception ex) {
1067: }
1068: try {
1069: pst.close();
1070: } catch (Exception ex) {
1071: }
1072: }
1073:
1074: } finally {
1075: long finish = System.currentTimeMillis();
1076: log.debug(" findPending took " + (finish - start) + " ms"); //$NON-NLS-1$ //$NON-NLS-2$
1077: }
1078: }
1079:
1080: public int countPending(Connection connection) {
1081:
1082: PreparedStatement pst = null;
1083: ResultSet rst = null;
1084: try {
1085: pst = connection.prepareStatement("select count(*) from " //$NON-NLS-1$
1086: + SEARCH_BUILDER_ITEM_T
1087: + " where searchstate = ? and searchaction <> ?"); //$NON-NLS-1$
1088: pst.clearParameters();
1089: pst.setInt(1, SearchBuilderItem.STATE_PENDING.intValue());
1090: pst.setInt(2, SearchBuilderItem.ACTION_UNKNOWN.intValue());
1091: rst = pst.executeQuery();
1092: if (rst.next()) {
1093: return rst.getInt(1);
1094: }
1095: return 0;
1096: } catch (SQLException sqlex) {
1097: return 0;
1098: } finally {
1099: try {
1100: pst.close();
1101: } catch (Exception ex) {
1102: }
1103: ;
1104: }
1105:
1106: }
1107:
1108: private void rebuildIndex(Connection connection,
1109: SearchBuilderItem controlItem,
1110: SearchIndexBuilderWorker worker) throws SQLException {
1111: // delete all and return the master action only
1112: // the caller will then rebuild the index from scratch
1113: log
1114: .debug("DELETE ALL RECORDS =========================================================="); //$NON-NLS-1$
1115: Statement stm = null;
1116: try {
1117: stm = connection.createStatement();
1118: if (SearchBuilderItem.GLOBAL_CONTEXT.equals(controlItem
1119: .getContext())) {
1120: stm
1121: .execute("delete from searchbuilderitem where name <> '" //$NON-NLS-1$
1122: + SearchBuilderItem.GLOBAL_MASTER
1123: + "' "); //$NON-NLS-1$
1124: } else {
1125: stm
1126: .execute("delete from searchbuilderitem where context = '" //$NON-NLS-1$
1127: + controlItem.getContext()
1128: + "' and name <> '" //$NON-NLS-1$
1129: + controlItem.getName() + "' "); //$NON-NLS-1$
1130:
1131: }
1132:
1133: log
1134: .debug("DONE DELETE ALL RECORDS ==========================================================="); //$NON-NLS-1$
1135: connection.commit();
1136: log
1137: .debug("ADD ALL RECORDS ==========================================================="); //$NON-NLS-1$
1138: long lastupdate = System.currentTimeMillis();
1139: List contextList = new ArrayList();
1140: if (SearchBuilderItem.GLOBAL_CONTEXT.equals(controlItem
1141: .getContext())) {
1142:
1143: for (Iterator i = SiteService.getSites(
1144: SelectionType.ANY, null, null, null,
1145: SortType.NONE, null).iterator(); i.hasNext();) {
1146: Site s = (Site) i.next();
1147: if (!SiteService.isSpecialSite(s.getId())
1148: || SiteService.isUserSite(s.getId())) {
1149: if (searchIndexBuilder
1150: .isOnlyIndexSearchToolSites()) {
1151: ToolConfiguration t = s
1152: .getToolForCommonId("sakai.search"); //$NON-NLS-1$
1153: if (t != null) {
1154: contextList.add(s.getId());
1155: }
1156: } else {
1157: contextList.add(s.getId());
1158: }
1159: }
1160: }
1161: } else {
1162: contextList.add(controlItem.getContext());
1163: }
1164: for (Iterator c = contextList.iterator(); c.hasNext();) {
1165: String siteContext = (String) c.next();
1166: log.info("Rebuild for " + siteContext); //$NON-NLS-1$
1167: for (Iterator i = searchIndexBuilder
1168: .getContentProducers().iterator(); i.hasNext();) {
1169: EntityContentProducer ecp = (EntityContentProducer) i
1170: .next();
1171:
1172: Iterator contentIterator = null;
1173: contentIterator = ecp
1174: .getSiteContentIterator(siteContext);
1175: log.debug("Using ECP " + ecp); //$NON-NLS-1$
1176:
1177: int added = 0;
1178: for (; contentIterator.hasNext();) {
1179: if ((System.currentTimeMillis() - lastupdate) > 60000L) {
1180: lastupdate = System.currentTimeMillis();
1181: if (!worker.getLockTransaction(
1182: 15L * 60L * 1000L, true)) {
1183: throw new RuntimeException(
1184: "Transaction Lock Expired while Rebuilding Index "); //$NON-NLS-1$
1185: }
1186: }
1187: String resourceName = (String) contentIterator
1188: .next();
1189: log.debug("Checking " + resourceName); //$NON-NLS-1$
1190: if (resourceName == null
1191: || resourceName.length() > 255) {
1192: log
1193: .warn("Entity Reference Longer than 255 characters, ignored: Reference=" //$NON-NLS-1$
1194: + resourceName);
1195: continue;
1196: }
1197: SearchBuilderItem sbi = new SearchBuilderItemImpl();
1198: sbi.setName(resourceName);
1199: sbi
1200: .setSearchaction(SearchBuilderItem.ACTION_ADD);
1201: sbi
1202: .setSearchstate(SearchBuilderItem.STATE_PENDING);
1203: sbi.setId(idgenerator.nextIdentifier()
1204: .toString());
1205: sbi.setVersion(new Date(System
1206: .currentTimeMillis()));
1207: String context = null;
1208: try {
1209: context = ecp.getSiteId(resourceName);
1210: } catch (Exception ex) {
1211: log
1212: .debug("No context for resource " + resourceName //$NON-NLS-1$
1213: + " defaulting to none"); //$NON-NLS-1$
1214: }
1215: if (context == null || context.length() == 0) {
1216: context = "none"; //$NON-NLS-1$
1217: }
1218: sbi.setContext(context);
1219: try {
1220: updateOrSave(connection, sbi);
1221: } catch (SQLException sqlex) {
1222: log
1223: .error("Failed to update " + sqlex.getMessage()); //$NON-NLS-1$
1224: }
1225: connection.commit();
1226:
1227: }
1228: log.debug(" Added " + added); //$NON-NLS-1$
1229: }
1230: }
1231: log
1232: .debug("DONE ADD ALL RECORDS ==========================================================="); //$NON-NLS-1$
1233: controlItem
1234: .setSearchstate(SearchBuilderItem.STATE_COMPLETED);
1235: updateOrSave(connection, controlItem);
1236: connection.commit();
1237: } finally {
1238: try {
1239: stm.close();
1240: } catch (Exception ex) {
1241: }
1242: }
1243:
1244: }
1245:
1246: private void refreshIndex(Connection connection,
1247: SearchBuilderItem controlItem) throws SQLException {
1248: // delete all and return the master action only
1249: // the caller will then rebuild the index from scratch
1250: log
1251: .debug("UPDATE ALL RECORDS =========================================================="); //$NON-NLS-1$
1252: Statement stm = null;
1253: try {
1254: stm = connection.createStatement();
1255: if (SearchBuilderItem.GLOBAL_CONTEXT.equals(controlItem
1256: .getContext())) {
1257: stm
1258: .execute("update searchbuilderitem set searchstate = " //$NON-NLS-1$
1259: + SearchBuilderItem.STATE_PENDING
1260: + " where name not like '" //$NON-NLS-1$
1261: + SearchBuilderItem.SITE_MASTER_PATTERN
1262: + "' and name <> '" + SearchBuilderItem.GLOBAL_MASTER //$NON-NLS-1$
1263: + "' "); //$NON-NLS-1$
1264:
1265: } else {
1266: stm
1267: .execute("update searchbuilderitem set searchstate = " //$NON-NLS-1$
1268: + SearchBuilderItem.STATE_PENDING
1269: + " where context = '" + controlItem.getContext() //$NON-NLS-1$
1270: + "' and name <> '" + controlItem.getName() + "'"); //$NON-NLS-1$ //$NON-NLS-2$
1271:
1272: }
1273: controlItem
1274: .setSearchstate(SearchBuilderItem.STATE_COMPLETED);
1275: updateOrSave(connection, controlItem);
1276: connection.commit();
1277: } finally {
1278: try {
1279: stm.close();
1280: } catch (Exception ex) {
1281: }
1282: ;
1283: }
1284: }
1285:
1286: /**
1287: * @return Returns the indexStorage.
1288: */
1289: public IndexStorage getIndexStorage() {
1290: return indexStorage;
1291: }
1292:
1293: /**
1294: * @param indexStorage
1295: * The indexStorage to set.
1296: */
1297: public void setIndexStorage(IndexStorage indexStorage) {
1298: this .indexStorage = indexStorage;
1299: }
1300:
1301: /**
1302: * @return Returns the dataSource.
1303: */
1304: public DataSource getDataSource() {
1305: return dataSource;
1306: }
1307:
1308: /**
1309: * @param dataSource
1310: * The dataSource to set.
1311: */
1312: public void setDataSource(DataSource dataSource) {
1313: this .dataSource = dataSource;
1314: }
1315:
1316: public boolean isLockRequired() {
1317: return !indexStorage.isMultipleIndexers();
1318: }
1319:
1320: /*
1321: * (non-Javadoc)
1322: *
1323: * @see org.sakaiproject.search.dao.SearchIndexBuilderWorkerDao#indexExists()
1324: */
1325: public boolean indexExists() {
1326: return indexStorage.centralIndexExists();
1327: }
1328:
1329: }
|