0001: /*
0002: * IndexBrowse.java
0003: *
0004: * Copyright (c) 2002-2007, Hewlett-Packard Company and Massachusetts
0005: * Institute of Technology. All rights reserved.
0006: *
0007: * Redistribution and use in source and binary forms, with or without
0008: * modification, are permitted provided that the following conditions are
0009: * met:
0010: *
0011: * - Redistributions of source code must retain the above copyright
0012: * notice, this list of conditions and the following disclaimer.
0013: *
0014: * - Redistributions in binary form must reproduce the above copyright
0015: * notice, this list of conditions and the following disclaimer in the
0016: * documentation and/or other materials provided with the distribution.
0017: *
0018: * - Neither the name of the Hewlett-Packard Company nor the name of the
0019: * Massachusetts Institute of Technology nor the names of their
0020: * contributors may be used to endorse or promote products derived from
0021: * this software without specific prior written permission.
0022: *
0023: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0024: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0025: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0026: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0027: * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
0028: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
0029: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
0030: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0031: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
0032: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
0033: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
0034: * DAMAGE.
0035: */
0036: package org.dspace.browse;
0037:
0038: import java.io.IOException;
0039: import java.sql.SQLException;
0040: import java.util.ArrayList;
0041: import java.util.Date;
0042: import java.util.HashMap;
0043: import java.util.HashSet;
0044: import java.util.List;
0045: import java.util.Map;
0046: import java.util.Set;
0047: import java.util.StringTokenizer;
0048:
0049: import org.apache.commons.cli.CommandLine;
0050: import org.apache.commons.cli.CommandLineParser;
0051: import org.apache.commons.cli.HelpFormatter;
0052: import org.apache.commons.cli.Options;
0053: import org.apache.commons.cli.ParseException;
0054: import org.apache.commons.cli.PosixParser;
0055: import org.apache.commons.lang.StringUtils;
0056: import org.apache.log4j.Logger;
0057: import org.dspace.content.DCValue;
0058: import org.dspace.content.Item;
0059: import org.dspace.core.Context;
0060: import org.dspace.sort.SortOption;
0061: import org.dspace.sort.SortException;
0062: import org.dspace.sort.OrderFormat;
0063:
0064: /**
0065: * Tool to create Browse indexes. This class is used from the command line to
0066: * create and destroy the browse indices from configuration, and also from within
0067: * the application to add and remove content from those tables.
0068: *
0069: * To see a full definition of the usage of this class just run it without any
0070: * arguments, and you will get the help message.
0071: *
0072: * @author Richard Jones
0073: */
0074: public class IndexBrowse {
0075: /** logger */
0076: private static Logger log = Logger.getLogger(IndexBrowse.class);
0077:
0078: /** DSpace context */
0079: private Context context;
0080:
0081: /** whether to destroy and rebuild the database */
0082: private boolean rebuild = false;
0083:
0084: /** whether to destroy the database */
0085: private boolean delete = false;
0086:
0087: /** the index number to start working from (for debug only) */
0088: private int start = 1;
0089:
0090: /** whether to execute the commands generated against the database */
0091: private boolean execute = false;
0092:
0093: /** whether there is an output file into which to write SQL */
0094: private boolean fileOut = false;
0095:
0096: /** whether the output should be written to the standadr out */
0097: private boolean stdOut = false;
0098:
0099: /** the name of the output file */
0100: private String outFile = null;
0101:
0102: /** should the operations be verbose */
0103: private boolean verbose = false;
0104:
0105: /** the configured browse indices */
0106: private BrowseIndex[] bis;
0107:
0108: /** the DAO for write operations on the database */
0109: private BrowseCreateDAO dao;
0110:
0111: /** the outputter class */
0112: private BrowseOutput output;
0113:
/**
 * Build an indexer that works against a newly created DSpace context.
 * When calling from inside the application it is better to use
 *
 * <code>
 * new IndexBrowse(context);
 * </code>
 *
 * and hand in the context you are already working with.
 *
 * @throws SQLException
 * @throws BrowseException
 */
public IndexBrowse() throws SQLException, BrowseException {
    this(new Context());
}
0130:
/**
 * Build an indexer bound to the supplied context. Authorisation is
 * switched off on that context, so any authorisations applied to it are
 * ignored from here on.
 *
 * @param context the DSpace context to work with
 * @throws SQLException
 * @throws BrowseException
 */
public IndexBrowse(Context context) throws SQLException, BrowseException {
    this.context = context;
    context.setIgnoreAuthorization(true);

    // load the configured browse indices, then run the configuration check
    bis = BrowseIndex.getBrowseIndices();
    checkConfig();

    // DAO used for all create/write operations
    dao = BrowseDAOFactory.getCreateInstance(context);

    // collaborator that receives the messages and generated SQL
    output = new BrowseOutput();

    // generate the metadata bits of every index up front
    for (BrowseIndex index : bis) {
        index.generateMdBits();
    }
}
0161:
0162: /**
0163: * @return Returns the verbose.
0164: */
0165: public boolean isVerbose() {
0166: return verbose;
0167: }
0168:
0169: /**
0170: * @param verbose The verbose to set.
0171: */
0172: public void setVerbose(boolean verbose) {
0173: this .verbose = verbose;
0174: output.setVerbose(verbose);
0175: }
0176:
0177: /**
0178: * @return true if to rebuild the database, false if not
0179: */
0180: public boolean rebuild() {
0181: return rebuild;
0182: }
0183:
0184: /**
0185: * @param bool whether to rebuild the database or not
0186: */
0187: public void setRebuild(boolean bool) {
0188: this .rebuild = bool;
0189: }
0190:
0191: /**
0192: * @return true if to delete the database, false if not
0193: */
0194: public boolean delete() {
0195: return delete;
0196: }
0197:
0198: /**
0199: * @param bool whetehr to delete the database or not
0200: */
0201: public void setDelete(boolean bool) {
0202: this .delete = bool;
0203: }
0204:
0205: /**
0206: * @param start the index to start working up from
0207: */
0208: public void setStart(int start) {
0209: this .start = start;
0210: }
0211:
0212: /**
0213: * @return the index to start working up from
0214: */
0215: public int getStart() {
0216: return this .start;
0217: }
0218:
0219: /**
0220: * @param bool whether to execute the database commands or not
0221: */
0222: public void setExecute(boolean bool) {
0223: this .execute = bool;
0224: }
0225:
0226: /**
0227: * @return true if to execute database commands, false if not
0228: */
0229: public boolean execute() {
0230: return this .execute;
0231: }
0232:
0233: /**
0234: * @param bool whether to use an output file
0235: */
0236: public void setFileOut(boolean bool) {
0237: this .fileOut = bool;
0238: output.setFile(bool);
0239: }
0240:
0241: /**
0242: * @return true if using an output file, false if not
0243: */
0244: public boolean isFileOut() {
0245: return this .fileOut;
0246: }
0247:
0248: /**
0249: * @param bool whether to write to standard out
0250: */
0251: public void setStdOut(boolean bool) {
0252: this .stdOut = bool;
0253: output.setPrint(bool);
0254: }
0255:
0256: /**
0257: * @return true if to write to standard out, false if not
0258: */
0259: public boolean toStdOut() {
0260: return this .stdOut;
0261: }
0262:
0263: /**
0264: * @param file the name of the output file
0265: */
0266: public void setOutFile(String file) {
0267: this .outFile = file;
0268: output.setFileName(file);
0269: }
0270:
0271: /**
0272: * @return the name of the output file
0273: */
0274: public String getOutFile() {
0275: return this .outFile;
0276: }
0277:
0278: private void removeIndex(int itemID, String table)
0279: throws BrowseException {
0280: dao.deleteByItemID(table, itemID);
0281: }
0282:
0283: /**
0284: * Prune indexes - called from the public interfaces or at the end of a batch indexing process
0285: */
0286: private void pruneIndexes() throws BrowseException {
0287: // go over the indices and prune
0288: for (int i = 0; i < bis.length; i++) {
0289: if (bis[i].isMetadataIndex()) {
0290: log.debug("Pruning metadata index: "
0291: + bis[i].getTableName());
0292: dao.pruneExcess(bis[i].getTableName(), bis[i]
0293: .getMapTableName(), false);
0294: dao.pruneDistinct(bis[i].getDistinctTableName(), bis[i]
0295: .getMapTableName());
0296: }
0297: }
0298:
0299: dao.pruneExcess(
0300: BrowseIndex.getItemBrowseIndex().getTableName(), null,
0301: false);
0302: dao.pruneExcess(BrowseIndex.getWithdrawnBrowseIndex()
0303: .getTableName(), null, true);
0304: }
0305:
0306: /**
0307: * Index the given item
0308: *
0309: * @param item the item to index
0310: * @throws BrowseException
0311: */
0312: public void indexItem(Item item) throws BrowseException {
0313: // If the item is not archived AND has not been withdrawn
0314: // we can assume that it has *never* been archived - in that case,
0315: // there won't be anything in the browse index, so we can just skip processing.
0316: // If it is either archived or withdrawn, then there may be something in the browse
0317: // tables, so we *must* process it.
0318: // Caveat: an Item.update() that changes isArchived() from TRUE to FALSE, whilst leaving
0319: // isWithdrawn() as FALSE, may result in stale data in the browse tables.
0320: // Such an update should never occur though, and if it does, probably indicates a major
0321: // problem with the code updating the Item.
0322: if (item.isArchived() || item.isWithdrawn()) {
0323: indexItem(new ItemMetadataProxy(item));
0324:
0325: // Ensure that we remove any invalid entries
0326: pruneIndexes();
0327: }
0328: }
0329:
0330: /**
0331: * Index the given item
0332: *
0333: * @param item the item to index
0334: * @throws BrowseException
0335: */
0336: private void indexItem(ItemMetadataProxy item)
0337: throws BrowseException {
0338: // Map to store the metadata from the Item
0339: // so that we don't grab it multiple times
0340: Map<String, String> itemMDMap = new HashMap<String, String>();
0341:
0342: try {
0343: boolean reqCommunityMappings = false;
0344: Map<Integer, String> sortMap = getSortValues(item,
0345: itemMDMap);
0346: if (item.isArchived() && !item.isWithdrawn()) {
0347: // Try to update an existing record in the item index
0348: if (!dao.updateIndex(BrowseIndex.getItemBrowseIndex()
0349: .getTableName(), item.getID(), sortMap)) {
0350: // Record doesn't exist - ensure that it doesn't exist in the withdrawn index,
0351: // and add it to the archived item index
0352: removeIndex(item.getID(), BrowseIndex
0353: .getWithdrawnBrowseIndex().getTableName());
0354: dao.insertIndex(BrowseIndex.getItemBrowseIndex()
0355: .getTableName(), item.getID(), sortMap);
0356: }
0357:
0358: reqCommunityMappings = true;
0359: } else if (item.isWithdrawn()) {
0360: // Try to update an existing record in the withdrawn index
0361: if (!dao.updateIndex(BrowseIndex
0362: .getWithdrawnBrowseIndex().getTableName(), item
0363: .getID(), sortMap)) {
0364: // Record doesn't exist - ensure that it doesn't exist in the item index,
0365: // and add it to the withdrawn item index
0366: removeIndex(item.getID(), BrowseIndex
0367: .getItemBrowseIndex().getTableName());
0368: dao.insertIndex(BrowseIndex
0369: .getWithdrawnBrowseIndex().getTableName(),
0370: item.getID(), sortMap);
0371: }
0372: } else {
0373: // This item shouldn't exist in either index - ensure that it is removed
0374: removeIndex(item.getID(), BrowseIndex
0375: .getItemBrowseIndex().getTableName());
0376: removeIndex(item.getID(), BrowseIndex
0377: .getWithdrawnBrowseIndex().getTableName());
0378: }
0379:
0380: // Update the community mappings if they are required, or remove them if they aren't
0381: if (reqCommunityMappings) {
0382: dao.updateCommunityMappings(item.getID());
0383: } else {
0384: dao.deleteCommunityMappings(item.getID());
0385: }
0386:
0387: // Now update the metadata indexes
0388: for (int i = 0; i < bis.length; i++) {
0389: log.debug("Indexing for item " + item.getID()
0390: + ", for index: " + bis[i].getTableName());
0391:
0392: if (bis[i].isMetadataIndex()) {
0393: Set<Integer> distIDSet = new HashSet<Integer>();
0394:
0395: // now index the new details - but only if it's archived and not withdrawn
0396: if (item.isArchived() && !item.isWithdrawn()) {
0397: // get the metadata from the item
0398: for (int mdIdx = 0; mdIdx < bis[i]
0399: .getMetadataCount(); mdIdx++) {
0400: String[] md = bis[i].getMdBits(mdIdx);
0401: DCValue[] values = item.getMetadata(md[0],
0402: md[1], md[2], Item.ANY);
0403:
0404: // if we have values to index on, then do so
0405: if (values != null) {
0406: for (int x = 0; x < values.length; x++) {
0407: // Ensure that there is a value to index before inserting it
0408: if (StringUtils
0409: .isEmpty(values[x].value)) {
0410: log
0411: .error("Null metadata value for item "
0412: + item.getID()
0413: + ", field: "
0414: + values[x].schema
0415: + "."
0416: + values[x].element
0417: + (values[x].qualifier == null ? ""
0418: : "."
0419: + values[x].qualifier));
0420: } else {
0421: // get the normalised version of the value
0422: String nVal = OrderFormat
0423: .makeSortString(
0424: values[x].value,
0425: values[x].language,
0426: bis[i]
0427: .getDataType());
0428: distIDSet
0429: .add(dao
0430: .getDistinctID(
0431: bis[i]
0432: .getDistinctTableName(),
0433: values[x].value,
0434: nVal));
0435: }
0436: }
0437: }
0438: }
0439: }
0440:
0441: // Do we have any mappings?
0442: if (distIDSet.isEmpty()) {
0443: // remove any old mappings
0444: removeIndex(item.getID(), bis[i]
0445: .getMapTableName());
0446: } else {
0447: // Update the existing mappings
0448: int[] distIDarr = new int[distIDSet.size()];
0449: int didx = 0;
0450: for (Integer distID : distIDSet) {
0451: distIDarr[didx++] = distID;
0452: }
0453: dao.updateDistinctMappings(bis[i]
0454: .getMapTableName(), item.getID(),
0455: distIDarr);
0456: }
0457: }
0458: }
0459: } catch (SQLException e) {
0460: log.error("caught exception: ", e);
0461: throw new BrowseException(e);
0462: }
0463: }
0464:
0465: /**
0466: * Get the normalised values for each of the sort columns
0467: * @param item
0468: * @param itemMDMap
0469: * @return
0470: * @throws BrowseException
0471: * @throws SQLException
0472: */
0473: private Map<Integer, String> getSortValues(ItemMetadataProxy item,
0474: Map itemMDMap) throws BrowseException, SQLException {
0475: try {
0476: // now obtain the sort order values that we will use
0477: Map<Integer, String> sortMap = new HashMap<Integer, String>();
0478: for (SortOption so : SortOption.getSortOptions()) {
0479: Integer key = new Integer(so.getNumber());
0480: String metadata = so.getMetadata();
0481:
0482: // If we've already used the metadata for this Item
0483: // it will be cached in the map
0484: DCValue value = null;
0485:
0486: if (itemMDMap != null)
0487: value = (DCValue) itemMDMap.get(metadata);
0488:
0489: // We haven't used this metadata before, so grab it from the item
0490: if (value == null) {
0491: String[] somd = so.getMdBits();
0492: DCValue[] dcv = item.getMetadata(somd[0], somd[1],
0493: somd[2], Item.ANY);
0494:
0495: if (dcv == null) {
0496: continue;
0497: }
0498:
0499: // we only use the first dc value
0500: if (dcv.length > 0) {
0501: // Set it as the current metadata value to use
0502: // and add it to the map
0503: value = dcv[0];
0504:
0505: if (itemMDMap != null)
0506: itemMDMap.put(metadata, dcv[0]);
0507: }
0508: }
0509:
0510: // normalise the values as we insert into the sort map
0511: if (value != null && value.value != null) {
0512: String nValue = OrderFormat.makeSortString(
0513: value.value, value.language, so.getType());
0514: sortMap.put(key, nValue);
0515: }
0516: }
0517:
0518: return sortMap;
0519: } catch (SortException se) {
0520: throw new BrowseException("Error in SortOptions", se);
0521: }
0522: }
0523:
0524: /**
0525: * @deprecated
0526: * @param item
0527: * @return
0528: * @throws BrowseException
0529: */
0530: public boolean itemAdded(Item item) throws BrowseException {
0531: indexItem(item);
0532: return true;
0533: }
0534:
0535: /**
0536: * @deprecated
0537: * @param item
0538: * @return
0539: * @throws BrowseException
0540: */
0541: public boolean itemChanged(Item item) throws BrowseException {
0542: indexItem(item);
0543: return true;
0544: }
0545:
0546: /**
0547: * remove all the indices for the given item
0548: *
0549: * @param item the item to be removed
0550: * @return
0551: * @throws BrowseException
0552: */
0553: public boolean itemRemoved(Item item) throws BrowseException {
0554: return itemRemoved(item.getID());
0555: }
0556:
0557: public boolean itemRemoved(int itemID) throws BrowseException {
0558: // go over the indices and index the item
0559: for (int i = 0; i < bis.length; i++) {
0560: if (bis[i].isMetadataIndex()) {
0561: log.debug("Removing indexing for removed item "
0562: + itemID + ", for index: "
0563: + bis[i].getTableName());
0564: removeIndex(itemID, bis[i].getMapTableName());
0565: }
0566: }
0567:
0568: // Remove from the item indexes (archive and withdrawn)
0569: removeIndex(itemID, BrowseIndex.getItemBrowseIndex()
0570: .getTableName());
0571: removeIndex(itemID, BrowseIndex.getWithdrawnBrowseIndex()
0572: .getTableName());
0573: dao.deleteCommunityMappings(itemID);
0574:
0575: // Ensure that we remove any invalid entries
0576: pruneIndexes();
0577:
0578: return true;
0579: }
0580:
0581: /**
0582: * Creates Browse indexes, destroying the old ones.
0583: *
0584: * @param argv
0585: * Command-line arguments
0586: */
0587: public static void main(String[] argv) throws SQLException,
0588: BrowseException, ParseException {
0589: Context context = new Context();
0590: IndexBrowse indexer = new IndexBrowse(context);
0591:
0592: // create an options object and populate it
0593: CommandLineParser parser = new PosixParser();
0594: Options options = new Options();
0595:
0596: // these are mutually exclusive, and represent the primary actions
0597: options
0598: .addOption(
0599: "t",
0600: "tables",
0601: false,
0602: "create the tables only, do not attempt to index. Mutually exclusive with -f and -i");
0603: options
0604: .addOption("i", "index", false,
0605: "actually do the indexing. Mutually exclusive with -t and -f");
0606: options
0607: .addOption(
0608: "f",
0609: "full",
0610: false,
0611: "make the tables, and do the indexing. This forces -x. Mutually exclusive with -t and -i");
0612:
0613: // these options can be specified only with the -f option
0614: options
0615: .addOption(
0616: "r",
0617: "rebuild",
0618: false,
0619: "should we rebuild all the indices, which removes old index tables and creates new ones. For use with -f. Mutually exclusive with -d");
0620: options
0621: .addOption(
0622: "d",
0623: "delete",
0624: false,
0625: "delete all the indices, but don't create new ones. For use with -f. This is mutually exclusive with -r");
0626:
0627: // these options can be specified only with the -t and -f options
0628: options
0629: .addOption(
0630: "o",
0631: "out",
0632: true,
0633: "[-o <filename>] write the remove and create SQL to the given file. For use with -t and -f"); // FIXME: not currently working
0634: options
0635: .addOption("p", "print", false,
0636: "write the remove and create SQL to the stdout. For use with -t and -f");
0637: options
0638: .addOption(
0639: "x",
0640: "execute",
0641: false,
0642: "execute all the remove and create SQL against the database. For use with -t and -f");
0643: options
0644: .addOption(
0645: "s",
0646: "start",
0647: true,
0648: "[-s <int>] start from this index number and work upward (mostly only useful for debugging). For use with -t and -f");
0649:
0650: // this option can be used with any argument
0651: options
0652: .addOption(
0653: "v",
0654: "verbose",
0655: false,
0656: "print extra information to the stdout. If used in conjunction with -p, you cannot use the stdout to generate your database structure");
0657:
0658: // display the help. If this is spefified, it trumps all other arguments
0659: options
0660: .addOption("h", "help", false,
0661: "show this help documentation. Overrides all other arguments");
0662:
0663: CommandLine line = parser.parse(options, argv);
0664:
0665: // display the help
0666: if (line.hasOption("h")) {
0667: indexer.usage(options);
0668: return;
0669: }
0670:
0671: if (line.hasOption("v")) {
0672: indexer.setVerbose(true);
0673: }
0674:
0675: if (line.hasOption("i")) {
0676: indexer.createIndex();
0677: return;
0678: }
0679:
0680: if (line.hasOption("f")) {
0681: if (line.hasOption('r')) {
0682: indexer.setRebuild(true);
0683: } else if (line.hasOption("d")) {
0684: indexer.setDelete(true);
0685: }
0686: }
0687:
0688: if (line.hasOption("f") || line.hasOption("t")) {
0689: if (line.hasOption("s")) {
0690: indexer.setStart(Integer.parseInt(line
0691: .getOptionValue("s")));
0692: }
0693: if (line.hasOption("x")) {
0694: indexer.setExecute(true);
0695: }
0696: if (line.hasOption("p")) {
0697: indexer.setStdOut(true);
0698: }
0699: if (line.hasOption("o")) {
0700: indexer.setFileOut(true);
0701: indexer.setOutFile(line.getOptionValue("o"));
0702: }
0703: }
0704:
0705: if (line.hasOption("t")) {
0706: indexer.prepTables();
0707: return;
0708: }
0709:
0710: if (line.hasOption("f")) {
0711: indexer.setExecute(true);
0712: indexer.initBrowse();
0713: return;
0714: }
0715:
0716: indexer.usage(options);
0717: context.complete();
0718: }
0719:
0720: /**
0721: * output the usage information
0722: *
0723: * @param options
0724: */
0725: private void usage(Options options) {
0726: HelpFormatter formatter = new HelpFormatter();
0727: formatter.printHelp("IndexBrowse", options);
0728: }
0729:
0730: /**
0731: * Prepare the tables for the browse indices
0732: *
0733: * @throws BrowseException
0734: */
0735: private void prepTables() throws BrowseException {
0736: try {
0737: // first, erase the existing indexes
0738: clearDatabase();
0739:
0740: createItemTables();
0741:
0742: // for each current browse index, make all the relevant tables
0743: for (int i = 0; i < bis.length; i++) {
0744: createTables(bis[i]);
0745:
0746: // prepare some CLI output
0747: StringBuffer logMe = new StringBuffer();
0748: for (SortOption so : SortOption.getSortOptions()) {
0749: logMe.append(" ").append(so.getMetadata()).append(
0750: " ");
0751: }
0752:
0753: output.message("Creating browse index "
0754: + bis[i].getName() + ": index by "
0755: + bis[i].getMetadata() + " sortable by: "
0756: + logMe.toString());
0757: }
0758: } catch (SortException se) {
0759: throw new BrowseException("Error in SortOptions", se);
0760: }
0761: }
0762:
0763: /**
0764: * delete all the existing browse tables
0765: *
0766: * @throws BrowseException
0767: */
0768: public void clearDatabase() throws BrowseException {
0769: try {
0770: output.message("Deleting old indices");
0771:
0772: // notice that we have to do this without reference to the BrowseIndex[]
0773: // because they do not necessarily reflect what currently exists in
0774: // the database
0775:
0776: int i = getStart();
0777: while (true) {
0778: String tableName = BrowseIndex.getTableName(i, false,
0779: false, false, false);
0780: String distinctTableName = BrowseIndex.getTableName(i,
0781: false, false, true, false);
0782: String distinctMapName = BrowseIndex.getTableName(i,
0783: false, false, false, true);
0784: String sequence = BrowseIndex.getSequenceName(i, false,
0785: false);
0786: String mapSequence = BrowseIndex.getSequenceName(i,
0787: false, true);
0788: String distinctSequence = BrowseIndex.getSequenceName(
0789: i, true, false);
0790:
0791: // These views are no longer used, but as we are cleaning the database,
0792: // they may exist and need to be removed
0793: String colViewName = BrowseIndex.getTableName(i, false,
0794: true, false, false);
0795: String comViewName = BrowseIndex.getTableName(i, true,
0796: false, false, false);
0797: String distinctColViewName = BrowseIndex.getTableName(
0798: i, false, true, false, true);
0799: String distinctComViewName = BrowseIndex.getTableName(
0800: i, true, false, false, true);
0801:
0802: output.message("Checking for " + tableName);
0803: if (dao.testTableExistance(tableName)) {
0804: output.message("...found");
0805:
0806: output
0807: .message("Deleting old index and associated resources: "
0808: + tableName);
0809:
0810: // prepare a statement which will delete the table and associated
0811: // resources
0812: String dropper = dao.dropIndexAndRelated(tableName,
0813: this .execute());
0814: String dropSeq = dao.dropSequence(sequence, this
0815: .execute());
0816: output.sql(dropper);
0817: output.sql(dropSeq);
0818:
0819: // These views are no longer used, but as we are cleaning the database,
0820: // they may exist and need to be removed
0821: String dropColView = dao.dropView(colViewName, this
0822: .execute());
0823: String dropComView = dao.dropView(comViewName, this
0824: .execute());
0825: output.sql(dropColView);
0826: output.sql(dropComView);
0827: }
0828:
0829: // NOTE: we need a secondary context to check for the existance
0830: // of the table, because if an SQLException is thrown, then
0831: // the connection is aborted, and no more transaction stuff can be
0832: // done. Therefore we use a blank context to make the requests,
0833: // not caring if it gets aborted or not
0834: output.message("Checking for " + distinctTableName);
0835: if (!dao.testTableExistance(distinctTableName)) {
0836: if (i < bis.length || i < 10) {
0837: output
0838: .message("... doesn't exist; but will carry on as there may be something that conflicts");
0839: } else {
0840: output
0841: .message("... doesn't exist; no more tables to delete");
0842: break;
0843: }
0844: } else {
0845: output.message("...found");
0846:
0847: output
0848: .message("Deleting old index and associated resources: "
0849: + distinctTableName);
0850:
0851: // prepare statements that will delete the distinct value tables
0852: String dropDistinctTable = dao.dropIndexAndRelated(
0853: distinctTableName, this .execute());
0854: String dropMap = dao.dropIndexAndRelated(
0855: distinctMapName, this .execute());
0856: String dropDistinctMapSeq = dao.dropSequence(
0857: mapSequence, this .execute());
0858: String dropDistinctSeq = dao.dropSequence(
0859: distinctSequence, this .execute());
0860: output.sql(dropDistinctTable);
0861: output.sql(dropMap);
0862: output.sql(dropDistinctMapSeq);
0863: output.sql(dropDistinctSeq);
0864:
0865: // These views are no longer used, but as we are cleaning the database,
0866: // they may exist and need to be removed
0867: String dropDistinctColView = dao.dropView(
0868: distinctColViewName, this .execute());
0869: String dropDistinctComView = dao.dropView(
0870: distinctComViewName, this .execute());
0871: output.sql(dropDistinctColView);
0872: output.sql(dropDistinctComView);
0873: }
0874:
0875: i++;
0876: }
0877:
0878: dropItemTables(BrowseIndex.getItemBrowseIndex());
0879: dropItemTables(BrowseIndex.getWithdrawnBrowseIndex());
0880:
0881: if (execute()) {
0882: context.commit();
0883: }
0884: } catch (SQLException e) {
0885: log.error("caught exception: ", e);
0886: throw new BrowseException(e);
0887: }
0888: }
0889:
0890: /**
0891: * drop the tables and related database entries for the internal
0892: * 'item' tables
0893: * @param bix
0894: * @throws BrowseException
0895: */
0896: private void dropItemTables(BrowseIndex bix) throws BrowseException {
0897: if (dao.testTableExistance(bix.getTableName())) {
0898: String tableName = bix.getTableName();
0899: String dropper = dao.dropIndexAndRelated(tableName, this
0900: .execute());
0901: String dropSeq = dao.dropSequence(bix.getSequenceName(
0902: false, false), this .execute());
0903: output.sql(dropper);
0904: output.sql(dropSeq);
0905:
0906: // These views are no longer used, but as we are cleaning the database,
0907: // they may exist and need to be removed
0908: String colViewName = bix.getTableName(false, true, false,
0909: false);
0910: String comViewName = bix.getTableName(true, false, false,
0911: false);
0912: String dropColView = dao.dropView(colViewName, this
0913: .execute());
0914: String dropComView = dao.dropView(comViewName, this
0915: .execute());
0916: output.sql(dropColView);
0917: output.sql(dropComView);
0918: }
0919: }
0920:
0921: /**
0922: * Create the internal full item tables
0923: * @throws BrowseException
0924: */
0925: private void createItemTables() throws BrowseException {
0926: try {
0927: // prepare the array list of sort options
0928: List<Integer> sortCols = new ArrayList<Integer>();
0929: for (SortOption so : SortOption.getSortOptions()) {
0930: sortCols.add(new Integer(so.getNumber()));
0931: }
0932:
0933: createItemTables(BrowseIndex.getItemBrowseIndex(), sortCols);
0934: createItemTables(BrowseIndex.getWithdrawnBrowseIndex(),
0935: sortCols);
0936:
0937: if (execute()) {
0938: context.commit();
0939: }
0940: } catch (SortException se) {
0941: throw new BrowseException("Error in SortOptions", se);
0942: } catch (SQLException e) {
0943: log.error("caught exception: ", e);
0944: throw new BrowseException(e);
0945: }
0946: }
0947:
0948: /**
0949: * Create the internal full item tables for a particular index
0950: * (ie. withdrawn / in archive)
0951: * @param bix
0952: * @param sortCols
0953: * @throws BrowseException
0954: */
0955: private void createItemTables(BrowseIndex bix,
0956: List<Integer> sortCols) throws BrowseException {
0957: String tableName = bix.getTableName();
0958:
0959: String itemSeq = dao.createSequence(bix.getSequenceName(false,
0960: false), this .execute());
0961: String itemTable = dao.createPrimaryTable(tableName, sortCols,
0962: execute);
0963: String[] itemIndices = dao.createDatabaseIndices(tableName,
0964: sortCols, false, this .execute());
0965:
0966: output.sql(itemSeq);
0967: output.sql(itemTable);
0968: for (int i = 0; i < itemIndices.length; i++) {
0969: output.sql(itemIndices[i]);
0970: }
0971: }
0972:
0973: /**
0974: * Create the browse tables for the given browse index
0975: *
0976: * @param bi the browse index to create
0977: * @throws BrowseException
0978: */
0979: private void createTables(BrowseIndex bi) throws BrowseException {
0980: try {
0981: // if this is a single view, create the DISTINCT tables and views
0982: if (bi.isMetadataIndex()) {
0983: // if this is a single view, create the DISTINCT tables and views
0984: String distinctTableName = bi.getDistinctTableName();
0985: String distinctSeq = bi.getSequenceName(true, false);
0986: String distinctMapName = bi.getMapTableName();
0987: String mapSeq = bi.getSequenceName(false, true);
0988:
0989: // FIXME: at the moment we have not defined INDEXes for this data
0990: // add this later when necessary
0991:
0992: String distinctTableSeq = dao.createSequence(
0993: distinctSeq, this .execute());
0994: String distinctMapSeq = dao.createSequence(mapSeq, this
0995: .execute());
0996: String createDistinctTable = dao.createDistinctTable(
0997: distinctTableName, this .execute());
0998: String createDistinctMap = dao.createDistinctMap(
0999: distinctTableName, distinctMapName, this
1000: .execute());
1001: String[] mapIndices = dao.createMapIndices(
1002: distinctTableName, distinctMapName, this
1003: .execute());
1004:
1005: output.sql(distinctTableSeq);
1006: output.sql(distinctMapSeq);
1007: output.sql(createDistinctTable);
1008: output.sql(createDistinctMap);
1009: for (int i = 0; i < mapIndices.length; i++) {
1010: output.sql(mapIndices[i]);
1011: }
1012: }
1013:
1014: if (execute()) {
1015: context.commit();
1016: }
1017: } catch (SQLException e) {
1018: log.error("caught exception: ", e);
1019: throw new BrowseException(e);
1020: }
1021: }
1022:
1023: /**
1024: * index everything
1025: *
1026: * @throws SQLException
1027: * @throws BrowseException
1028: */
1029: public void initBrowse() throws SQLException, BrowseException {
1030: Date start = new Date();
1031:
1032: output.message("Creating browse indexes for DSpace");
1033:
1034: Date initDate = new Date();
1035: long init = initDate.getTime() - start.getTime();
1036:
1037: output
1038: .message("init complete (" + Long.toString(init)
1039: + " ms)");
1040:
1041: if (delete()) {
1042: output.message("Deleting browse tables");
1043:
1044: clearDatabase();
1045:
1046: output.message("Browse tables deleted");
1047: return;
1048: } else if (rebuild()) {
1049: output.message("Preparing browse tables");
1050:
1051: prepTables();
1052:
1053: output.message("Browse tables prepared");
1054: }
1055:
1056: Date prepDate = new Date();
1057: long prep = prepDate.getTime() - start.getTime();
1058: long prepinit = prepDate.getTime() - initDate.getTime();
1059:
1060: output.message("tables prepped (" + Long.toString(prep)
1061: + " ms, " + Long.toString(prepinit) + " ms)");
1062:
1063: int count = createIndex();
1064:
1065: context.complete();
1066:
1067: Date endDate = new Date();
1068: long end = endDate.getTime() - start.getTime();
1069: long endprep = endDate.getTime() - prepDate.getTime();
1070:
1071: output.message("content indexed (" + Long.toString(end)
1072: + " ms, " + Long.toString(endprep) + " ms)");
1073: output.message("Items indexed: " + Integer.toString(count));
1074:
1075: if (count > 0) {
1076: long overall = end / count;
1077: long specific = endprep / count;
1078:
1079: output.message("Overall average time per item: "
1080: + Long.toString(overall) + " ms");
1081: output.message("Index only average time per item: "
1082: + Long.toString(specific) + " ms");
1083: }
1084:
1085: output.message("Browse indexing completed");
1086: }
1087:
1088: /**
1089: * create the indices for all the items
1090: *
1091: * @return
1092: * @throws BrowseException
1093: */
1094: private int createIndex() throws BrowseException {
1095: try {
1096: // first, pre-prepare the known metadata fields that we want to query
1097: // on
1098: for (int k = 0; k < bis.length; k++) {
1099: bis[k].generateMdBits();
1100: }
1101:
1102: // now get the ids of ALL the items in the database
1103: BrowseItemDAO biDao = BrowseDAOFactory
1104: .getItemInstance(context);
1105: BrowseItem[] items = biDao.findAll();
1106:
1107: // go through every item id, grab the relevant metadata
1108: // and write it into the database
1109:
1110: for (int j = 0; j < items.length; j++) {
1111: indexItem(new ItemMetadataProxy(items[j].getID(),
1112: items[j]));
1113:
1114: // after each item we commit the context and clear the cache
1115: context.commit();
1116: context.clearCache();
1117: }
1118:
1119: // penultimately we have to delete any items that couldn't be located in the
1120: // index list
1121: pruneIndexes();
1122:
1123: // Make sure the deletes are written back
1124: context.commit();
1125:
1126: return items.length;
1127: } catch (SQLException e) {
1128: log.error("caught exception: ", e);
1129: throw new BrowseException(e);
1130: }
1131: }
1132:
1133: /**
1134: * Currently does nothing
1135: *
1136: */
1137: private void checkConfig() {
1138: // FIXME: exactly in what way do we want to check the config?
1139: }
1140:
1141: /**
1142: * Take a string representation of a metadata field, and return it as an array.
1143: * This is just a convenient utility method to basically break the metadata
1144: * representation up by its delimiter (.), and stick it in an array, inserting
1145: * the value of the init parameter when there is no metadata field part.
1146: *
1147: * @param mfield the string representation of the metadata
1148: * @param init the default value of the array elements
1149: * @return a three element array with schema, element and qualifier respectively
1150: */
1151: public String[] interpretField(String mfield, String init)
1152: throws IOException {
1153: StringTokenizer sta = new StringTokenizer(mfield, ".");
1154: String[] field = { init, init, init };
1155:
1156: int i = 0;
1157: while (sta.hasMoreTokens()) {
1158: field[i++] = sta.nextToken();
1159: }
1160:
1161: // error checks to make sure we have at least a schema and qualifier for both
1162: if (field[0] == null || field[1] == null) {
1163: throw new IOException("at least a schema and element be "
1164: + "specified in configuration. You supplied: "
1165: + mfield);
1166: }
1167:
1168: return field;
1169: }
1170:
1171: // private inner class
1172: // Hides the Item / BrowseItem in such a way that we can remove
1173: // the duplication in indexing an item.
1174: private class ItemMetadataProxy {
1175: private Item item;
1176: private BrowseItem browseItem;
1177: private int id;
1178:
1179: ItemMetadataProxy(Item item) {
1180: this .item = item;
1181: this .browseItem = null;
1182: this .id = 0;
1183: }
1184:
1185: ItemMetadataProxy(int id, BrowseItem browseItem) {
1186: this .item = null;
1187: this .browseItem = browseItem;
1188: this .id = id;
1189: }
1190:
1191: public DCValue[] getMetadata(String schema, String element,
1192: String qualifier, String lang) throws SQLException {
1193: if (item != null) {
1194: return item.getMetadata(schema, element, qualifier,
1195: lang);
1196: }
1197:
1198: return browseItem.getMetadata(schema, element, qualifier,
1199: lang);
1200: }
1201:
1202: public int getID() {
1203: if (item != null) {
1204: return item.getID();
1205: }
1206:
1207: return id;
1208: }
1209:
1210: /**
1211: * Is the Item archived?
1212: * @return
1213: */
1214: public boolean isArchived() {
1215: if (item != null) {
1216: return item.isArchived();
1217: }
1218:
1219: return browseItem.isArchived();
1220: }
1221:
1222: /**
1223: * Is the Item withdrawn?
1224: * @return
1225: */
1226: public boolean isWithdrawn() {
1227: if (item != null) {
1228: return item.isWithdrawn();
1229: }
1230:
1231: return browseItem.isWithdrawn();
1232: }
1233: }
1234: }
|