Source Code Cross Referenced for IndexBrowse.java in  » Content-Management-System » dspace » org » dspace » browse » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Content Management System » dspace » org.dspace.browse 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * IndexBrowse.java
0003:         *
0004:         * Copyright (c) 2002-2007, Hewlett-Packard Company and Massachusetts
0005:         * Institute of Technology.  All rights reserved.
0006:         *
0007:         * Redistribution and use in source and binary forms, with or without
0008:         * modification, are permitted provided that the following conditions are
0009:         * met:
0010:         *
0011:         * - Redistributions of source code must retain the above copyright
0012:         * notice, this list of conditions and the following disclaimer.
0013:         *
0014:         * - Redistributions in binary form must reproduce the above copyright
0015:         * notice, this list of conditions and the following disclaimer in the
0016:         * documentation and/or other materials provided with the distribution.
0017:         *
0018:         * - Neither the name of the Hewlett-Packard Company nor the name of the
0019:         * Massachusetts Institute of Technology nor the names of their
0020:         * contributors may be used to endorse or promote products derived from
0021:         * this software without specific prior written permission.
0022:         *
0023:         * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0024:         * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0025:         * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0026:         * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0027:         * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
0028:         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
0029:         * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
0030:         * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0031:         * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
0032:         * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
0033:         * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
0034:         * DAMAGE.
0035:         */
0036:        package org.dspace.browse;
0037:
0038:        import java.io.IOException;
0039:        import java.sql.SQLException;
0040:        import java.util.ArrayList;
0041:        import java.util.Date;
0042:        import java.util.HashMap;
0043:        import java.util.HashSet;
0044:        import java.util.List;
0045:        import java.util.Map;
0046:        import java.util.Set;
0047:        import java.util.StringTokenizer;
0048:
0049:        import org.apache.commons.cli.CommandLine;
0050:        import org.apache.commons.cli.CommandLineParser;
0051:        import org.apache.commons.cli.HelpFormatter;
0052:        import org.apache.commons.cli.Options;
0053:        import org.apache.commons.cli.ParseException;
0054:        import org.apache.commons.cli.PosixParser;
0055:        import org.apache.commons.lang.StringUtils;
0056:        import org.apache.log4j.Logger;
0057:        import org.dspace.content.DCValue;
0058:        import org.dspace.content.Item;
0059:        import org.dspace.core.Context;
0060:        import org.dspace.sort.SortOption;
0061:        import org.dspace.sort.SortException;
0062:        import org.dspace.sort.OrderFormat;
0063:
0064:        /**
0065:         * Tool to create Browse indexes.  This class is used from the command line to
0066:         * create and destroy the browse indices from configuration, and also from within
0067:         * the application to add and remove content from those tables.
0068:         * 
0069:         * To see a full definition of the usage of this class just run it without any
0070:         * arguments, and you will get the help message.
0071:         * 
0072:         * @author Richard Jones
0073:         */
0074:        public class IndexBrowse {
0075:            /** logger */
0076:            private static Logger log = Logger.getLogger(IndexBrowse.class);
0077:
0078:            /** DSpace context */
0079:            private Context context;
0080:
0081:            /** whether to destroy and rebuild the database */
0082:            private boolean rebuild = false;
0083:
0084:            /** whether to destroy the database */
0085:            private boolean delete = false;
0086:
0087:            /** the index number to start working from (for debug only) */
0088:            private int start = 1;
0089:
0090:            /** whether to execute the commands generated against the database */
0091:            private boolean execute = false;
0092:
0093:            /** whether there is an output file into which to write SQL */
0094:            private boolean fileOut = false;
0095:
0096:            /** whether the output should be written to standard out */
0097:            private boolean stdOut = false;
0098:
0099:            /** the name of the output file */
0100:            private String outFile = null;
0101:
0102:            /** should the operations be verbose */
0103:            private boolean verbose = false;
0104:
0105:            /** the configured browse indices */
0106:            private BrowseIndex[] bis;
0107:
0108:            /** the DAO for write operations on the database */
0109:            private BrowseCreateDAO dao;
0110:
0111:            /** the outputter class */
0112:            private BrowseOutput output;
0113:
/**
 * Creates an indexer that runs against a freshly constructed DSpace
 * {@link Context}.  When working inside the application, prefer
 * {@code new IndexBrowse(context)} so that your own context is used.
 *
 * @throws SQLException    if creating the context or the indexer raises it
 * @throws BrowseException if the delegated constructor raises it
 */
public IndexBrowse() throws SQLException, BrowseException {
    this(new Context());
}
0130:
/**
 * Creates an indexer that works through the supplied context.  As a side
 * effect the context is told to ignore authorisation, so any restrictions
 * applied to it are bypassed.
 *
 * @param context the DSpace context to operate with
 * @throws SQLException    declared for the set-up calls made here
 * @throws BrowseException declared for the set-up calls made here
 */
public IndexBrowse(Context context) throws SQLException, BrowseException {
    this.context = context;
    this.context.setIgnoreAuthorization(true);

    // load the configured browse indices, then run the configuration check
    // before anything touches the database
    this.bis = BrowseIndex.getBrowseIndices();
    checkConfig();

    // DAO used for all create/write operations
    this.dao = BrowseDAOFactory.getCreateInstance(context);

    // helper that receives the verbose/file/print settings
    this.output = new BrowseOutput();

    // have every index generate the metadata bits used later when indexing
    for (BrowseIndex index : bis) {
        index.generateMdBits();
    }
}
0161:
0162:            /**
0163:             * @return Returns the verbose.
0164:             */
0165:            public boolean isVerbose() {
0166:                return verbose;
0167:            }
0168:
0169:            /**
0170:             * @param verbose The verbose to set.
0171:             */
0172:            public void setVerbose(boolean verbose) {
0173:                this .verbose = verbose;
0174:                output.setVerbose(verbose);
0175:            }
0176:
0177:            /**
0178:             * @return	true if to rebuild the database, false if not
0179:             */
0180:            public boolean rebuild() {
0181:                return rebuild;
0182:            }
0183:
0184:            /**
0185:             * @param bool		whether to rebuild the database or not
0186:             */
0187:            public void setRebuild(boolean bool) {
0188:                this .rebuild = bool;
0189:            }
0190:
0191:            /**
0192:             * @return		true if to delete the database, false if not
0193:             */
0194:            public boolean delete() {
0195:                return delete;
0196:            }
0197:
0198:            /**
0199:             * @param bool	whetehr to delete the database or not
0200:             */
0201:            public void setDelete(boolean bool) {
0202:                this .delete = bool;
0203:            }
0204:
0205:            /**
0206:             * @param start		the index to start working up from
0207:             */
0208:            public void setStart(int start) {
0209:                this .start = start;
0210:            }
0211:
0212:            /**
0213:             * @return		the index to start working up from
0214:             */
0215:            public int getStart() {
0216:                return this .start;
0217:            }
0218:
0219:            /**
0220:             * @param bool		whether to execute the database commands or not
0221:             */
0222:            public void setExecute(boolean bool) {
0223:                this .execute = bool;
0224:            }
0225:
0226:            /**
0227:             * @return		true if to execute database commands, false if not
0228:             */
0229:            public boolean execute() {
0230:                return this .execute;
0231:            }
0232:
0233:            /**
0234:             * @param bool	whether to use an output file
0235:             */
0236:            public void setFileOut(boolean bool) {
0237:                this .fileOut = bool;
0238:                output.setFile(bool);
0239:            }
0240:
0241:            /**
0242:             * @return		true if using an output file, false if not
0243:             */
0244:            public boolean isFileOut() {
0245:                return this .fileOut;
0246:            }
0247:
0248:            /**
0249:             * @param bool		whether to write to standard out
0250:             */
0251:            public void setStdOut(boolean bool) {
0252:                this .stdOut = bool;
0253:                output.setPrint(bool);
0254:            }
0255:
0256:            /**
0257:             * @return	true if to write to standard out, false if not
0258:             */
0259:            public boolean toStdOut() {
0260:                return this .stdOut;
0261:            }
0262:
0263:            /**
0264:             * @param file		the name of the output file
0265:             */
0266:            public void setOutFile(String file) {
0267:                this .outFile = file;
0268:                output.setFileName(file);
0269:            }
0270:
0271:            /**
0272:             * @return	the name of the output file
0273:             */
0274:            public String getOutFile() {
0275:                return this .outFile;
0276:            }
0277:
0278:            private void removeIndex(int itemID, String table)
0279:                    throws BrowseException {
0280:                dao.deleteByItemID(table, itemID);
0281:            }
0282:
0283:            /**
0284:             * Prune indexes - called from the public interfaces or at the end of a batch indexing process
0285:             */
0286:            private void pruneIndexes() throws BrowseException {
0287:                // go over the indices and prune
0288:                for (int i = 0; i < bis.length; i++) {
0289:                    if (bis[i].isMetadataIndex()) {
0290:                        log.debug("Pruning metadata index: "
0291:                                + bis[i].getTableName());
0292:                        dao.pruneExcess(bis[i].getTableName(), bis[i]
0293:                                .getMapTableName(), false);
0294:                        dao.pruneDistinct(bis[i].getDistinctTableName(), bis[i]
0295:                                .getMapTableName());
0296:                    }
0297:                }
0298:
0299:                dao.pruneExcess(
0300:                        BrowseIndex.getItemBrowseIndex().getTableName(), null,
0301:                        false);
0302:                dao.pruneExcess(BrowseIndex.getWithdrawnBrowseIndex()
0303:                        .getTableName(), null, true);
0304:            }
0305:
0306:            /**
0307:             * Index the given item
0308:             * 
0309:             * @param item	the item to index
0310:             * @throws BrowseException
0311:             */
0312:            public void indexItem(Item item) throws BrowseException {
0313:                // If the item is not archived AND has not been withdrawn
0314:                // we can assume that it has *never* been archived - in that case,
0315:                // there won't be anything in the browse index, so we can just skip processing.
0316:                // If it is either archived or withdrawn, then there may be something in the browse
0317:                // tables, so we *must* process it.
0318:                // Caveat: an Item.update() that changes isArchived() from TRUE to FALSE, whilst leaving
0319:                // isWithdrawn() as FALSE, may result in stale data in the browse tables.
0320:                // Such an update should never occur though, and if it does, probably indicates a major
0321:                // problem with the code updating the Item.
0322:                if (item.isArchived() || item.isWithdrawn()) {
0323:                    indexItem(new ItemMetadataProxy(item));
0324:
0325:                    // Ensure that we remove any invalid entries
0326:                    pruneIndexes();
0327:                }
0328:            }
0329:
0330:            /**
0331:             * Index the given item
0332:             * 
0333:             * @param item  the item to index
0334:             * @throws BrowseException
0335:             */
0336:            private void indexItem(ItemMetadataProxy item)
0337:                    throws BrowseException {
0338:                // Map to store the metadata from the Item
0339:                // so that we don't grab it multiple times
0340:                Map<String, String> itemMDMap = new HashMap<String, String>();
0341:
0342:                try {
0343:                    boolean reqCommunityMappings = false;
0344:                    Map<Integer, String> sortMap = getSortValues(item,
0345:                            itemMDMap);
0346:                    if (item.isArchived() && !item.isWithdrawn()) {
0347:                        // Try to update an existing record in the item index
0348:                        if (!dao.updateIndex(BrowseIndex.getItemBrowseIndex()
0349:                                .getTableName(), item.getID(), sortMap)) {
0350:                            // Record doesn't exist - ensure that it doesn't exist in the withdrawn index,
0351:                            // and add it to the archived item index
0352:                            removeIndex(item.getID(), BrowseIndex
0353:                                    .getWithdrawnBrowseIndex().getTableName());
0354:                            dao.insertIndex(BrowseIndex.getItemBrowseIndex()
0355:                                    .getTableName(), item.getID(), sortMap);
0356:                        }
0357:
0358:                        reqCommunityMappings = true;
0359:                    } else if (item.isWithdrawn()) {
0360:                        // Try to update an existing record in the withdrawn index
0361:                        if (!dao.updateIndex(BrowseIndex
0362:                                .getWithdrawnBrowseIndex().getTableName(), item
0363:                                .getID(), sortMap)) {
0364:                            // Record doesn't exist - ensure that it doesn't exist in the item index,
0365:                            // and add it to the withdrawn item index
0366:                            removeIndex(item.getID(), BrowseIndex
0367:                                    .getItemBrowseIndex().getTableName());
0368:                            dao.insertIndex(BrowseIndex
0369:                                    .getWithdrawnBrowseIndex().getTableName(),
0370:                                    item.getID(), sortMap);
0371:                        }
0372:                    } else {
0373:                        // This item shouldn't exist in either index - ensure that it is removed
0374:                        removeIndex(item.getID(), BrowseIndex
0375:                                .getItemBrowseIndex().getTableName());
0376:                        removeIndex(item.getID(), BrowseIndex
0377:                                .getWithdrawnBrowseIndex().getTableName());
0378:                    }
0379:
0380:                    // Update the community mappings if they are required, or remove them if they aren't
0381:                    if (reqCommunityMappings) {
0382:                        dao.updateCommunityMappings(item.getID());
0383:                    } else {
0384:                        dao.deleteCommunityMappings(item.getID());
0385:                    }
0386:
0387:                    // Now update the metadata indexes
0388:                    for (int i = 0; i < bis.length; i++) {
0389:                        log.debug("Indexing for item " + item.getID()
0390:                                + ", for index: " + bis[i].getTableName());
0391:
0392:                        if (bis[i].isMetadataIndex()) {
0393:                            Set<Integer> distIDSet = new HashSet<Integer>();
0394:
0395:                            // now index the new details - but only if it's archived and not withdrawn
0396:                            if (item.isArchived() && !item.isWithdrawn()) {
0397:                                // get the metadata from the item
0398:                                for (int mdIdx = 0; mdIdx < bis[i]
0399:                                        .getMetadataCount(); mdIdx++) {
0400:                                    String[] md = bis[i].getMdBits(mdIdx);
0401:                                    DCValue[] values = item.getMetadata(md[0],
0402:                                            md[1], md[2], Item.ANY);
0403:
0404:                                    // if we have values to index on, then do so
0405:                                    if (values != null) {
0406:                                        for (int x = 0; x < values.length; x++) {
0407:                                            // Ensure that there is a value to index before inserting it
0408:                                            if (StringUtils
0409:                                                    .isEmpty(values[x].value)) {
0410:                                                log
0411:                                                        .error("Null metadata value for item "
0412:                                                                + item.getID()
0413:                                                                + ", field: "
0414:                                                                + values[x].schema
0415:                                                                + "."
0416:                                                                + values[x].element
0417:                                                                + (values[x].qualifier == null ? ""
0418:                                                                        : "."
0419:                                                                                + values[x].qualifier));
0420:                                            } else {
0421:                                                // get the normalised version of the value
0422:                                                String nVal = OrderFormat
0423:                                                        .makeSortString(
0424:                                                                values[x].value,
0425:                                                                values[x].language,
0426:                                                                bis[i]
0427:                                                                        .getDataType());
0428:                                                distIDSet
0429:                                                        .add(dao
0430:                                                                .getDistinctID(
0431:                                                                        bis[i]
0432:                                                                                .getDistinctTableName(),
0433:                                                                        values[x].value,
0434:                                                                        nVal));
0435:                                            }
0436:                                        }
0437:                                    }
0438:                                }
0439:                            }
0440:
0441:                            // Do we have any mappings?
0442:                            if (distIDSet.isEmpty()) {
0443:                                // remove any old mappings
0444:                                removeIndex(item.getID(), bis[i]
0445:                                        .getMapTableName());
0446:                            } else {
0447:                                // Update the existing mappings
0448:                                int[] distIDarr = new int[distIDSet.size()];
0449:                                int didx = 0;
0450:                                for (Integer distID : distIDSet) {
0451:                                    distIDarr[didx++] = distID;
0452:                                }
0453:                                dao.updateDistinctMappings(bis[i]
0454:                                        .getMapTableName(), item.getID(),
0455:                                        distIDarr);
0456:                            }
0457:                        }
0458:                    }
0459:                } catch (SQLException e) {
0460:                    log.error("caught exception: ", e);
0461:                    throw new BrowseException(e);
0462:                }
0463:            }
0464:
0465:            /**
0466:             * Get the normalised values for each of the sort columns
0467:             * @param item
0468:             * @param itemMDMap
0469:             * @return
0470:             * @throws BrowseException
0471:             * @throws SQLException
0472:             */
0473:            private Map<Integer, String> getSortValues(ItemMetadataProxy item,
0474:                    Map itemMDMap) throws BrowseException, SQLException {
0475:                try {
0476:                    // now obtain the sort order values that we will use
0477:                    Map<Integer, String> sortMap = new HashMap<Integer, String>();
0478:                    for (SortOption so : SortOption.getSortOptions()) {
0479:                        Integer key = new Integer(so.getNumber());
0480:                        String metadata = so.getMetadata();
0481:
0482:                        // If we've already used the metadata for this Item
0483:                        // it will be cached in the map
0484:                        DCValue value = null;
0485:
0486:                        if (itemMDMap != null)
0487:                            value = (DCValue) itemMDMap.get(metadata);
0488:
0489:                        // We haven't used this metadata before, so grab it from the item
0490:                        if (value == null) {
0491:                            String[] somd = so.getMdBits();
0492:                            DCValue[] dcv = item.getMetadata(somd[0], somd[1],
0493:                                    somd[2], Item.ANY);
0494:
0495:                            if (dcv == null) {
0496:                                continue;
0497:                            }
0498:
0499:                            // we only use the first dc value
0500:                            if (dcv.length > 0) {
0501:                                // Set it as the current metadata value to use
0502:                                // and add it to the map
0503:                                value = dcv[0];
0504:
0505:                                if (itemMDMap != null)
0506:                                    itemMDMap.put(metadata, dcv[0]);
0507:                            }
0508:                        }
0509:
0510:                        // normalise the values as we insert into the sort map
0511:                        if (value != null && value.value != null) {
0512:                            String nValue = OrderFormat.makeSortString(
0513:                                    value.value, value.language, so.getType());
0514:                            sortMap.put(key, nValue);
0515:                        }
0516:                    }
0517:
0518:                    return sortMap;
0519:                } catch (SortException se) {
0520:                    throw new BrowseException("Error in SortOptions", se);
0521:                }
0522:            }
0523:
0524:            /**
0525:             * @deprecated
0526:             * @param item
0527:             * @return
0528:             * @throws BrowseException
0529:             */
0530:            public boolean itemAdded(Item item) throws BrowseException {
0531:                indexItem(item);
0532:                return true;
0533:            }
0534:
0535:            /**
0536:             * @deprecated
0537:             * @param item
0538:             * @return
0539:             * @throws BrowseException
0540:             */
0541:            public boolean itemChanged(Item item) throws BrowseException {
0542:                indexItem(item);
0543:                return true;
0544:            }
0545:
0546:            /**
0547:             * remove all the indices for the given item
0548:             * 
0549:             * @param item		the item to be removed
0550:             * @return
0551:             * @throws BrowseException
0552:             */
0553:            public boolean itemRemoved(Item item) throws BrowseException {
0554:                return itemRemoved(item.getID());
0555:            }
0556:
0557:            public boolean itemRemoved(int itemID) throws BrowseException {
0558:                // go over the indices and index the item
0559:                for (int i = 0; i < bis.length; i++) {
0560:                    if (bis[i].isMetadataIndex()) {
0561:                        log.debug("Removing indexing for removed item "
0562:                                + itemID + ", for index: "
0563:                                + bis[i].getTableName());
0564:                        removeIndex(itemID, bis[i].getMapTableName());
0565:                    }
0566:                }
0567:
0568:                // Remove from the item indexes (archive and withdrawn)
0569:                removeIndex(itemID, BrowseIndex.getItemBrowseIndex()
0570:                        .getTableName());
0571:                removeIndex(itemID, BrowseIndex.getWithdrawnBrowseIndex()
0572:                        .getTableName());
0573:                dao.deleteCommunityMappings(itemID);
0574:
0575:                // Ensure that we remove any invalid entries
0576:                pruneIndexes();
0577:
0578:                return true;
0579:            }
0580:
0581:            /**
0582:             * Creates Browse indexes, destroying the old ones.
0583:             * 
0584:             * @param argv
0585:             *            Command-line arguments
0586:             */
0587:            public static void main(String[] argv) throws SQLException,
0588:                    BrowseException, ParseException {
0589:                Context context = new Context();
0590:                IndexBrowse indexer = new IndexBrowse(context);
0591:
0592:                // create an options object and populate it
0593:                CommandLineParser parser = new PosixParser();
0594:                Options options = new Options();
0595:
0596:                // these are mutually exclusive, and represent the primary actions
0597:                options
0598:                        .addOption(
0599:                                "t",
0600:                                "tables",
0601:                                false,
0602:                                "create the tables only, do not attempt to index.  Mutually exclusive with -f and -i");
0603:                options
0604:                        .addOption("i", "index", false,
0605:                                "actually do the indexing.  Mutually exclusive with -t and -f");
0606:                options
0607:                        .addOption(
0608:                                "f",
0609:                                "full",
0610:                                false,
0611:                                "make the tables, and do the indexing.  This forces -x.  Mutually exclusive with -t and -i");
0612:
0613:                // these options can be specified only with the -f option
0614:                options
0615:                        .addOption(
0616:                                "r",
0617:                                "rebuild",
0618:                                false,
0619:                                "should we rebuild all the indices, which removes old index tables and creates new ones.  For use with -f. Mutually exclusive with -d");
0620:                options
0621:                        .addOption(
0622:                                "d",
0623:                                "delete",
0624:                                false,
0625:                                "delete all the indices, but don't create new ones.  For use with -f. This is mutually exclusive with -r");
0626:
0627:                // these options can be specified only with the -t and -f options
0628:                options
0629:                        .addOption(
0630:                                "o",
0631:                                "out",
0632:                                true,
0633:                                "[-o <filename>] write the remove and create SQL to the given file. For use with -t and -f"); // FIXME: not currently working
0634:                options
0635:                        .addOption("p", "print", false,
0636:                                "write the remove and create SQL to the stdout. For use with -t and -f");
0637:                options
0638:                        .addOption(
0639:                                "x",
0640:                                "execute",
0641:                                false,
0642:                                "execute all the remove and create SQL against the database. For use with -t and -f");
0643:                options
0644:                        .addOption(
0645:                                "s",
0646:                                "start",
0647:                                true,
0648:                                "[-s <int>] start from this index number and work upward (mostly only useful for debugging). For use with -t and -f");
0649:
0650:                // this option can be used with any argument
0651:                options
0652:                        .addOption(
0653:                                "v",
0654:                                "verbose",
0655:                                false,
0656:                                "print extra information to the stdout.  If used in conjunction with -p, you cannot use the stdout to generate your database structure");
0657:
0658:                // display the help.  If this is spefified, it trumps all other arguments
0659:                options
0660:                        .addOption("h", "help", false,
0661:                                "show this help documentation.  Overrides all other arguments");
0662:
0663:                CommandLine line = parser.parse(options, argv);
0664:
0665:                // display the help
0666:                if (line.hasOption("h")) {
0667:                    indexer.usage(options);
0668:                    return;
0669:                }
0670:
0671:                if (line.hasOption("v")) {
0672:                    indexer.setVerbose(true);
0673:                }
0674:
0675:                if (line.hasOption("i")) {
0676:                    indexer.createIndex();
0677:                    return;
0678:                }
0679:
0680:                if (line.hasOption("f")) {
0681:                    if (line.hasOption('r')) {
0682:                        indexer.setRebuild(true);
0683:                    } else if (line.hasOption("d")) {
0684:                        indexer.setDelete(true);
0685:                    }
0686:                }
0687:
0688:                if (line.hasOption("f") || line.hasOption("t")) {
0689:                    if (line.hasOption("s")) {
0690:                        indexer.setStart(Integer.parseInt(line
0691:                                .getOptionValue("s")));
0692:                    }
0693:                    if (line.hasOption("x")) {
0694:                        indexer.setExecute(true);
0695:                    }
0696:                    if (line.hasOption("p")) {
0697:                        indexer.setStdOut(true);
0698:                    }
0699:                    if (line.hasOption("o")) {
0700:                        indexer.setFileOut(true);
0701:                        indexer.setOutFile(line.getOptionValue("o"));
0702:                    }
0703:                }
0704:
0705:                if (line.hasOption("t")) {
0706:                    indexer.prepTables();
0707:                    return;
0708:                }
0709:
0710:                if (line.hasOption("f")) {
0711:                    indexer.setExecute(true);
0712:                    indexer.initBrowse();
0713:                    return;
0714:                }
0715:
0716:                indexer.usage(options);
0717:                context.complete();
0718:            }
0719:
0720:            /**
0721:             * output the usage information
0722:             * 
0723:             * @param options
0724:             */
0725:            private void usage(Options options) {
0726:                HelpFormatter formatter = new HelpFormatter();
0727:                formatter.printHelp("IndexBrowse", options);
0728:            }
0729:
0730:            /**
0731:             * Prepare the tables for the browse indices
0732:             * 
0733:             * @throws BrowseException
0734:             */
0735:            private void prepTables() throws BrowseException {
0736:                try {
0737:                    // first, erase the existing indexes
0738:                    clearDatabase();
0739:
0740:                    createItemTables();
0741:
0742:                    // for each current browse index, make all the relevant tables
0743:                    for (int i = 0; i < bis.length; i++) {
0744:                        createTables(bis[i]);
0745:
0746:                        // prepare some CLI output
0747:                        StringBuffer logMe = new StringBuffer();
0748:                        for (SortOption so : SortOption.getSortOptions()) {
0749:                            logMe.append(" ").append(so.getMetadata()).append(
0750:                                    " ");
0751:                        }
0752:
0753:                        output.message("Creating browse index "
0754:                                + bis[i].getName() + ": index by "
0755:                                + bis[i].getMetadata() + " sortable by: "
0756:                                + logMe.toString());
0757:                    }
0758:                } catch (SortException se) {
0759:                    throw new BrowseException("Error in SortOptions", se);
0760:                }
0761:            }
0762:
0763:            /**
0764:             * delete all the existing browse tables
0765:             * 
0766:             * @throws BrowseException
0767:             */
0768:            public void clearDatabase() throws BrowseException {
0769:                try {
0770:                    output.message("Deleting old indices");
0771:
0772:                    // notice that we have to do this without reference to the BrowseIndex[]
0773:                    // because they do not necessarily reflect what currently exists in
0774:                    // the database
0775:
0776:                    int i = getStart();
0777:                    while (true) {
0778:                        String tableName = BrowseIndex.getTableName(i, false,
0779:                                false, false, false);
0780:                        String distinctTableName = BrowseIndex.getTableName(i,
0781:                                false, false, true, false);
0782:                        String distinctMapName = BrowseIndex.getTableName(i,
0783:                                false, false, false, true);
0784:                        String sequence = BrowseIndex.getSequenceName(i, false,
0785:                                false);
0786:                        String mapSequence = BrowseIndex.getSequenceName(i,
0787:                                false, true);
0788:                        String distinctSequence = BrowseIndex.getSequenceName(
0789:                                i, true, false);
0790:
0791:                        // These views are no longer used, but as we are cleaning the database,
0792:                        // they may exist and need to be removed
0793:                        String colViewName = BrowseIndex.getTableName(i, false,
0794:                                true, false, false);
0795:                        String comViewName = BrowseIndex.getTableName(i, true,
0796:                                false, false, false);
0797:                        String distinctColViewName = BrowseIndex.getTableName(
0798:                                i, false, true, false, true);
0799:                        String distinctComViewName = BrowseIndex.getTableName(
0800:                                i, true, false, false, true);
0801:
0802:                        output.message("Checking for " + tableName);
0803:                        if (dao.testTableExistance(tableName)) {
0804:                            output.message("...found");
0805:
0806:                            output
0807:                                    .message("Deleting old index and associated resources: "
0808:                                            + tableName);
0809:
0810:                            // prepare a statement which will delete the table and associated
0811:                            // resources
0812:                            String dropper = dao.dropIndexAndRelated(tableName,
0813:                                    this .execute());
0814:                            String dropSeq = dao.dropSequence(sequence, this 
0815:                                    .execute());
0816:                            output.sql(dropper);
0817:                            output.sql(dropSeq);
0818:
0819:                            // These views are no longer used, but as we are cleaning the database,
0820:                            // they may exist and need to be removed
0821:                            String dropColView = dao.dropView(colViewName, this 
0822:                                    .execute());
0823:                            String dropComView = dao.dropView(comViewName, this 
0824:                                    .execute());
0825:                            output.sql(dropColView);
0826:                            output.sql(dropComView);
0827:                        }
0828:
0829:                        // NOTE: we need a secondary context to check for the existance
0830:                        // of the table, because if an SQLException is thrown, then
0831:                        // the connection is aborted, and no more transaction stuff can be
0832:                        // done.  Therefore we use a blank context to make the requests,
0833:                        // not caring if it gets aborted or not
0834:                        output.message("Checking for " + distinctTableName);
0835:                        if (!dao.testTableExistance(distinctTableName)) {
0836:                            if (i < bis.length || i < 10) {
0837:                                output
0838:                                        .message("... doesn't exist; but will carry on as there may be something that conflicts");
0839:                            } else {
0840:                                output
0841:                                        .message("... doesn't exist; no more tables to delete");
0842:                                break;
0843:                            }
0844:                        } else {
0845:                            output.message("...found");
0846:
0847:                            output
0848:                                    .message("Deleting old index and associated resources: "
0849:                                            + distinctTableName);
0850:
0851:                            // prepare statements that will delete the distinct value tables
0852:                            String dropDistinctTable = dao.dropIndexAndRelated(
0853:                                    distinctTableName, this .execute());
0854:                            String dropMap = dao.dropIndexAndRelated(
0855:                                    distinctMapName, this .execute());
0856:                            String dropDistinctMapSeq = dao.dropSequence(
0857:                                    mapSequence, this .execute());
0858:                            String dropDistinctSeq = dao.dropSequence(
0859:                                    distinctSequence, this .execute());
0860:                            output.sql(dropDistinctTable);
0861:                            output.sql(dropMap);
0862:                            output.sql(dropDistinctMapSeq);
0863:                            output.sql(dropDistinctSeq);
0864:
0865:                            // These views are no longer used, but as we are cleaning the database,
0866:                            // they may exist and need to be removed
0867:                            String dropDistinctColView = dao.dropView(
0868:                                    distinctColViewName, this .execute());
0869:                            String dropDistinctComView = dao.dropView(
0870:                                    distinctComViewName, this .execute());
0871:                            output.sql(dropDistinctColView);
0872:                            output.sql(dropDistinctComView);
0873:                        }
0874:
0875:                        i++;
0876:                    }
0877:
0878:                    dropItemTables(BrowseIndex.getItemBrowseIndex());
0879:                    dropItemTables(BrowseIndex.getWithdrawnBrowseIndex());
0880:
0881:                    if (execute()) {
0882:                        context.commit();
0883:                    }
0884:                } catch (SQLException e) {
0885:                    log.error("caught exception: ", e);
0886:                    throw new BrowseException(e);
0887:                }
0888:            }
0889:
0890:            /**
0891:             * drop the tables and related database entries for the internal
0892:             * 'item' tables
0893:             * @param bix
0894:             * @throws BrowseException
0895:             */
0896:            private void dropItemTables(BrowseIndex bix) throws BrowseException {
0897:                if (dao.testTableExistance(bix.getTableName())) {
0898:                    String tableName = bix.getTableName();
0899:                    String dropper = dao.dropIndexAndRelated(tableName, this 
0900:                            .execute());
0901:                    String dropSeq = dao.dropSequence(bix.getSequenceName(
0902:                            false, false), this .execute());
0903:                    output.sql(dropper);
0904:                    output.sql(dropSeq);
0905:
0906:                    // These views are no longer used, but as we are cleaning the database,
0907:                    // they may exist and need to be removed
0908:                    String colViewName = bix.getTableName(false, true, false,
0909:                            false);
0910:                    String comViewName = bix.getTableName(true, false, false,
0911:                            false);
0912:                    String dropColView = dao.dropView(colViewName, this 
0913:                            .execute());
0914:                    String dropComView = dao.dropView(comViewName, this 
0915:                            .execute());
0916:                    output.sql(dropColView);
0917:                    output.sql(dropComView);
0918:                }
0919:            }
0920:
0921:            /**
0922:             * Create the internal full item tables
0923:             * @throws BrowseException
0924:             */
0925:            private void createItemTables() throws BrowseException {
0926:                try {
0927:                    // prepare the array list of sort options
0928:                    List<Integer> sortCols = new ArrayList<Integer>();
0929:                    for (SortOption so : SortOption.getSortOptions()) {
0930:                        sortCols.add(new Integer(so.getNumber()));
0931:                    }
0932:
0933:                    createItemTables(BrowseIndex.getItemBrowseIndex(), sortCols);
0934:                    createItemTables(BrowseIndex.getWithdrawnBrowseIndex(),
0935:                            sortCols);
0936:
0937:                    if (execute()) {
0938:                        context.commit();
0939:                    }
0940:                } catch (SortException se) {
0941:                    throw new BrowseException("Error in SortOptions", se);
0942:                } catch (SQLException e) {
0943:                    log.error("caught exception: ", e);
0944:                    throw new BrowseException(e);
0945:                }
0946:            }
0947:
0948:            /**
0949:             * Create the internal full item tables for a particular index
0950:             * (ie. withdrawn / in archive)
0951:             * @param bix
0952:             * @param sortCols
0953:             * @throws BrowseException
0954:             */
0955:            private void createItemTables(BrowseIndex bix,
0956:                    List<Integer> sortCols) throws BrowseException {
0957:                String tableName = bix.getTableName();
0958:
0959:                String itemSeq = dao.createSequence(bix.getSequenceName(false,
0960:                        false), this .execute());
0961:                String itemTable = dao.createPrimaryTable(tableName, sortCols,
0962:                        execute);
0963:                String[] itemIndices = dao.createDatabaseIndices(tableName,
0964:                        sortCols, false, this .execute());
0965:
0966:                output.sql(itemSeq);
0967:                output.sql(itemTable);
0968:                for (int i = 0; i < itemIndices.length; i++) {
0969:                    output.sql(itemIndices[i]);
0970:                }
0971:            }
0972:
0973:            /**
0974:             * Create the browse tables for the given browse index
0975:             * 
0976:             * @param bi		the browse index to create
0977:             * @throws BrowseException
0978:             */
0979:            private void createTables(BrowseIndex bi) throws BrowseException {
0980:                try {
0981:                    // if this is a single view, create the DISTINCT tables and views
0982:                    if (bi.isMetadataIndex()) {
0983:                        // if this is a single view, create the DISTINCT tables and views
0984:                        String distinctTableName = bi.getDistinctTableName();
0985:                        String distinctSeq = bi.getSequenceName(true, false);
0986:                        String distinctMapName = bi.getMapTableName();
0987:                        String mapSeq = bi.getSequenceName(false, true);
0988:
0989:                        // FIXME: at the moment we have not defined INDEXes for this data
0990:                        // add this later when necessary
0991:
0992:                        String distinctTableSeq = dao.createSequence(
0993:                                distinctSeq, this .execute());
0994:                        String distinctMapSeq = dao.createSequence(mapSeq, this 
0995:                                .execute());
0996:                        String createDistinctTable = dao.createDistinctTable(
0997:                                distinctTableName, this .execute());
0998:                        String createDistinctMap = dao.createDistinctMap(
0999:                                distinctTableName, distinctMapName, this 
1000:                                        .execute());
1001:                        String[] mapIndices = dao.createMapIndices(
1002:                                distinctTableName, distinctMapName, this 
1003:                                        .execute());
1004:
1005:                        output.sql(distinctTableSeq);
1006:                        output.sql(distinctMapSeq);
1007:                        output.sql(createDistinctTable);
1008:                        output.sql(createDistinctMap);
1009:                        for (int i = 0; i < mapIndices.length; i++) {
1010:                            output.sql(mapIndices[i]);
1011:                        }
1012:                    }
1013:
1014:                    if (execute()) {
1015:                        context.commit();
1016:                    }
1017:                } catch (SQLException e) {
1018:                    log.error("caught exception: ", e);
1019:                    throw new BrowseException(e);
1020:                }
1021:            }
1022:
1023:            /**
1024:             * index everything
1025:             * 
1026:             * @throws SQLException
1027:             * @throws BrowseException
1028:             */
1029:            public void initBrowse() throws SQLException, BrowseException {
1030:                Date start = new Date();
1031:
1032:                output.message("Creating browse indexes for DSpace");
1033:
1034:                Date initDate = new Date();
1035:                long init = initDate.getTime() - start.getTime();
1036:
1037:                output
1038:                        .message("init complete (" + Long.toString(init)
1039:                                + " ms)");
1040:
1041:                if (delete()) {
1042:                    output.message("Deleting browse tables");
1043:
1044:                    clearDatabase();
1045:
1046:                    output.message("Browse tables deleted");
1047:                    return;
1048:                } else if (rebuild()) {
1049:                    output.message("Preparing browse tables");
1050:
1051:                    prepTables();
1052:
1053:                    output.message("Browse tables prepared");
1054:                }
1055:
1056:                Date prepDate = new Date();
1057:                long prep = prepDate.getTime() - start.getTime();
1058:                long prepinit = prepDate.getTime() - initDate.getTime();
1059:
1060:                output.message("tables prepped (" + Long.toString(prep)
1061:                        + " ms, " + Long.toString(prepinit) + " ms)");
1062:
1063:                int count = createIndex();
1064:
1065:                context.complete();
1066:
1067:                Date endDate = new Date();
1068:                long end = endDate.getTime() - start.getTime();
1069:                long endprep = endDate.getTime() - prepDate.getTime();
1070:
1071:                output.message("content indexed (" + Long.toString(end)
1072:                        + " ms, " + Long.toString(endprep) + " ms)");
1073:                output.message("Items indexed: " + Integer.toString(count));
1074:
1075:                if (count > 0) {
1076:                    long overall = end / count;
1077:                    long specific = endprep / count;
1078:
1079:                    output.message("Overall average time per item: "
1080:                            + Long.toString(overall) + " ms");
1081:                    output.message("Index only average time per item: "
1082:                            + Long.toString(specific) + " ms");
1083:                }
1084:
1085:                output.message("Browse indexing completed");
1086:            }
1087:
1088:            /**
1089:             * create the indices for all the items
1090:             * 
1091:             * @return
1092:             * @throws BrowseException
1093:             */
1094:            private int createIndex() throws BrowseException {
1095:                try {
1096:                    // first, pre-prepare the known metadata fields that we want to query
1097:                    // on
1098:                    for (int k = 0; k < bis.length; k++) {
1099:                        bis[k].generateMdBits();
1100:                    }
1101:
1102:                    // now get the ids of ALL the items in the database
1103:                    BrowseItemDAO biDao = BrowseDAOFactory
1104:                            .getItemInstance(context);
1105:                    BrowseItem[] items = biDao.findAll();
1106:
1107:                    // go through every item id, grab the relevant metadata
1108:                    // and write it into the database
1109:
1110:                    for (int j = 0; j < items.length; j++) {
1111:                        indexItem(new ItemMetadataProxy(items[j].getID(),
1112:                                items[j]));
1113:
1114:                        // after each item we commit the context and clear the cache
1115:                        context.commit();
1116:                        context.clearCache();
1117:                    }
1118:
1119:                    // penultimately we have to delete any items that couldn't be located in the
1120:                    // index list
1121:                    pruneIndexes();
1122:
1123:                    // Make sure the deletes are written back
1124:                    context.commit();
1125:
1126:                    return items.length;
1127:                } catch (SQLException e) {
1128:                    log.error("caught exception: ", e);
1129:                    throw new BrowseException(e);
1130:                }
1131:            }
1132:
1133:            /**
1134:             * Currently does nothing
1135:             *
1136:             */
1137:            private void checkConfig() {
1138:                // FIXME: exactly in what way do we want to check the config?
1139:            }
1140:
1141:            /**
1142:             * Take a string representation of a metadata field, and return it as an array.
1143:             * This is just a convenient utility method to basically break the metadata 
1144:             * representation up by its delimiter (.), and stick it in an array, inserting
1145:             * the value of the init parameter when there is no metadata field part.
1146:             * 
1147:             * @param mfield	the string representation of the metadata
1148:             * @param init	the default value of the array elements
1149:             * @return	a three element array with schema, element and qualifier respectively
1150:             */
1151:            public String[] interpretField(String mfield, String init)
1152:                    throws IOException {
1153:                StringTokenizer sta = new StringTokenizer(mfield, ".");
1154:                String[] field = { init, init, init };
1155:
1156:                int i = 0;
1157:                while (sta.hasMoreTokens()) {
1158:                    field[i++] = sta.nextToken();
1159:                }
1160:
1161:                // error checks to make sure we have at least a schema and qualifier for both
1162:                if (field[0] == null || field[1] == null) {
1163:                    throw new IOException("at least a schema and element be "
1164:                            + "specified in configuration.  You supplied: "
1165:                            + mfield);
1166:                }
1167:
1168:                return field;
1169:            }
1170:
1171:            // private inner class
1172:            //	 Hides the Item / BrowseItem in such a way that we can remove
1173:            //	 the duplication in indexing an item.
1174:            private class ItemMetadataProxy {
1175:                private Item item;
1176:                private BrowseItem browseItem;
1177:                private int id;
1178:
1179:                ItemMetadataProxy(Item item) {
1180:                    this .item = item;
1181:                    this .browseItem = null;
1182:                    this .id = 0;
1183:                }
1184:
1185:                ItemMetadataProxy(int id, BrowseItem browseItem) {
1186:                    this .item = null;
1187:                    this .browseItem = browseItem;
1188:                    this .id = id;
1189:                }
1190:
1191:                public DCValue[] getMetadata(String schema, String element,
1192:                        String qualifier, String lang) throws SQLException {
1193:                    if (item != null) {
1194:                        return item.getMetadata(schema, element, qualifier,
1195:                                lang);
1196:                    }
1197:
1198:                    return browseItem.getMetadata(schema, element, qualifier,
1199:                            lang);
1200:                }
1201:
1202:                public int getID() {
1203:                    if (item != null) {
1204:                        return item.getID();
1205:                    }
1206:
1207:                    return id;
1208:                }
1209:
1210:                /**
1211:                 * Is the Item archived?
1212:                 * @return
1213:                 */
1214:                public boolean isArchived() {
1215:                    if (item != null) {
1216:                        return item.isArchived();
1217:                    }
1218:
1219:                    return browseItem.isArchived();
1220:                }
1221:
1222:                /**
1223:                 * Is the Item withdrawn?
1224:                 * @return
1225:                 */
1226:                public boolean isWithdrawn() {
1227:                    if (item != null) {
1228:                        return item.isWithdrawn();
1229:                    }
1230:
1231:                    return browseItem.isWithdrawn();
1232:                }
1233:            }
1234:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.