001: package org.apache.lucene.demo;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.analysis.standard.StandardAnalyzer;
021: import org.apache.lucene.index.IndexWriter;
022:
023: import java.io.File;
024: import java.io.FileNotFoundException;
025: import java.io.IOException;
026: import java.util.Date;
027:
028: /** Index all text files under a directory. */
029: public class IndexFiles {
030:
031: private IndexFiles() {
032: }
033:
034: static final File INDEX_DIR = new File("index");
035:
036: /** Index all text files under a directory. */
037: public static void main(String[] args) {
038: String usage = "java org.apache.lucene.demo.IndexFiles <root_directory>";
039: if (args.length == 0) {
040: System.err.println("Usage: " + usage);
041: System.exit(1);
042: }
043:
044: if (INDEX_DIR.exists()) {
045: System.out.println("Cannot save index to '" + INDEX_DIR
046: + "' directory, please delete it first");
047: System.exit(1);
048: }
049:
050: final File docDir = new File(args[0]);
051: if (!docDir.exists() || !docDir.canRead()) {
052: System.out
053: .println("Document directory '"
054: + docDir.getAbsolutePath()
055: + "' does not exist or is not readable, please check the path");
056: System.exit(1);
057: }
058:
059: Date start = new Date();
060: try {
061: IndexWriter writer = new IndexWriter(INDEX_DIR,
062: new StandardAnalyzer(), true);
063: System.out.println("Indexing to directory '" + INDEX_DIR
064: + "'...");
065: indexDocs(writer, docDir);
066: System.out.println("Optimizing...");
067: writer.optimize();
068: writer.close();
069:
070: Date end = new Date();
071: System.out.println(end.getTime() - start.getTime()
072: + " total milliseconds");
073:
074: } catch (IOException e) {
075: System.out.println(" caught a " + e.getClass()
076: + "\n with message: " + e.getMessage());
077: }
078: }
079:
080: static void indexDocs(IndexWriter writer, File file)
081: throws IOException {
082: // do not try to index files that cannot be read
083: if (file.canRead()) {
084: if (file.isDirectory()) {
085: String[] files = file.list();
086: // an IO error could occur
087: if (files != null) {
088: for (int i = 0; i < files.length; i++) {
089: indexDocs(writer, new File(file, files[i]));
090: }
091: }
092: } else {
093: System.out.println("adding " + file);
094: try {
095: writer.addDocument(FileDocument.Document(file));
096: }
097: // at least on windows, some temporary files raise this exception with an "access denied" message
098: // checking if the file can be read doesn't help
099: catch (FileNotFoundException fnfe) {
100: ;
101: }
102: }
103: }
104: }
105:
106: }
|