01: package org.apache.lucene.demo;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import java.io.File;
21: import java.io.FileReader;
22:
23: import org.apache.lucene.document.DateTools;
24: import org.apache.lucene.document.Document;
25: import org.apache.lucene.document.Field;
26:
27: /** A utility for making Lucene Documents from a File. */
28:
29: public class FileDocument {
30: /** Makes a document for a File.
31: <p>
32: The document has three fields:
33: <ul>
34: <li><code>path</code>--containing the pathname of the file, as a stored,
35: untokenized field;
36: <li><code>modified</code>--containing the last modified date of the file as
37: a field as created by <a
38: href="lucene.document.DateTools.html">DateTools</a>; and
39: <li><code>contents</code>--containing the full contents of the file, as a
40: Reader field;
41: */
42: public static Document Document(File f)
43: throws java.io.FileNotFoundException {
44:
45: // make a new, empty document
46: Document doc = new Document();
47:
48: // Add the path of the file as a field named "path". Use a field that is
49: // indexed (i.e. searchable), but don't tokenize the field into words.
50: doc.add(new Field("path", f.getPath(), Field.Store.YES,
51: Field.Index.UN_TOKENIZED));
52:
53: // Add the last modified date of the file a field named "modified". Use
54: // a field that is indexed (i.e. searchable), but don't tokenize the field
55: // into words.
56: doc.add(new Field("modified", DateTools.timeToString(f
57: .lastModified(), DateTools.Resolution.MINUTE),
58: Field.Store.YES, Field.Index.UN_TOKENIZED));
59:
60: // Add the contents of the file to a field named "contents". Specify a Reader,
61: // so that the text of the file is tokenized and indexed, but not stored.
62: // Note that FileReader expects the file to be in the system's default encoding.
63: // If that's not the case searching for special characters will fail.
64: doc.add(new Field("contents", new FileReader(f)));
65:
66: // return the document
67: return doc;
68: }
69:
70: private FileDocument() {
71: }
72: }
|