001: package org.apache.lucene.misc;
002:
003: /**
004: * Copyright 2006 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import org.apache.lucene.index.Term;
020: import org.apache.lucene.index.TermEnum;
021: import org.apache.lucene.index.TermDocs;
022: import org.apache.lucene.index.IndexReader;
023: import org.apache.lucene.search.Similarity;
024: import org.apache.lucene.store.Directory;
025: import org.apache.lucene.store.FSDirectory;
026:
027: import java.io.File;
028: import java.io.IOException;
029: import java.util.Date;
030:
031: /**
032: * Given a directory, a Similarity, and a list of fields, updates the
033: * fieldNorms in place for every document using the Similarity.lengthNorm.
034: *
035: * <p>
036: * NOTE: This only works if you do <b>not</b> use field/document boosts in your
037: * index.
038: * </p>
039: *
040: * @version $Id$
041: */
042: public class LengthNormModifier {
043:
044: /**
045: * Command Line Execution method.
046: *
047: * <pre>
048: * Usage: LengthNormModifier /path/index package.SimilarityClassName field1 field2 ...
049: * </pre>
050: */
051: public static void main(String[] args) throws IOException {
052: if (args.length < 3) {
053: System.err
054: .println("Usage: LengthNormModifier <index> <package.SimilarityClassName> <field1> [field2] ...");
055: System.exit(1);
056: }
057:
058: Similarity s = null;
059: try {
060: Class simClass = Class.forName(args[1]);
061: s = (Similarity) simClass.newInstance();
062: } catch (Exception e) {
063: System.err
064: .println("Couldn't instantiate similarity with empty constructor: "
065: + args[1]);
066: e.printStackTrace(System.err);
067: }
068:
069: File index = new File(args[0]);
070: Directory d = FSDirectory.getDirectory(index, false);
071:
072: LengthNormModifier lnm = new LengthNormModifier(d, s);
073:
074: for (int i = 2; i < args.length; i++) {
075: System.out.print("Updating field: " + args[i] + " "
076: + (new Date()).toString() + " ... ");
077: lnm.reSetNorms(args[i]);
078: System.out.println(new Date().toString());
079: }
080:
081: d.close();
082: }
083:
084: private Directory dir;
085: private Similarity sim;
086:
087: /**
088: * Constructor for code that wishes to use this class progaomatically.
089: *
090: * @param d The Directory to modify
091: * @param s The Similarity to use in <code>reSetNorms</code>
092: */
093: public LengthNormModifier(Directory d, Similarity s) {
094: dir = d;
095: sim = s;
096: }
097:
098: /**
099: * Resets the norms for the specified field.
100: *
101: * <p>
102: * Opens a new IndexReader on the Directory given to this instance,
103: * modifies the norms using the Similarity given to this instance,
104: * and closes the IndexReader.
105: * </p>
106: *
107: * @param field the field whose norms should be reset
108: */
109: public void reSetNorms(String field) throws IOException {
110: String fieldName = field.intern();
111: int[] termCounts = new int[0];
112:
113: IndexReader reader = null;
114: TermEnum termEnum = null;
115: TermDocs termDocs = null;
116: try {
117: reader = IndexReader.open(dir);
118: termCounts = new int[reader.maxDoc()];
119: try {
120: termEnum = reader.terms(new Term(field, ""));
121: try {
122: termDocs = reader.termDocs();
123: do {
124: Term term = termEnum.term();
125: if (term != null
126: && term.field().equals(fieldName)) {
127: termDocs.seek(termEnum.term());
128: while (termDocs.next()) {
129: termCounts[termDocs.doc()] += termDocs
130: .freq();
131: }
132: }
133: } while (termEnum.next());
134: } finally {
135: if (null != termDocs)
136: termDocs.close();
137: }
138: } finally {
139: if (null != termEnum)
140: termEnum.close();
141: }
142: } finally {
143: if (null != reader)
144: reader.close();
145: }
146:
147: try {
148: reader = IndexReader.open(dir);
149: for (int d = 0; d < termCounts.length; d++) {
150: if (!reader.isDeleted(d)) {
151: byte norm = sim.encodeNorm(sim.lengthNorm(
152: fieldName, termCounts[d]));
153: reader.setNorm(d, fieldName, norm);
154: }
155: }
156: } finally {
157: if (null != reader)
158: reader.close();
159: }
160: }
161:
162: }
|