001: package org.apache.lucene.index;
002:
003: /**
004: * Copyright 2006 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.io.IOException;
020: import java.util.Date;
021:
022: import org.apache.lucene.search.Similarity;
023: import org.apache.lucene.store.Directory;
024: import org.apache.lucene.store.FSDirectory;
025:
026: /**
027: * Given a directory and a list of fields, updates the fieldNorms in place for every document.
028: *
029: * If Similarity class is specified, uses its lengthNorm method to set norms.
* If the -n command line argument is used, removes field norms, as if
031: * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
032: *
033: * <p>
034: * NOTE: This will overwrite any length normalization or field/document boosts.
035: * </p>
036: *
037: * @author Chris Hostetter
038: * @author Otis Gospodnetic
039: */
040: public class FieldNormModifier {
041:
042: /**
043: * Command Line Execution method.
044: *
045: * <pre>
046: * Usage: FieldNormModifier /path/index <package.SimilarityClassName | -n> field1 field2 ...
047: * </pre>
048: */
049: public static void main(String[] args) throws IOException {
050: if (args.length < 3) {
051: System.err
052: .println("Usage: FieldNormModifier <index> <package.SimilarityClassName | -n> <field1> [field2] ...");
053: System.exit(1);
054: }
055:
056: Similarity s = null;
057: if (!args[1].equals("-n")) {
058: try {
059: Class simClass = Class.forName(args[1]);
060: s = (Similarity) simClass.newInstance();
061: } catch (Exception e) {
062: System.err
063: .println("Couldn't instantiate similarity with empty constructor: "
064: + args[1]);
065: e.printStackTrace(System.err);
066: System.exit(1);
067: }
068: }
069:
070: Directory d = FSDirectory.getDirectory(args[0], false);
071: FieldNormModifier fnm = new FieldNormModifier(d, s);
072:
073: for (int i = 2; i < args.length; i++) {
074: System.out.print("Updating field: " + args[i] + " "
075: + (new Date()).toString() + " ... ");
076: fnm.reSetNorms(args[i]);
077: System.out.println(new Date().toString());
078: }
079:
080: d.close();
081: }
082:
083: private Directory dir;
084: private Similarity sim;
085:
086: /**
087: * Constructor for code that wishes to use this class programatically
088: * If Similarity is null, kill the field norms.
089: *
090: * @param d the Directory to modify
091: * @param s the Similiary to use (can be null)
092: */
093: public FieldNormModifier(Directory d, Similarity s) {
094: dir = d;
095: sim = s;
096: }
097:
098: /**
099: * Resets the norms for the specified field.
100: *
101: * <p>
102: * Opens a new IndexReader on the Directory given to this instance,
103: * modifies the norms (either using the Similarity given to this instance, or by using fake norms,
104: * and closes the IndexReader.
105: * </p>
106: *
107: * @param field the field whose norms should be reset
108: */
109: public void reSetNorms(String field) throws IOException {
110: String fieldName = field.intern();
111: int[] termCounts = new int[0];
112: byte[] fakeNorms = new byte[0];
113:
114: IndexReader reader = null;
115: TermEnum termEnum = null;
116: TermDocs termDocs = null;
117: try {
118: reader = IndexReader.open(dir);
119: termCounts = new int[reader.maxDoc()];
120: // if we are killing norms, get fake ones
121: if (sim == null)
122: fakeNorms = SegmentReader.createFakeNorms(reader
123: .maxDoc());
124: try {
125: termEnum = reader.terms(new Term(field, ""));
126: try {
127: termDocs = reader.termDocs();
128: do {
129: Term term = termEnum.term();
130: if (term != null
131: && term.field().equals(fieldName)) {
132: termDocs.seek(termEnum.term());
133: while (termDocs.next()) {
134: termCounts[termDocs.doc()] += termDocs
135: .freq();
136: }
137: }
138: } while (termEnum.next());
139:
140: } finally {
141: if (null != termDocs)
142: termDocs.close();
143: }
144: } finally {
145: if (null != termEnum)
146: termEnum.close();
147: }
148: } finally {
149: if (null != reader)
150: reader.close();
151: }
152:
153: try {
154: reader = IndexReader.open(dir);
155: for (int d = 0; d < termCounts.length; d++) {
156: if (!reader.isDeleted(d)) {
157: if (sim == null)
158: reader.setNorm(d, fieldName, fakeNorms[0]);
159: else
160: reader.setNorm(d, fieldName, sim.encodeNorm(sim
161: .lengthNorm(fieldName, termCounts[d])));
162: }
163: }
164:
165: } finally {
166: if (null != reader)
167: reader.close();
168: }
169: }
170:
171: }
|