001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.document.Document;
021: import org.apache.lucene.document.Fieldable;
022: import org.apache.lucene.store.Directory;
023: import org.apache.lucene.store.IndexInput;
024: import org.apache.lucene.store.IndexOutput;
025:
026: import java.io.IOException;
027: import java.util.*;
028:
029: /** Access to the Fieldable Info file that describes document fields and whether or
030: * not they are indexed. Each segment has a separate Fieldable Info file. Objects
031: * of this class are thread-safe for multiple readers, but only one thread can
032: * be adding documents at a time, with no other reader or writer threads
033: * accessing this object.
034: */
035: final class FieldInfos {
036:
037: static final byte IS_INDEXED = 0x1;
038: static final byte STORE_TERMVECTOR = 0x2;
039: static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
040: static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
041: static final byte OMIT_NORMS = 0x10;
042: static final byte STORE_PAYLOADS = 0x20;
043:
044: private ArrayList byNumber = new ArrayList();
045: private HashMap byName = new HashMap();
046:
047: FieldInfos() {
048: }
049:
050: /**
051: * Construct a FieldInfos object using the directory and the name of the file
052: * IndexInput
053: * @param d The directory to open the IndexInput from
054: * @param name The name of the file to open the IndexInput from in the Directory
055: * @throws IOException
056: */
057: FieldInfos(Directory d, String name) throws IOException {
058: IndexInput input = d.openInput(name);
059: try {
060: read(input);
061: } finally {
062: input.close();
063: }
064: }
065:
066: /**
067: * Returns a deep clone of this FieldInfos instance.
068: */
069: public Object clone() {
070: FieldInfos fis = new FieldInfos();
071: final int numField = byNumber.size();
072: for (int i = 0; i < numField; i++) {
073: FieldInfo fi = (FieldInfo) ((FieldInfo) byNumber.get(i))
074: .clone();
075: fis.byNumber.add(fi);
076: fis.byName.put(fi.name, fi);
077: }
078: return fis;
079: }
080:
081: /** Adds field info for a Document. */
082: public void add(Document doc) {
083: List fields = doc.getFields();
084: Iterator fieldIterator = fields.iterator();
085: while (fieldIterator.hasNext()) {
086: Fieldable field = (Fieldable) fieldIterator.next();
087: add(field.name(), field.isIndexed(), field
088: .isTermVectorStored(), field
089: .isStorePositionWithTermVector(), field
090: .isStoreOffsetWithTermVector(), field
091: .getOmitNorms());
092: }
093: }
094:
095: /**
096: * Add fields that are indexed. Whether they have termvectors has to be specified.
097: *
098: * @param names The names of the fields
099: * @param storeTermVectors Whether the fields store term vectors or not
100: * @param storePositionWithTermVector treu if positions should be stored.
101: * @param storeOffsetWithTermVector true if offsets should be stored
102: */
103: public void addIndexed(Collection names, boolean storeTermVectors,
104: boolean storePositionWithTermVector,
105: boolean storeOffsetWithTermVector) {
106: Iterator i = names.iterator();
107: while (i.hasNext()) {
108: add((String) i.next(), true, storeTermVectors,
109: storePositionWithTermVector,
110: storeOffsetWithTermVector);
111: }
112: }
113:
114: /**
115: * Assumes the fields are not storing term vectors.
116: *
117: * @param names The names of the fields
118: * @param isIndexed Whether the fields are indexed or not
119: *
120: * @see #add(String, boolean)
121: */
122: public void add(Collection names, boolean isIndexed) {
123: Iterator i = names.iterator();
124: while (i.hasNext()) {
125: add((String) i.next(), isIndexed);
126: }
127: }
128:
129: /**
130: * Calls 5 parameter add with false for all TermVector parameters.
131: *
132: * @param name The name of the Fieldable
133: * @param isIndexed true if the field is indexed
134: * @see #add(String, boolean, boolean, boolean, boolean)
135: */
136: public void add(String name, boolean isIndexed) {
137: add(name, isIndexed, false, false, false, false);
138: }
139:
140: /**
141: * Calls 5 parameter add with false for term vector positions and offsets.
142: *
143: * @param name The name of the field
144: * @param isIndexed true if the field is indexed
145: * @param storeTermVector true if the term vector should be stored
146: */
147: public void add(String name, boolean isIndexed,
148: boolean storeTermVector) {
149: add(name, isIndexed, storeTermVector, false, false, false);
150: }
151:
152: /** If the field is not yet known, adds it. If it is known, checks to make
153: * sure that the isIndexed flag is the same as was given previously for this
154: * field. If not - marks it as being indexed. Same goes for the TermVector
155: * parameters.
156: *
157: * @param name The name of the field
158: * @param isIndexed true if the field is indexed
159: * @param storeTermVector true if the term vector should be stored
160: * @param storePositionWithTermVector true if the term vector with positions should be stored
161: * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
162: */
163: public void add(String name, boolean isIndexed,
164: boolean storeTermVector,
165: boolean storePositionWithTermVector,
166: boolean storeOffsetWithTermVector) {
167:
168: add(name, isIndexed, storeTermVector,
169: storePositionWithTermVector, storeOffsetWithTermVector,
170: false);
171: }
172:
173: /** If the field is not yet known, adds it. If it is known, checks to make
174: * sure that the isIndexed flag is the same as was given previously for this
175: * field. If not - marks it as being indexed. Same goes for the TermVector
176: * parameters.
177: *
178: * @param name The name of the field
179: * @param isIndexed true if the field is indexed
180: * @param storeTermVector true if the term vector should be stored
181: * @param storePositionWithTermVector true if the term vector with positions should be stored
182: * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
183: * @param omitNorms true if the norms for the indexed field should be omitted
184: */
185: public void add(String name, boolean isIndexed,
186: boolean storeTermVector,
187: boolean storePositionWithTermVector,
188: boolean storeOffsetWithTermVector, boolean omitNorms) {
189: add(name, isIndexed, storeTermVector,
190: storePositionWithTermVector, storeOffsetWithTermVector,
191: omitNorms, false);
192: }
193:
194: /** If the field is not yet known, adds it. If it is known, checks to make
195: * sure that the isIndexed flag is the same as was given previously for this
196: * field. If not - marks it as being indexed. Same goes for the TermVector
197: * parameters.
198: *
199: * @param name The name of the field
200: * @param isIndexed true if the field is indexed
201: * @param storeTermVector true if the term vector should be stored
202: * @param storePositionWithTermVector true if the term vector with positions should be stored
203: * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
204: * @param omitNorms true if the norms for the indexed field should be omitted
205: * @param storePayloads true if payloads should be stored for this field
206: */
207: public FieldInfo add(String name, boolean isIndexed,
208: boolean storeTermVector,
209: boolean storePositionWithTermVector,
210: boolean storeOffsetWithTermVector, boolean omitNorms,
211: boolean storePayloads) {
212: FieldInfo fi = fieldInfo(name);
213: if (fi == null) {
214: return addInternal(name, isIndexed, storeTermVector,
215: storePositionWithTermVector,
216: storeOffsetWithTermVector, omitNorms, storePayloads);
217: } else {
218: if (fi.isIndexed != isIndexed) {
219: fi.isIndexed = true; // once indexed, always index
220: }
221: if (fi.storeTermVector != storeTermVector) {
222: fi.storeTermVector = true; // once vector, always vector
223: }
224: if (fi.storePositionWithTermVector != storePositionWithTermVector) {
225: fi.storePositionWithTermVector = true; // once vector, always vector
226: }
227: if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
228: fi.storeOffsetWithTermVector = true; // once vector, always vector
229: }
230: if (fi.omitNorms != omitNorms) {
231: fi.omitNorms = false; // once norms are stored, always store
232: }
233: if (fi.storePayloads != storePayloads) {
234: fi.storePayloads = true;
235: }
236:
237: }
238: return fi;
239: }
240:
241: private FieldInfo addInternal(String name, boolean isIndexed,
242: boolean storeTermVector,
243: boolean storePositionWithTermVector,
244: boolean storeOffsetWithTermVector, boolean omitNorms,
245: boolean storePayloads) {
246: FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(),
247: storeTermVector, storePositionWithTermVector,
248: storeOffsetWithTermVector, omitNorms, storePayloads);
249: byNumber.add(fi);
250: byName.put(name, fi);
251: return fi;
252: }
253:
254: public int fieldNumber(String fieldName) {
255: FieldInfo fi = fieldInfo(fieldName);
256: return (fi != null) ? fi.number : -1;
257: }
258:
259: public FieldInfo fieldInfo(String fieldName) {
260: return (FieldInfo) byName.get(fieldName);
261: }
262:
263: /**
264: * Return the fieldName identified by its number.
265: *
266: * @param fieldNumber
267: * @return the fieldName or an empty string when the field
268: * with the given number doesn't exist.
269: */
270: public String fieldName(int fieldNumber) {
271: FieldInfo fi = fieldInfo(fieldNumber);
272: return (fi != null) ? fi.name : "";
273: }
274:
275: /**
276: * Return the fieldinfo object referenced by the fieldNumber.
277: * @param fieldNumber
278: * @return the FieldInfo object or null when the given fieldNumber
279: * doesn't exist.
280: */
281: public FieldInfo fieldInfo(int fieldNumber) {
282: return (fieldNumber >= 0) ? (FieldInfo) byNumber
283: .get(fieldNumber) : null;
284: }
285:
286: public int size() {
287: return byNumber.size();
288: }
289:
290: public boolean hasVectors() {
291: boolean hasVectors = false;
292: for (int i = 0; i < size(); i++) {
293: if (fieldInfo(i).storeTermVector) {
294: hasVectors = true;
295: break;
296: }
297: }
298: return hasVectors;
299: }
300:
301: public void write(Directory d, String name) throws IOException {
302: IndexOutput output = d.createOutput(name);
303: try {
304: write(output);
305: } finally {
306: output.close();
307: }
308: }
309:
310: public void write(IndexOutput output) throws IOException {
311: output.writeVInt(size());
312: for (int i = 0; i < size(); i++) {
313: FieldInfo fi = fieldInfo(i);
314: byte bits = 0x0;
315: if (fi.isIndexed)
316: bits |= IS_INDEXED;
317: if (fi.storeTermVector)
318: bits |= STORE_TERMVECTOR;
319: if (fi.storePositionWithTermVector)
320: bits |= STORE_POSITIONS_WITH_TERMVECTOR;
321: if (fi.storeOffsetWithTermVector)
322: bits |= STORE_OFFSET_WITH_TERMVECTOR;
323: if (fi.omitNorms)
324: bits |= OMIT_NORMS;
325: if (fi.storePayloads)
326: bits |= STORE_PAYLOADS;
327: output.writeString(fi.name);
328: output.writeByte(bits);
329: }
330: }
331:
332: private void read(IndexInput input) throws IOException {
333: int size = input.readVInt();//read in the size
334: for (int i = 0; i < size; i++) {
335: String name = input.readString().intern();
336: byte bits = input.readByte();
337: boolean isIndexed = (bits & IS_INDEXED) != 0;
338: boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
339: boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
340: boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
341: boolean omitNorms = (bits & OMIT_NORMS) != 0;
342: boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
343:
344: addInternal(name, isIndexed, storeTermVector,
345: storePositionsWithTermVector,
346: storeOffsetWithTermVector, omitNorms, storePayloads);
347: }
348: }
349:
350: }
|