001: package org.apache.lucene.index;
002:
003: /**
004: * Copyright 2007 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: /**
020: * The TermVectorMapper can be used to map Term Vectors into your own
021: * structure instead of the parallel array structure used by
022: * {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
023: * <p/>
024: * It is up to the implementation to make sure it is thread-safe.
025: *
026: *
027: **/
028: public abstract class TermVectorMapper {
029:
030: private boolean ignoringPositions;
031: private boolean ignoringOffsets;
032:
033: protected TermVectorMapper() {
034: }
035:
036: /**
037: *
038: * @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if they are stored
039: * @param ignoringOffsets similar to ignoringPositions
040: */
041: protected TermVectorMapper(boolean ignoringPositions,
042: boolean ignoringOffsets) {
043: this .ignoringPositions = ignoringPositions;
044: this .ignoringOffsets = ignoringOffsets;
045: }
046:
047: /**
048: * Tell the mapper what to expect in regards to field, number of terms, offset and position storage.
049: * This method will be called once before retrieving the vector for a field.
050: *
051: * This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}.
052: * @param field The field the vector is for
053: * @param numTerms The number of terms that need to be mapped
054: * @param storeOffsets true if the mapper should expect offset information
055: * @param storePositions true if the mapper should expect positions info
056: */
057: public abstract void setExpectations(String field, int numTerms,
058: boolean storeOffsets, boolean storePositions);
059:
060: /**
061: * Map the Term Vector information into your own structure
062: * @param term The term to add to the vector
063: * @param frequency The frequency of the term in the document
064: * @param offsets null if the offset is not specified, otherwise the offset into the field of the term
065: * @param positions null if the position is not specified, otherwise the position in the field of the term
066: */
067: public abstract void map(String term, int frequency,
068: TermVectorOffsetInfo[] offsets, int[] positions);
069:
070: /**
071: * Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they
072: * can be skipped over. Derived classes should set this to true if they want to ignore positions. The default
073: * is false, meaning positions will be loaded if they are stored.
074: * @return false
075: */
076: public boolean isIgnoringPositions() {
077: return ignoringPositions;
078: }
079:
080: /**
081: *
082: * @see #isIgnoringPositions() Same principal as {@link #isIgnoringPositions()}, but applied to offsets. false by default.
083: * @return false
084: */
085: public boolean isIgnoringOffsets() {
086: return ignoringOffsets;
087: }
088:
089: /**
090: * Passes down the index of the document whose term vector is currently being mapped,
091: * once for each top level call to a term vector reader.
092: *<p/>
093: * Default implementation IGNORES the document number. Override if your implementation needs the document number.
094: * <p/>
095: * NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations.
096: *
097: * @param documentNumber index of document currently being mapped
098: */
099: public void setDocumentNumber(int documentNumber) {
100: }
101:
102: }
|