001: package org.apache.lucene.index;
002:
003: /**
004: * Copyright 2007 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.util.ArrayList;
020: import java.util.HashMap;
021: import java.util.List;
022: import java.util.Map;
023:
024: /**
025: * For each field, stores position-by-position information. Frequency information is ignored.
026: * <p>
027: * This class is not thread-safe.
028: */
029: public class PositionBasedTermVectorMapper extends TermVectorMapper {
030: private Map/*<String, Map<Integer, TVPositionInfo>>*/fieldToTerms;
031:
032: private String currentField;
033: /**
034: * A Map of Integer and TVPositionInfo
035: */
036: private Map/*<Integer, TVPositionInfo>*/currentPositions;
037: private boolean storeOffsets;
038:
039: /**
040: *
041: *
042: */
043: public PositionBasedTermVectorMapper() {
044: super (false, false);
045: }
046:
047: public PositionBasedTermVectorMapper(boolean ignoringOffsets) {
048: super (false, ignoringOffsets);
049: }
050:
051: /**
052: * Never ignores positions. This mapper doesn't make much sense unless there are positions
053: * @return false
054: */
055: public boolean isIgnoringPositions() {
056: return false;
057: }
058:
059: /**
060: * Callback for the TermVectorReader.
061: * @param term
062: * @param frequency
063: * @param offsets
064: * @param positions
065: */
066: public void map(String term, int frequency,
067: TermVectorOffsetInfo[] offsets, int[] positions) {
068: for (int i = 0; i < positions.length; i++) {
069: Integer posVal = new Integer(positions[i]);
070: TVPositionInfo pos = (TVPositionInfo) currentPositions
071: .get(posVal);
072: if (pos == null) {
073: pos = new TVPositionInfo(positions[i], storeOffsets);
074: currentPositions.put(posVal, pos);
075: }
076: pos.addTerm(term, offsets != null ? offsets[i] : null);
077: }
078: }
079:
080: /**
081: * Callback mechanism used by the TermVectorReader
082: * @param field The field being read
083: * @param numTerms The number of terms in the vector
084: * @param storeOffsets Whether offsets are available
085: * @param storePositions Whether positions are available
086: */
087: public void setExpectations(String field, int numTerms,
088: boolean storeOffsets, boolean storePositions) {
089: if (storePositions == false) {
090: throw new RuntimeException(
091: "You must store positions in order to use this Mapper");
092: }
093: if (storeOffsets == true) {
094: //ignoring offsets
095: }
096: fieldToTerms = new HashMap(numTerms);
097: this .storeOffsets = storeOffsets;
098: currentField = field;
099: currentPositions = new HashMap();
100: fieldToTerms.put(currentField, currentPositions);
101: }
102:
103: /**
104: * Get the mapping between fields and terms, sorted by the comparator
105: *
106: * @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is {@link org.apache.lucene.index.PositionBasedTermVectorMapper.TVPositionInfo}.
107: */
108: public Map getFieldToTerms() {
109: return fieldToTerms;
110: }
111:
112: /**
113: * Container for a term at a position
114: */
115: public static class TVPositionInfo {
116: private int position;
117: //a list of Strings
118: private List terms;
119: //A list of TermVectorOffsetInfo
120: private List offsets;
121:
122: public TVPositionInfo(int position, boolean storeOffsets) {
123: this .position = position;
124: terms = new ArrayList();
125: if (storeOffsets) {
126: offsets = new ArrayList();
127: }
128: }
129:
130: void addTerm(String term, TermVectorOffsetInfo info) {
131: terms.add(term);
132: if (offsets != null) {
133: offsets.add(info);
134: }
135: }
136:
137: /**
138: *
139: * @return The position of the term
140: */
141: public int getPosition() {
142: return position;
143: }
144:
145: /**
146: * Note, there may be multiple terms at the same position
147: * @return A List of Strings
148: */
149: public List getTerms() {
150: return terms;
151: }
152:
153: /**
154: * Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position
155: * @return A List of TermVectorOffsetInfo objects, if offsets are store.
156: */
157: public List getOffsets() {
158: return offsets;
159: }
160: }
161:
162: }
|