001: package org.apache.lucene.index;
002:
003: /**
004: * Copyright 2007 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.util.*;
020:
021: /**
022: * Store a sorted collection of {@link org.apache.lucene.index.TermVectorEntry}s. Collects all term information
023: * into a single, SortedSet.
024: * <br/>
025: * NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not
026: * know what Fields they correlate with.
027: * <br/>
028: * This is not thread-safe
029: */
030: public class SortedTermVectorMapper extends TermVectorMapper {
031:
032: private SortedSet currentSet;
033: private Map termToTVE = new HashMap();
034: private boolean storeOffsets;
035: private boolean storePositions;
036: /**
037: * Stand-in name for the field in {@link TermVectorEntry}.
038: */
039: public static final String ALL = "_ALL_";
040:
041: /**
042: *
043: * @param comparator A Comparator for sorting {@link TermVectorEntry}s
044: */
045: public SortedTermVectorMapper(Comparator comparator) {
046: this (false, false, comparator);
047: }
048:
049: public SortedTermVectorMapper(boolean ignoringPositions,
050: boolean ignoringOffsets, Comparator comparator) {
051: super (ignoringPositions, ignoringOffsets);
052: currentSet = new TreeSet(comparator);
053: }
054:
055: /**
056: *
057: * @param term The term to map
058: * @param frequency The frequency of the term
059: * @param offsets Offset information, may be null
060: * @param positions Position information, may be null
061: */
062: //We need to combine any previous mentions of the term
063: public void map(String term, int frequency,
064: TermVectorOffsetInfo[] offsets, int[] positions) {
065: TermVectorEntry entry = (TermVectorEntry) termToTVE.get(term);
066: if (entry == null) {
067: entry = new TermVectorEntry(ALL, term, frequency,
068: storeOffsets == true ? offsets : null,
069: storePositions == true ? positions : null);
070: termToTVE.put(term, entry);
071: currentSet.add(entry);
072: } else {
073: entry.setFrequency(entry.getFrequency() + frequency);
074: if (storeOffsets) {
075: TermVectorOffsetInfo[] existingOffsets = entry
076: .getOffsets();
077: //A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions
078: if (existingOffsets != null && offsets != null
079: && offsets.length > 0) {
080: //copy over the existing offsets
081: TermVectorOffsetInfo[] newOffsets = new TermVectorOffsetInfo[existingOffsets.length
082: + offsets.length];
083: System.arraycopy(existingOffsets, 0, newOffsets, 0,
084: existingOffsets.length);
085: System.arraycopy(offsets, 0, newOffsets,
086: existingOffsets.length, offsets.length);
087: entry.setOffsets(newOffsets);
088: } else if (existingOffsets == null && offsets != null
089: && offsets.length > 0) {
090: entry.setOffsets(offsets);
091: }
092: //else leave it alone
093: }
094: if (storePositions) {
095: int[] existingPositions = entry.getPositions();
096: if (existingPositions != null && positions != null
097: && positions.length > 0) {
098: int[] newPositions = new int[existingPositions.length
099: + positions.length];
100: System.arraycopy(existingPositions, 0,
101: newPositions, 0, existingPositions.length);
102: System.arraycopy(positions, 0, newPositions,
103: existingPositions.length, positions.length);
104: entry.setPositions(newPositions);
105: } else if (existingPositions == null
106: && positions != null && positions.length > 0) {
107: entry.setPositions(positions);
108: }
109: }
110: }
111:
112: }
113:
114: public void setExpectations(String field, int numTerms,
115: boolean storeOffsets, boolean storePositions) {
116:
117: this .storeOffsets = storeOffsets;
118: this .storePositions = storePositions;
119: }
120:
121: /**
122: * The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed into the constructor.
123: *<br/>
124: * This set will be empty until after the mapping process takes place.
125: *
126: * @return The SortedSet of {@link TermVectorEntry}.
127: */
128: public SortedSet getTermVectorEntrySet() {
129: return currentSet;
130: }
131:
132: }
|