001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.store.Directory;
021: import org.apache.lucene.store.IndexOutput;
022: import org.apache.lucene.util.StringHelper;
023:
024: import java.io.IOException;
025:
026: final class TermVectorsWriter {
027:
028: private IndexOutput tvx = null, tvd = null, tvf = null;
029: private FieldInfos fieldInfos;
030:
031: public TermVectorsWriter(Directory directory, String segment,
032: FieldInfos fieldInfos) throws IOException {
033: // Open files for TermVector storage
034: tvx = directory.createOutput(segment + "."
035: + IndexFileNames.VECTORS_INDEX_EXTENSION);
036: tvx.writeInt(TermVectorsReader.FORMAT_VERSION);
037: tvd = directory.createOutput(segment + "."
038: + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
039: tvd.writeInt(TermVectorsReader.FORMAT_VERSION);
040: tvf = directory.createOutput(segment + "."
041: + IndexFileNames.VECTORS_FIELDS_EXTENSION);
042: tvf.writeInt(TermVectorsReader.FORMAT_VERSION);
043:
044: this .fieldInfos = fieldInfos;
045: }
046:
047: /**
048: * Add a complete document specified by all its term vectors. If document has no
049: * term vectors, add value for tvx.
050: *
051: * @param vectors
052: * @throws IOException
053: */
054: public final void addAllDocVectors(TermFreqVector[] vectors)
055: throws IOException {
056:
057: tvx.writeLong(tvd.getFilePointer());
058:
059: if (vectors != null) {
060: final int numFields = vectors.length;
061: tvd.writeVInt(numFields);
062:
063: long[] fieldPointers = new long[numFields];
064:
065: for (int i = 0; i < numFields; i++) {
066: fieldPointers[i] = tvf.getFilePointer();
067:
068: final int fieldNumber = fieldInfos
069: .fieldNumber(vectors[i].getField());
070:
071: // 1st pass: write field numbers to tvd
072: tvd.writeVInt(fieldNumber);
073:
074: final int numTerms = vectors[i].size();
075: tvf.writeVInt(numTerms);
076:
077: final TermPositionVector tpVector;
078:
079: final byte bits;
080: final boolean storePositions;
081: final boolean storeOffsets;
082:
083: if (vectors[i] instanceof TermPositionVector) {
084: // May have positions & offsets
085: tpVector = (TermPositionVector) vectors[i];
086: storePositions = tpVector.size() > 0
087: && tpVector.getTermPositions(0) != null;
088: storeOffsets = tpVector.size() > 0
089: && tpVector.getOffsets(0) != null;
090: bits = (byte) ((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR
091: : 0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR
092: : 0));
093: } else {
094: tpVector = null;
095: bits = 0;
096: storePositions = false;
097: storeOffsets = false;
098: }
099:
100: tvf.writeVInt(bits);
101:
102: final String[] terms = vectors[i].getTerms();
103: final int[] freqs = vectors[i].getTermFrequencies();
104:
105: String lastTermText = "";
106: for (int j = 0; j < numTerms; j++) {
107: final String termText = terms[j];
108: int start = StringHelper.stringDifference(
109: lastTermText, termText);
110: int length = termText.length() - start;
111: tvf.writeVInt(start); // write shared prefix length
112: tvf.writeVInt(length); // write delta length
113: tvf.writeChars(termText, start, length); // write delta chars
114: lastTermText = termText;
115:
116: final int termFreq = freqs[j];
117:
118: tvf.writeVInt(termFreq);
119:
120: if (storePositions) {
121: final int[] positions = tpVector
122: .getTermPositions(j);
123: if (positions == null)
124: throw new IllegalStateException(
125: "Trying to write positions that are null!");
126: assert positions.length == termFreq;
127:
128: // use delta encoding for positions
129: int lastPosition = 0;
130: for (int k = 0; k < positions.length; k++) {
131: final int position = positions[k];
132: tvf.writeVInt(position - lastPosition);
133: lastPosition = position;
134: }
135: }
136:
137: if (storeOffsets) {
138: final TermVectorOffsetInfo[] offsets = tpVector
139: .getOffsets(j);
140: if (offsets == null)
141: throw new IllegalStateException(
142: "Trying to write offsets that are null!");
143: assert offsets.length == termFreq;
144:
145: // use delta encoding for offsets
146: int lastEndOffset = 0;
147: for (int k = 0; k < offsets.length; k++) {
148: final int startOffset = offsets[k]
149: .getStartOffset();
150: final int endOffset = offsets[k]
151: .getEndOffset();
152: tvf.writeVInt(startOffset - lastEndOffset);
153: tvf.writeVInt(endOffset - startOffset);
154: lastEndOffset = endOffset;
155: }
156: }
157: }
158: }
159:
160: // 2nd pass: write field pointers to tvd
161: long lastFieldPointer = 0;
162: for (int i = 0; i < numFields; i++) {
163: final long fieldPointer = fieldPointers[i];
164: tvd.writeVLong(fieldPointer - lastFieldPointer);
165: lastFieldPointer = fieldPointer;
166: }
167: } else
168: tvd.writeVInt(0);
169: }
170:
171: /** Close all streams. */
172: final void close() throws IOException {
173: // make an effort to close all streams we can but remember and re-throw
174: // the first exception encountered in this process
175: IOException keep = null;
176: if (tvx != null)
177: try {
178: tvx.close();
179: } catch (IOException e) {
180: if (keep == null)
181: keep = e;
182: }
183: if (tvd != null)
184: try {
185: tvd.close();
186: } catch (IOException e) {
187: if (keep == null)
188: keep = e;
189: }
190: if (tvf != null)
191: try {
192: tvf.close();
193: } catch (IOException e) {
194: if (keep == null)
195: keep = e;
196: }
197: if (keep != null)
198: throw (IOException) keep.fillInStackTrace();
199: }
200: }
|