package org.apache.lucene.index;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.zip.Deflater;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;

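/**
 * Writes stored document fields for a segment: the field data itself goes
 * into the ".fdt" file, and one pointer per document into the ".fdx" index
 * file.
 */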
final class FieldsWriter {
  static final byte FIELD_IS_TOKENIZED = 0x1;
  static final byte FIELD_IS_BINARY = 0x2;
  static final byte FIELD_IS_COMPRESSED = 0x4;

  private FieldInfos fieldInfos;

  private IndexOutput fieldsStream;

  private IndexOutput indexStream;

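  // True if this writer created the streams itself and therefore must close
  // them; false when they were supplied by the caller.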
  private boolean doClose;

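  /** Creates the ".fdt" and ".fdx" outputs for the segment; this writer owns and closes them. */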
  FieldsWriter(Directory d, String segment, FieldInfos fn)
      throws IOException {
    fieldInfos = fn;
    fieldsStream = d.createOutput(segment + ".fdt");
    indexStream = d.createOutput(segment + ".fdx");
    doClose = true;
  }

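  /** Writes to outputs supplied by the caller, who remains responsible for closing them. */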
  FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
      throws IOException {
    fieldInfos = fn;
    fieldsStream = fdt;
    indexStream = fdx;
    doClose = false;
  }

  // Writes the contents of buffer into the fields stream
  // and adds a new entry for this document into the index
  // stream. This assumes the buffer was already written
  // in the correct fields format.
  void flushDocument(int numStoredFields, RAMOutputStream buffer)
      throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());
    fieldsStream.writeVInt(numStoredFields);
    buffer.writeTo(fieldsStream);
  }

  void flush() throws IOException {
    indexStream.flush();
    fieldsStream.flush();
  }

  final void close() throws IOException {
    if (doClose) {
      fieldsStream.close();
      indexStream.close();
    }
  }

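  // Writes a single stored field: its field number, a flags byte
  // (tokenized/binary/compressed), and then the field value.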
  final void writeField(FieldInfo fi, Fieldable field)
      throws IOException {
    // If the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
    // and field.binaryValue() already returns the compressed value for a field
    // with isCompressed()==true, so we disable compression in that case
    boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
    fieldsStream.writeVInt(fi.number);
    byte bits = 0;
    if (field.isTokenized())
      bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    if (field.isBinary())
      bits |= FieldsWriter.FIELD_IS_BINARY;
    if (field.isCompressed())
      bits |= FieldsWriter.FIELD_IS_COMPRESSED;

    fieldsStream.writeByte(bits);

    if (field.isCompressed()) {
      // compression is enabled for the current field
      byte[] data = null;

      if (disableCompression) {
        // optimized case for merging, the data
        // is already compressed
        data = field.binaryValue();
      } else {
        // check if it is a binary field
        if (field.isBinary()) {
          data = compress(field.binaryValue());
        } else {
          data = compress(field.stringValue().getBytes("UTF-8"));
        }
      }
      final int len = data.length;
      fieldsStream.writeVInt(len);
      fieldsStream.writeBytes(data, len);
    } else {
      // compression is disabled for the current field
      if (field.isBinary()) {
        byte[] data = field.binaryValue();
        final int len = data.length;
        fieldsStream.writeVInt(len);
        fieldsStream.writeBytes(data, len);
      } else {
        fieldsStream.writeString(field.stringValue());
      }
    }
  }

  /** Bulk write a contiguous series of documents. The
   * lengths array is the length (in bytes) of each raw
   * document. The stream IndexInput is the
   * fieldsStream from which we should bulk-copy all
   * bytes. */
  final void addRawDocuments(IndexInput stream, int[] lengths,
      int numDocs) throws IOException {
    long position = fieldsStream.getFilePointer();
    long start = position;
    for (int i = 0; i < numDocs; i++) {
      indexStream.writeLong(position);
      position += lengths[i];
    }
    fieldsStream.copyBytes(stream, position - start);
    assert fieldsStream.getFilePointer() == position;
  }

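  // Writes all stored fields of a document: the pointer into the fields
  // stream, the count of stored fields, then each stored field in turn.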
  final void addDocument(Document doc) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());

    int storedCount = 0;
    Iterator fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      if (field.isStored())
        storedCount++;
    }
    fieldsStream.writeVInt(storedCount);

    fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      if (field.isStored())
        writeField(fieldInfos.fieldInfo(field.name()), field);
    }
  }

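  // Deflates the input bytes at BEST_COMPRESSION and returns the compressed bytes.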
  private final byte[] compress(byte[] input) {

    // Create the compressor with highest level of compression
    Deflater compressor = new Deflater();
    compressor.setLevel(Deflater.BEST_COMPRESSION);

    // Give the compressor the data to compress
    compressor.setInput(input);
    compressor.finish();

    /*
     * Create an expandable byte array to hold the compressed data.
     * You cannot use an array that's the same size as the original because
     * there is no guarantee that the compressed data will be smaller than
     * the uncompressed data.
     */
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Compress the data
    byte[] buf = new byte[1024];
    while (!compressor.finished()) {
      int count = compressor.deflate(buf);
      bos.write(buf, 0, count);
    }

    compressor.end();

    // Get the compressed data
    return bos.toByteArray();
  }
}