001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.store.Directory;
021: import org.apache.lucene.store.IndexOutput;
022: import org.apache.lucene.store.IndexInput;
023: import java.util.LinkedList;
024: import java.util.HashSet;
025: import java.util.Iterator;
026: import java.io.IOException;
027:
028: /**
029: * Combines multiple files into a single compound file.
030: * The file format:<br>
031: * <ul>
032: * <li>VInt fileCount</li>
033: * <li>{Directory}
034: * fileCount entries with the following structure:</li>
035: * <ul>
036: * <li>long dataOffset</li>
037: * <li>String fileName</li>
038: * </ul>
039: * <li>{File Data}
040: * fileCount entries with the raw data of the corresponding file</li>
041: * </ul>
042: *
043: * The fileCount integer indicates how many files are contained in this compound
044: * file. The {directory} that follows has that many entries. Each directory entry
045: * contains a long pointer to the start of this file's data section, and a String
046: * with that file's name.
047: *
048: *
049: * @version $Id: CompoundFileWriter.java 606441 2007-12-22 10:06:28Z mikemccand $
050: */
051: final class CompoundFileWriter {
052:
053: private static final class FileEntry {
054: /** source file */
055: String file;
056:
057: /** temporary holder for the start of directory entry for this file */
058: long directoryOffset;
059:
060: /** temporary holder for the start of this file's data section */
061: long dataOffset;
062: }
063:
064: private Directory directory;
065: private String fileName;
066: private HashSet ids;
067: private LinkedList entries;
068: private boolean merged = false;
069: private SegmentMerger.CheckAbort checkAbort;
070:
071: /** Create the compound stream in the specified file. The file name is the
072: * entire name (no extensions are added).
073: * @throws NullPointerException if <code>dir</code> or <code>name</code> is null
074: */
075: public CompoundFileWriter(Directory dir, String name) {
076: this (dir, name, null);
077: }
078:
079: CompoundFileWriter(Directory dir, String name,
080: SegmentMerger.CheckAbort checkAbort) {
081: if (dir == null)
082: throw new NullPointerException("directory cannot be null");
083: if (name == null)
084: throw new NullPointerException("name cannot be null");
085: this .checkAbort = checkAbort;
086: directory = dir;
087: fileName = name;
088: ids = new HashSet();
089: entries = new LinkedList();
090: }
091:
092: /** Returns the directory of the compound file. */
093: public Directory getDirectory() {
094: return directory;
095: }
096:
097: /** Returns the name of the compound file. */
098: public String getName() {
099: return fileName;
100: }
101:
102: /** Add a source stream. <code>file</code> is the string by which the
103: * sub-stream will be known in the compound stream.
104: *
105: * @throws IllegalStateException if this writer is closed
106: * @throws NullPointerException if <code>file</code> is null
107: * @throws IllegalArgumentException if a file with the same name
108: * has been added already
109: */
110: public void addFile(String file) {
111: if (merged)
112: throw new IllegalStateException(
113: "Can't add extensions after merge has been called");
114:
115: if (file == null)
116: throw new NullPointerException("file cannot be null");
117:
118: if (!ids.add(file))
119: throw new IllegalArgumentException("File " + file
120: + " already added");
121:
122: FileEntry entry = new FileEntry();
123: entry.file = file;
124: entries.add(entry);
125: }
126:
127: /** Merge files with the extensions added up to now.
128: * All files with these extensions are combined sequentially into the
129: * compound stream. After successful merge, the source files
130: * are deleted.
131: * @throws IllegalStateException if close() had been called before or
132: * if no file has been added to this object
133: */
134: public void close() throws IOException {
135: if (merged)
136: throw new IllegalStateException("Merge already performed");
137:
138: if (entries.isEmpty())
139: throw new IllegalStateException(
140: "No entries to merge have been defined");
141:
142: merged = true;
143:
144: // open the compound stream
145: IndexOutput os = null;
146: try {
147: os = directory.createOutput(fileName);
148:
149: // Write the number of entries
150: os.writeVInt(entries.size());
151:
152: // Write the directory with all offsets at 0.
153: // Remember the positions of directory entries so that we can
154: // adjust the offsets later
155: Iterator it = entries.iterator();
156: while (it.hasNext()) {
157: FileEntry fe = (FileEntry) it.next();
158: fe.directoryOffset = os.getFilePointer();
159: os.writeLong(0); // for now
160: os.writeString(fe.file);
161: }
162:
163: // Open the files and copy their data into the stream.
164: // Remember the locations of each file's data section.
165: byte buffer[] = new byte[16384];
166: it = entries.iterator();
167: while (it.hasNext()) {
168: FileEntry fe = (FileEntry) it.next();
169: fe.dataOffset = os.getFilePointer();
170: copyFile(fe, os, buffer);
171: }
172:
173: // Write the data offsets into the directory of the compound stream
174: it = entries.iterator();
175: while (it.hasNext()) {
176: FileEntry fe = (FileEntry) it.next();
177: os.seek(fe.directoryOffset);
178: os.writeLong(fe.dataOffset);
179: }
180:
181: // Close the output stream. Set the os to null before trying to
182: // close so that if an exception occurs during the close, the
183: // finally clause below will not attempt to close the stream
184: // the second time.
185: IndexOutput tmp = os;
186: os = null;
187: tmp.close();
188:
189: } finally {
190: if (os != null)
191: try {
192: os.close();
193: } catch (IOException e) {
194: }
195: }
196: }
197:
198: /** Copy the contents of the file with specified extension into the
199: * provided output stream. Use the provided buffer for moving data
200: * to reduce memory allocation.
201: */
202: private void copyFile(FileEntry source, IndexOutput os,
203: byte buffer[]) throws IOException {
204: IndexInput is = null;
205: try {
206: long startPtr = os.getFilePointer();
207:
208: is = directory.openInput(source.file);
209: long length = is.length();
210: long remainder = length;
211: int chunk = buffer.length;
212:
213: while (remainder > 0) {
214: int len = (int) Math.min(chunk, remainder);
215: is.readBytes(buffer, 0, len);
216: os.writeBytes(buffer, len);
217: remainder -= len;
218: if (checkAbort != null)
219: // Roughly every 2 MB we will check if
220: // it's time to abort
221: checkAbort.work(80);
222: }
223:
224: // Verify that remainder is 0
225: if (remainder != 0)
226: throw new IOException(
227: "Non-zero remainder length after copying: "
228: + remainder + " (id: " + source.file
229: + ", length: " + length
230: + ", buffer size: " + chunk + ")");
231:
232: // Verify that the output length diff is equal to original file
233: long endPtr = os.getFilePointer();
234: long diff = endPtr - startPtr;
235: if (diff != length)
236: throw new IOException(
237: "Difference in the output file offsets "
238: + diff
239: + " does not match the original file length "
240: + length);
241:
242: } finally {
243: if (is != null)
244: is.close();
245: }
246: }
247: }
|