001: package it.unimi.dsi.mg4j.index;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2005-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntList;
025: import it.unimi.dsi.fastutil.longs.LongList;
026: import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
027: import it.unimi.dsi.mg4j.index.payload.Payload;
028: import it.unimi.dsi.io.InputBitStream;
029: import it.unimi.dsi.Util;
030: import it.unimi.dsi.util.Properties;
031: import it.unimi.dsi.util.StringMap;
032: import it.unimi.dsi.util.PrefixMap;
033:
034: import java.io.IOException;
035: import java.io.InputStream;
036: import java.lang.reflect.Constructor;
037:
038: import org.apache.log4j.Logger;
039:
040: /** A {@linkplain BitStreamIndexWriter high-performance bitstream-based} index.
041: *
042: * <P>Implementing subclasses must provide access to the index bitstream (as it
043: * happens for a {@link BitStreamIndex}) but also to the positions stream,
044: * both at {@linkplain #getPositionsInputStream() byte} and {@linkplain #getPositionsInputBitStream(int) bit} level.
045: *
046: * <h2>Wired implementations</h2>
047: *
048: * <p>The standard readers associated to an instance of this class are of type {@link BitStreamHPIndexReader}.
049: * Nonetheless, it is possible to generate automatically sources for wired classes that
050: * work only for a particular set of codings and flags. The wired classes will be fetched
051: * automagically by reflection, if available. Please read the section about performance in the MG4J manual.
052: *
053: * @author Sebastiano Vigna
054: * @since 1.1
055: */
056:
057: public abstract class BitStreamHPIndex extends BitStreamIndex {
058: private static final long serialVersionUID = 0;
059: private static final Logger LOGGER = Util
060: .getLogger(BitStreamHPIndex.class);
061:
062: public BitStreamHPIndex(final int numberOfDocuments,
063: final int numberOfTerms, final long numberOfPostings,
064: final long numberOfOccurrences, final int maxCount,
065: final Payload payload, final Coding frequencyCoding,
066: final Coding pointerCoding, final Coding countCoding,
067: final Coding positionCoding, final int quantum,
068: final int height, final int bufferSize,
069: final TermProcessor termProcessor, final String field,
070: final Properties properties,
071: final StringMap<? extends CharSequence> termMap,
072: final PrefixMap<? extends CharSequence> prefixMap,
073: final IntList sizes, final LongList offsets) {
074: super (numberOfDocuments, numberOfTerms, numberOfPostings,
075: numberOfOccurrences, maxCount, payload,
076: frequencyCoding, pointerCoding, countCoding,
077: positionCoding, quantum, height, bufferSize,
078: termProcessor, field, properties, termMap, prefixMap,
079: sizes, offsets);
080: if (height < 0)
081: throw new IllegalArgumentException("Illegal height "
082: + height);
083: if (quantum <= 0 || (quantum & -quantum) != quantum)
084: throw new IllegalArgumentException("Illegal quantum "
085: + quantum);
086: }
087:
088: @SuppressWarnings("unchecked")
089: protected Constructor<? extends IndexReader> getConstructor() {
090: Class<? extends IndexReader> readerClass = BitStreamHPIndexReader.class;
091: String className = BitStreamHPIndexReader.class.getPackage()
092: .getName()
093: + ".wired."
094: + featureName(frequencyCoding)
095: + featureName(pointerCoding)
096: + (hasPayloads ? "Payloads " : featureName(countCoding)
097: + featureName(positionCoding))
098: + BitStreamHPIndexReader.class.getSimpleName();
099:
100: try {
101: readerClass = (Class<? extends IndexReader>) Class
102: .forName(className);
103: LOGGER.info("Dynamically fetched reader class "
104: + readerClass.getSimpleName());
105: } catch (Exception e) {
106: LOGGER.info("Cannot fetch dynamically class " + className
107: + "; falling back to generic (slower) class "
108: + BitStreamHPIndexReader.class.getSimpleName());
109: }
110:
111: try {
112: return readerClass.getConstructor(BitStreamHPIndex.class,
113: InputBitStream.class, InputBitStream.class);
114: } catch (Exception shouldntReallyHappen) {
115: throw new RuntimeException(
116: "Cannot find suitable constructor in "
117: + readerClass.getSimpleName());
118: }
119: }
120:
121: /** Returns an input bit stream over the index.
122: *
123: * @param bufferSize a suggested buffer size.
124: * @return an input bit stream over the index.
125: */
126: public abstract InputBitStream getPositionsInputBitStream(
127: final int bufferSize) throws IOException;
128:
129: /** Returns an input stream over the index.
130: *
131: * @return an input stream over the index.
132: */
133: public abstract InputStream getPositionsInputStream()
134: throws IOException;
135:
136: public IndexReader getReader(final int bufferSize)
137: throws IOException {
138: try {
139: return readerConstructor
140: .newInstance(
141: this ,
142: getInputBitStream(bufferSize == -1 ? this .bufferSize
143: : bufferSize),
144: getPositionsInputBitStream(bufferSize == -1 ? this .bufferSize
145: : bufferSize));
146: } catch (IOException e) {
147: throw e;
148: } catch (Exception e) {
149: throw new RuntimeException(e);
150: }
151: }
152:
153: }
|