001: package it.unimi.dsi.mg4j.index;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2003-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import java.util.Collections;
025: import java.util.EnumMap;
026: import java.util.Map;
027:
/** A container for constants and enums related to index compression.
 *
 * <p>Several MG4J index-related methods require a set of flags specified as a <em>flag map</em>,
 * that is, an {@link java.util.EnumMap}
 * from {@linkplain Component components} to {@linkplain Coding codings} (see, e.g.,
 * {@link BitStreamIndexWriter}). For the special component {@link Component#PAYLOADS}, the only
 * admissible value is <code>null</code>.
 *
 * <p>Besides declaring the necessary enums, this class contains a parsing method
 * that turns an array of strings of the form <samp><var>component</var>:<var>coding</var></samp>
 * into a flag map.
 *
 * @author Sebastiano Vigna
 * @since 1.2
 */

public class CompressionFlags {
	/** A component of the index. To each component, a flag map associates a {@linkplain Coding coding}. */
	public static enum Component {
		FREQUENCIES, POINTERS, PAYLOADS, COUNTS, POSITIONS
	}

	/** A coding for an index component. */
	public static enum Coding {
		UNARY, GAMMA, DELTA, SHIFTED_GAMMA, ZETA, GOLOMB, SKEWED_GOLOMB, ARITHMETIC, INTERPOLATIVE, NIBBLE
	}

	/** A string used by {@link #valueOf(String[], Map)} to disable a component. */
	public static final String NONE = "NONE";

	/** An unmodifiable map representing the default flags for a standard index. */
	public static final Map<Component, Coding> DEFAULT_STANDARD_INDEX;

	/** An unmodifiable map representing the default flags for a payload-based index. */
	public static final Map<Component, Coding> DEFAULT_PAYLOAD_INDEX;

	/** An unmodifiable map representing the default flags for a standard index.
	 * @deprecated As of MG4J 1.2, replaced by {@link #DEFAULT_STANDARD_INDEX}.
	 */
	@Deprecated
	public static final Map<Component, Coding> DEFAULT;

	static {
		// Each backing map is filled completely before being wrapped, so the
		// published constants are never observable in a half-built state.
		final Map<Component, Coding> standard = new EnumMap<Component, Coding>(Component.class);
		standard.put(Component.FREQUENCIES, Coding.GAMMA);
		// This used to be GOLOMB, but precomputed codes made Golomb codes very slow in comparison
		standard.put(Component.POINTERS, Coding.DELTA);
		standard.put(Component.COUNTS, Coding.GAMMA);
		// This used to be GOLOMB, but experience has shown that loading sizes is always a problem.
		standard.put(Component.POSITIONS, Coding.DELTA);
		// The deprecated constant and its replacement share the very same instance.
		DEFAULT = DEFAULT_STANDARD_INDEX = Collections.unmodifiableMap(standard);

		final Map<Component, Coding> payload = new EnumMap<Component, Coding>(Component.class);
		payload.put(Component.FREQUENCIES, Coding.GAMMA);
		payload.put(Component.POINTERS, Coding.DELTA);
		// Payloads have no coding: the key is present, but mapped to null.
		payload.put(Component.PAYLOADS, null);
		DEFAULT_PAYLOAD_INDEX = Collections.unmodifiableMap(payload);
	}

	/** Returns a flag map corresponding to a given array of strings.
	 *
	 * <p>This method takes an array of (possibly untrimmed) flag strings
	 * of the form <samp><var>component</var>:<var>coding</var></samp> and turns
	 * them into a flag map (see the {@linkplain CompressionFlags introduction}).
	 * The flag map can be initialised by an optional default map, and
	 * the special value <samp>NONE</samp> for <samp><var>coding</var></samp> may be
	 * used to delete a key (the corresponding key in the flag map will be missing).
	 *
	 * <p>Whitespace is accepted around each string in the array, and also around
	 * the component and coding names themselves (e.g., <samp>POINTERS : DELTA</samp>).
	 *
	 * <p>The default map is copied, never modified, and may be any kind of map,
	 * including an empty one. Flags are applied in array order, so a later flag
	 * overrides an earlier one for the same component.
	 *
	 * @param flag an array of (possibly untrimmed) flag strings of
	 * the form <samp><var>component</var>:<var>coding</var></samp>.
	 * @param defaultMap an optional flag map of default values, or <code>null</code>.
	 * @return the corresponding flag map (a new, modifiable map).
	 * @throws IllegalArgumentException if a flag string does not split into exactly two
	 * parts around <samp>:</samp>, or if a component or coding name does not match
	 * an enum constant.
	 * @throws NullPointerException if <code>flag</code> or one of its elements is <code>null</code>.
	 */
	public static Map<Component, Coding> valueOf(final String[] flag,
			final Map<Component, Coding> defaultMap) {
		// Do not use the EnumMap(Map) copy constructor here: it throws an
		// IllegalArgumentException when handed an empty map that is not itself
		// an EnumMap, which would reject a perfectly legitimate empty default.
		final EnumMap<Component, Coding> result = new EnumMap<Component, Coding>(Component.class);
		if (defaultMap != null) {
			result.putAll(defaultMap);
		}

		for (final String rawFlag : flag) {
			final String[] spec = rawFlag.trim().split(":");
			if (spec.length != 2) {
				throw new IllegalArgumentException("Bad format: " + rawFlag);
			}

			// Trim each half too, so that whitespace around the colon is tolerated.
			final Component component = Component.valueOf(spec[0].trim());
			final String codingName = spec[1].trim();

			if (codingName.equals(NONE)) {
				result.remove(component);
			} else {
				result.put(component, Coding.valueOf(codingName));
			}
		}
		return result;
	}
}
|