package org.apache.lucene.index.memory;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.search.HitCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;

/**
 * High-performance single-document main memory Apache Lucene fulltext search index.
 *
 * <h4>Overview</h4>
 *
 * This class is a replacement for a large subset of
 * {@link org.apache.lucene.store.RAMDirectory} functionality. It is designed to
 * enable maximum efficiency for on-the-fly matchmaking that combines structured
 * and fuzzy fulltext search in realtime streaming applications, such as Nux XQuery
 * based XML message queues, publish-subscribe systems for blogs/newsfeeds, text
 * chat, data acquisition and distribution systems, application level routers,
 * firewalls, classifiers, etc.
 * Rather than targeting fulltext search of infrequent queries over huge persistent
 * data archives (historic search), this class targets fulltext search of huge
 * numbers of queries over comparatively small transient realtime data (prospective
 * search).
 * For example, as in
 * <pre>
 * float score = search(String text, Query query)
 * </pre>
 * <p>
 * Each instance can hold at most one Lucene "document", with a document containing
 * zero or more "fields", each field having a name and a fulltext value. The
 * fulltext value is tokenized (split and transformed) into zero or more index terms
 * (aka words) on <code>addField()</code>, according to the policy implemented by an
 * Analyzer. For example, Lucene analyzers can split on whitespace, normalize to lower case
 * for case insensitivity, ignore common terms with little discriminatory value such as "he", "in", "and" (stop
 * words), reduce the terms to their natural linguistic root form such as "fishing"
 * being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri
 * (upon indexing and/or querying), etc. For details, see
 * <a target="_blank" href="http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html">Lucene Analyzer Intro</a>.
 * <p>
 * Arbitrary Lucene queries can be run against this class - see <a target="_blank"
 * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">Lucene Query Syntax</a>
 * as well as <a target="_blank"
 * href="http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html">Query Parser Rules</a>.
 * Note that a Lucene query selects on the field names and associated (indexed)
 * tokenized terms, not on the original fulltext(s) - the latter are not stored
 * but rather thrown away immediately after tokenization.
 * <p>
 * For some interesting background information on search technology, see Bob Wyman's
 * <a target="_blank"
 * href="http://bobwyman.pubsub.com/main/2005/05/mary_hodder_poi.html">Prospective Search</a>,
 * Jim Gray's
 * <a target="_blank" href="http://www.acmqueue.org/modules.php?name=Content&pa=showpage&pid=293&page=4">
 * A Call to Arms - Custom subscriptions</a>, and Tim Bray's
 * <a target="_blank"
 * href="http://www.tbray.org/ongoing/When/200x/2003/07/30/OnSearchTOC">On Search, the Series</a>.
 *
 *
 * <h4>Example Usage</h4>
 *
 * <pre>
 * Analyzer analyzer = PatternAnalyzer.DEFAULT_ANALYZER;
 * //Analyzer analyzer = new SimpleAnalyzer();
 * MemoryIndex index = new MemoryIndex();
 * index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer);
 * index.addField("author", "Tales of James", analyzer);
 * QueryParser parser = new QueryParser("content", analyzer);
 * float score = index.search(parser.parse("+author:james +salmon~ +fish* manual~"));
 * if (score > 0.0f) {
 *   System.out.println("it's a match");
 * } else {
 *   System.out.println("no match found");
 * }
 * System.out.println("indexData=" + index.toString());
 * </pre>
 *
 *
 * <h4>Example XQuery Usage</h4>
 *
 * <pre>
 * (: An XQuery that finds all books authored by James that have something to do with "salmon fishing manuals", sorted by relevance :)
 * declare namespace lucene = "java:nux.xom.pool.FullTextUtil";
 * declare variable $query := "+salmon~ +fish* manual~"; (: any arbitrary Lucene query can go here :)
 *
 * for $book in /books/book[author="James" and lucene:match(abstract, $query) > 0.0]
 * let $score := lucene:match($book/abstract, $query)
 * order by $score descending
 * return $book
 * </pre>
 *
 *
 * <h4>No thread safety guarantees</h4>
 *
 * An instance can be queried multiple times with the same or different queries,
 * but an instance is not thread-safe. If desired use idioms such as:
 * <pre>
 * MemoryIndex index = ...
 * synchronized (index) {
 *   // read and/or write index (i.e. add fields and/or query)
 * }
 * </pre>
 *
 *
 * <h4>Performance Notes</h4>
 *
 * Internally there's a dedicated data structure geared towards efficient indexing
 * and searching, plus the necessary support code to seamlessly plug into the Lucene
 * framework.
 * <p>
 * This class performs very well for very small texts (e.g. 10 chars)
 * as well as for large texts (e.g. 10 MB) and everything in between.
 * Typically, it is about 10-100 times faster than <code>RAMDirectory</code>.
 * Note that <code>RAMDirectory</code> has particularly
 * large efficiency overheads for small to medium sized texts, both in time and space.
 * Indexing a field with N tokens takes O(N) in the best case, and O(N log N) in the worst
 * case. Memory consumption is probably larger than for <code>RAMDirectory</code>.
 * <p>
 * Example throughput of many simple term queries over a single MemoryIndex:
 * ~500000 queries/sec on a MacBook Pro, JDK 1.5.0_06, server VM.
 * As always, your mileage may vary.
 * <p>
 * If you're curious about where the bottlenecks are, run Java 1.5 with the
 * non-perturbing '-server -agentlib:hprof=cpu=samples,depth=10' flags, then study
 * the trace log and correlate its hotspot trailer with its call stack headers (see
 * <a target="_blank"
 * href="http://java.sun.com/developer/technicalArticles/Programming/HPROF.html">
 * hprof tracing</a>).
 *
 * @author whoschek.AT.lbl.DOT.gov
 */
public class MemoryIndex {

  /** info for each field: Map<String fieldName, Info field> */
  private final HashMap fields = new HashMap();

  /** fields sorted ascending by fieldName; lazily computed on demand */
  private transient Map.Entry[] sortedFields;

  /** pos: positions[3*i], startOffset: positions[3*i+1], endOffset: positions[3*i+2] */
  private final int stride;

  /** Could be made configurable; see {@link Document#setBoost(float)} */
  private static final float docBoost = 1.0f;

  private static final long serialVersionUID = 2782195016849084649L;

  private static final boolean DEBUG = false;

  /**
   * Sorts term entries into ascending order; also works for
   * Arrays.binarySearch() and Arrays.sort()
   */
  private static final Comparator termComparator = new Comparator() {
    public int compare(Object o1, Object o2) {
      if (o1 instanceof Map.Entry) o1 = ((Map.Entry) o1).getKey();
      if (o2 instanceof Map.Entry) o2 = ((Map.Entry) o2).getKey();
      if (o1 == o2) return 0;
      return ((String) o1).compareTo((String) o2);
    }
  };

  /**
   * Constructs an empty instance.
   */
  public MemoryIndex() {
    this(false);
  }

  /**
   * Constructs an empty instance that can optionally store the start and end
   * character offset of each token term in the text. This can be useful for
   * highlighting of hit locations with the Lucene highlighter package.
   * Private until the highlighter package matures, so that this can actually
   * be meaningfully integrated.
   *
   * @param storeOffsets
   *            whether or not to store the start and end character offset of
   *            each token term in the text
   */
  private MemoryIndex(boolean storeOffsets) {
    this.stride = storeOffsets ? 3 : 1;
  }

  /**
   * Convenience method; Tokenizes the given field text and adds the resulting
   * terms to the index; Equivalent to adding an indexed non-keyword Lucene
   * {@link org.apache.lucene.document.Field} that is
   * {@link org.apache.lucene.document.Field.Index#TOKENIZED tokenized},
   * {@link org.apache.lucene.document.Field.Store#NO not stored},
   * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions} (or
   * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS_OFFSETS termVectorStored with positions and offsets}).
   *
   * @param fieldName
   *            a name to be associated with the text
   * @param text
   *            the text to tokenize and index
   * @param analyzer
   *            the analyzer to use for tokenization
   */
  public void addField(String fieldName, String text, Analyzer analyzer) {
    if (fieldName == null)
      throw new IllegalArgumentException("fieldName must not be null");
    if (text == null)
      throw new IllegalArgumentException("text must not be null");
    if (analyzer == null)
      throw new IllegalArgumentException("analyzer must not be null");

    TokenStream stream;
    if (analyzer instanceof PatternAnalyzer) {
      stream = ((PatternAnalyzer) analyzer).tokenStream(fieldName, text);
    } else {
      stream = analyzer.tokenStream(fieldName,
          new PatternAnalyzer.FastStringReader(text));
    }
    addField(fieldName, stream);
  }

  /**
   * Convenience method; Creates and returns a token stream that generates a
   * token for each keyword in the given collection, "as is", without any
   * transforming text analysis. The resulting token stream can be fed into
   * {@link #addField(String, TokenStream)}, perhaps wrapped into another
   * {@link org.apache.lucene.analysis.TokenFilter}, as desired.
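   * <p>
   * For illustration, a minimal sketch of feeding untokenized keywords into an
   * index (the field name "tags" and the keyword values are made up for this
   * example):
   * <pre>
   * MemoryIndex index = new MemoryIndex();
   * index.addField("tags", index.keywordTokenStream(
   *     java.util.Arrays.asList(new String[] {"lucene", "search"})));
   * </pre>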
   *
   * @param keywords
   *            the keywords to generate tokens for
   * @return the corresponding token stream
   */
  public TokenStream keywordTokenStream(final Collection keywords) {
    // TODO: deprecate & move this method into AnalyzerUtil?
    if (keywords == null)
      throw new IllegalArgumentException("keywords must not be null");

    return new TokenStream() {
      private Iterator iter = keywords.iterator();
      private int start = 0;

      public Token next() {
        if (!iter.hasNext()) return null;

        Object obj = iter.next();
        if (obj == null)
          throw new IllegalArgumentException("keyword must not be null");

        String term = obj.toString();
        Token token = new Token(term, start, start + term.length());
        start += term.length() + 1; // separate words by 1 (blank) character
        return token;
      }
    };
  }

  /**
   * Equivalent to <code>addField(fieldName, stream, 1.0f)</code>.
   *
   * @param fieldName
   *            a name to be associated with the text
   * @param stream
   *            the token stream to retrieve tokens from
   */
  public void addField(String fieldName, TokenStream stream) {
    addField(fieldName, stream, 1.0f);
  }

  /**
   * Iterates over the given token stream and adds the resulting terms to the index;
   * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
   * Lucene {@link org.apache.lucene.document.Field}.
   * Finally closes the token stream. Note that untokenized keywords can be added with this method via
   * {@link #keywordTokenStream(Collection)}, the Lucene contrib <code>KeywordTokenizer</code> or similar utilities.
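   * <p>
   * For example, to weight hits in a "title" field twice as heavily as hits in
   * other fields (an illustrative sketch; the field name and boost value are
   * arbitrary):
   * <pre>
   * index.addField("title",
   *     analyzer.tokenStream("title", new java.io.StringReader("Alaska Fishing Manual")),
   *     2.0f);
   * </pre>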
   *
   * @param fieldName
   *            a name to be associated with the text
   * @param stream
   *            the token stream to retrieve tokens from
   * @param boost
   *            the boost factor for hits for this field
   * @see Field#setBoost(float)
   */
  public void addField(String fieldName, TokenStream stream, float boost) {
    /*
     * Note that this method signature avoids having a user call new
     * o.a.l.d.Field(...) which would be much too expensive due to the
     * String.intern() usage of that class.
     *
     * More often than not, String.intern() leads to serious performance
     * degradations rather than improvements! If you're curious why, check
     * out the JDK's native code, see how it oscillates multiple times back
     * and forth between Java code and native code on each intern() call,
     * only to end up using a plain vanilla java.util.HashMap on the Java
     * heap for its interned strings! String.equals() has a small cost
     * compared to String.intern(), trust me. Application level interning
     * (e.g. a HashMap per Directory/Index) typically leads to better
     * solutions than frequent hidden low-level calls to String.intern().
     *
     * Perhaps with some luck, Lucene's Field.java (and Term.java) and
     * cousins could be fixed to not use String.intern(). Sigh :-(
     */
    try {
      if (fieldName == null)
        throw new IllegalArgumentException("fieldName must not be null");
      if (stream == null)
        throw new IllegalArgumentException("token stream must not be null");
      if (boost <= 0.0f)
        throw new IllegalArgumentException("boost factor must be greater than 0.0");
      if (fields.get(fieldName) != null)
        throw new IllegalArgumentException("field must not be added more than once");

      HashMap terms = new HashMap();
      int numTokens = 0;
      int pos = -1;
      Token token;

      while ((token = stream.next()) != null) {
        String term = token.termText();
        if (term.length() == 0) continue; // nothing to do
        // if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        pos += token.getPositionIncrement();

        ArrayIntList positions = (ArrayIntList) terms.get(term);
        if (positions == null) { // term not seen before
          positions = new ArrayIntList(stride);
          terms.put(term, positions);
        }
        if (stride == 1) {
          positions.add(pos);
        } else {
          positions.add(pos, token.startOffset(), token.endOffset());
        }
      }

      // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
      if (numTokens > 0) {
        boost = boost * docBoost; // see DocumentWriter.addDocument(...)
        fields.put(fieldName, new Info(terms, numTokens, boost));
        sortedFields = null; // invalidate sorted view, if any
      }
    } catch (IOException e) { // can never happen
      throw new RuntimeException(e);
    } finally {
      try {
        if (stream != null) stream.close();
      } catch (IOException e2) {
        throw new RuntimeException(e2);
      }
    }
  }

  /**
   * Creates and returns a searcher that can be used to execute arbitrary
   * Lucene queries and to collect the resulting query results as hits.
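   * <p>
   * For example (an illustrative sketch; assumes a <code>query</code> has
   * already been parsed, e.g. via <code>QueryParser</code>):
   * <pre>
   * IndexSearcher searcher = index.createSearcher();
   * Hits hits = searcher.search(query);
   * </pre>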
   *
   * @return a searcher
   */
  public IndexSearcher createSearcher() {
    MemoryIndexReader reader = new MemoryIndexReader();
    IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
    reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
    return searcher;
  }

  /**
   * Convenience method that efficiently returns the relevance score by
   * matching this index against the given Lucene query expression.
   *
   * @param query
   *            an arbitrary Lucene query to run against this index
   * @return the relevance score of the matchmaking; a number in the range
   *         [0.0 .. 1.0], with 0.0 indicating no match. The higher the number
   *         the better the match.
   * @see org.apache.lucene.queryParser.QueryParser#parse(String)
   */
  public float search(Query query) {
    if (query == null)
      throw new IllegalArgumentException("query must not be null");

    Searcher searcher = createSearcher();
    try {
      final float[] scores = new float[1]; // inits to 0.0f (no match)
      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          scores[0] = score;
        }
      });
      return scores[0];
    } catch (IOException e) { // can never happen (the index resides in main memory)
      throw new RuntimeException(e);
    } finally {
      // searcher.close();
      /*
       * Note that it is harmless and important for good performance to
       * NOT close the index reader!!! This avoids all sorts of
       * unnecessary baggage and locking in the Lucene IndexReader
       * superclass, all of which is completely unnecessary for this main
       * memory index data structure without thread-safety claims.
       *
       * Wishing IndexReader would be an interface...
       *
       * Actually with the new tight createSearcher() API auto-closing is now
       * made impossible, hence searcher.close() would be harmless and also
       * would not degrade performance...
       */
    }
  }

  /**
   * Returns a reasonable approximation of the main memory [bytes] consumed by
   * this instance. Useful for smart memory sensitive caches/pools. Assumes
   * fieldNames are interned, whereas tokenized terms are memory-overlaid.
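   * <p>
   * For example, a size-bounded cache could use this estimate to decide when to
   * evict entries (an illustrative sketch; <code>cache</code>, <code>key</code>
   * and <code>maxBytes</code> are hypothetical):
   * <pre>
   * if (index.getMemorySize() > maxBytes) cache.remove(key);
   * </pre>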
   *
   * @return the main memory consumption
   */
  public int getMemorySize() {
    // for example usage in a smart cache see nux.xom.pool.Pool
    int PTR = VM.PTR;
    int INT = VM.INT;
    int size = 0;
    size += VM.sizeOfObject(2 * PTR + INT); // memory index
    if (sortedFields != null) size += VM.sizeOfObjectArray(sortedFields.length);

    size += VM.sizeOfHashMap(fields.size());
    Iterator iter = fields.entrySet().iterator();
    while (iter.hasNext()) { // for each Field Info
      Map.Entry entry = (Map.Entry) iter.next();
      Info info = (Info) entry.getValue();
      size += VM.sizeOfObject(2 * INT + 3 * PTR); // Info instance vars
      if (info.sortedTerms != null)
        size += VM.sizeOfObjectArray(info.sortedTerms.length);

      int len = info.terms.size();
      size += VM.sizeOfHashMap(len);
      Iterator iter2 = info.terms.entrySet().iterator();
      while (--len >= 0) { // for each term
        Map.Entry e = (Map.Entry) iter2.next();
        size += VM.sizeOfObject(PTR + 3 * INT); // assumes substring() memory overlay
        // size += STR + 2 * ((String) e.getKey()).length();
        ArrayIntList positions = (ArrayIntList) e.getValue();
        size += VM.sizeOfArrayIntList(positions.size());
      }
    }
    return size;
  }

  private int numPositions(ArrayIntList positions) {
    return positions.size() / stride;
  }

  /** sorts into ascending order (on demand), reusing memory along the way */
  private void sortFields() {
    if (sortedFields == null) sortedFields = sort(fields);
  }

  /** returns a view of the given map's entries, sorted ascending by key */
  private static Map.Entry[] sort(HashMap map) {
    int size = map.size();
    Map.Entry[] entries = new Map.Entry[size];

    Iterator iter = map.entrySet().iterator();
    for (int i = 0; i < size; i++) {
      entries[i] = (Map.Entry) iter.next();
    }

    if (size > 1) Arrays.sort(entries, termComparator);
    return entries;
  }

  /**
   * Returns a String representation of the index data for debugging purposes.
   *
   * @return the string representation
   */
  public String toString() {
    StringBuffer result = new StringBuffer(256);
    sortFields();
    int sumChars = 0;
    int sumPositions = 0;
    int sumTerms = 0;

    for (int i = 0; i < sortedFields.length; i++) {
      Map.Entry entry = sortedFields[i];
      String fieldName = (String) entry.getKey();
      Info info = (Info) entry.getValue();
      info.sortTerms();
      result.append(fieldName + ":\n");

      int numChars = 0;
      int numPos = 0;
      for (int j = 0; j < info.sortedTerms.length; j++) {
        Map.Entry e = info.sortedTerms[j];
        String term = (String) e.getKey();
        ArrayIntList positions = (ArrayIntList) e.getValue();
        result.append("\t'" + term + "':" + numPositions(positions) + ":");
        result.append(positions.toString(stride)); // ignore offsets
        result.append("\n");
        numPos += numPositions(positions);
        numChars += term.length();
      }

      result.append("\tterms=" + info.sortedTerms.length);
      result.append(", positions=" + numPos);
      result.append(", Kchars=" + (numChars / 1000.0f));
      result.append("\n");
      sumPositions += numPos;
      sumChars += numChars;
      sumTerms += info.sortedTerms.length;
    }

    result.append("\nfields=" + sortedFields.length);
    result.append(", terms=" + sumTerms);
    result.append(", positions=" + sumPositions);
    result.append(", Kchars=" + (sumChars / 1000.0f));
    return result.toString();
  }

  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  /**
   * Index data structure for a field; Contains the tokenized term texts and
   * their positions.
   */
  private static final class Info implements Serializable {

    /**
     * Term strings and their positions for this field: Map<String
     * termText, ArrayIntList positions>
     */
    private final HashMap terms;

    /** Terms sorted ascending by term text; computed on demand */
    private transient Map.Entry[] sortedTerms;

    /** Number of added tokens for this field */
    private final int numTokens;

    /** Boost factor for hits for this field */
    private final float boost;

    /** Term for this field's fieldName, lazily computed on demand */
    public transient Term template;

    private static final long serialVersionUID = 2882195016849084649L;

    public Info(HashMap terms, int numTokens, float boost) {
      this.terms = terms;
      this.numTokens = numTokens;
      this.boost = boost;
    }

    /**
     * Sorts hashed terms into ascending order, reusing memory along the
     * way. Note that sorting is lazily delayed until required (often it's
     * not required at all). If a sorted view is required then hashing +
     * sort + binary search is still faster and smaller than TreeMap usage
     * (which would be an alternative and somewhat more elegant approach,
     * apart from more sophisticated Tries / prefix trees).
     */
    public void sortTerms() {
      if (sortedTerms == null) sortedTerms = sort(terms);
    }

    /** note that the frequency can be calculated as numPositions(getPositions(x)) */
    public ArrayIntList getPositions(String term) {
      return (ArrayIntList) terms.get(term);
    }

    /** note that the frequency can be calculated as numPositions(getPositions(x)) */
    public ArrayIntList getPositions(int pos) {
      return (ArrayIntList) sortedTerms[pos].getValue();
    }

    public float getBoost() {
      return boost;
    }

  }

  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  /**
   * Efficient resizable auto-expanding list holding <code>int</code> elements;
   * implemented with arrays.
   */
  private static final class ArrayIntList implements Serializable {

    private int[] elements;
    private int size = 0;

    private static final long serialVersionUID = 2282195016849084649L;

    public ArrayIntList() {
      this(10);
    }

    public ArrayIntList(int initialCapacity) {
      elements = new int[initialCapacity];
    }

    public void add(int elem) {
      if (size == elements.length) ensureCapacity(size + 1);
      elements[size++] = elem;
    }

    public void add(int pos, int start, int end) {
      if (size + 3 > elements.length) ensureCapacity(size + 3);
      elements[size] = pos;
      elements[size + 1] = start;
      elements[size + 2] = end;
      size += 3;
    }

    public int get(int index) {
      if (index >= size) throwIndex(index);
      return elements[index];
    }

    public int size() {
      return size;
    }

    public int[] toArray(int stride) {
      int[] arr = new int[size() / stride];
      if (stride == 1) {
        System.arraycopy(elements, 0, arr, 0, size); // fast path
      } else {
        for (int i = 0, j = 0; j < size; i++, j += stride) arr[i] = elements[j];
      }
      return arr;
    }

    private void ensureCapacity(int minCapacity) {
      int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1);
      int[] newElements = new int[newCapacity];
      System.arraycopy(elements, 0, newElements, 0, size);
      elements = newElements;
    }

    private void throwIndex(int index) {
      throw new IndexOutOfBoundsException("index: " + index + ", size: " + size);
    }

    /** returns the first few positions (without offsets); debug only */
    public String toString(int stride) {
      int s = size() / stride;
      int len = Math.min(10, s); // avoid printing huge lists
      StringBuffer buf = new StringBuffer(4 * len);
      buf.append("[");
      for (int i = 0; i < len; i++) {
        buf.append(get(i * stride));
        if (i < len - 1) buf.append(", ");
      }
      if (len != s) buf.append(", ..."); // and some more...
      buf.append("]");
      return buf.toString();
    }
  }

  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  private static final Term MATCH_ALL_TERM = new Term("", "");

  /**
   * Search support for Lucene framework integration; implements all methods
   * required by the Lucene IndexReader contracts.
   */
  private final class MemoryIndexReader extends IndexReader {

    private Searcher searcher; // needed to find searcher.getSimilarity()

    private MemoryIndexReader() {
      super(null); // avoid as much superclass baggage as possible
    }

    // lucene >= 1.9 or lucene-1.4.3 with patch removing "final" in superclass
    protected void finalize() {
    }

    private Info getInfo(String fieldName) {
      return (Info) fields.get(fieldName);
    }

    private Info getInfo(int pos) {
      return (Info) sortedFields[pos].getValue();
    }

    public int docFreq(Term term) {
      Info info = getInfo(term.field());
      int freq = 0;
      if (info != null) freq = info.getPositions(term.text()) != null ? 1 : 0;
      if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq);
      return freq;
    }

    public TermEnum terms() {
      if (DEBUG) System.err.println("MemoryIndexReader.terms()");
      return terms(MATCH_ALL_TERM);
    }

    public TermEnum terms(Term term) {
      if (DEBUG) System.err.println("MemoryIndexReader.terms: " + term);

      int i; // index into info.sortedTerms
      int j; // index into sortedFields

      sortFields();
      if (sortedFields.length == 1 && sortedFields[0].getKey() == term.field()) {
        j = 0; // fast path
      } else {
        j = Arrays.binarySearch(sortedFields, term.field(), termComparator);
      }

      if (j < 0) { // not found; choose successor
        j = -j - 1;
        i = 0;
        if (j < sortedFields.length) getInfo(j).sortTerms();
      } else { // found
        Info info = getInfo(j);
        info.sortTerms();
        i = Arrays.binarySearch(info.sortedTerms, term.text(), termComparator);
        if (i < 0) { // not found; choose successor
          i = -i - 1;
          if (i >= info.sortedTerms.length) { // move to next successor
            j++;
            i = 0;
            if (j < sortedFields.length) getInfo(j).sortTerms();
          }
        }
      }
      final int ix = i;
      final int jx = j;

      return new TermEnum() {

        private int i = ix; // index into info.sortedTerms
        private int j = jx; // index into sortedFields

        public boolean next() {
          if (DEBUG) System.err.println("TermEnum.next");
          if (j >= sortedFields.length) return false;
          Info info = getInfo(j);
          if (++i < info.sortedTerms.length) return true;

          // move to successor
          j++;
          i = 0;
          if (j >= sortedFields.length) return false;
          getInfo(j).sortTerms();
          return true;
        }

        public Term term() {
          if (DEBUG) System.err.println("TermEnum.term: " + i);
          if (j >= sortedFields.length) return null;
          Info info = getInfo(j);
          if (i >= info.sortedTerms.length) return null;
          // if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey());
          return createTerm(info, j, (String) info.sortedTerms[i].getKey());
        }

        public int docFreq() {
          if (DEBUG) System.err.println("TermEnum.docFreq");
          if (j >= sortedFields.length) return 0;
          Info info = getInfo(j);
          if (i >= info.sortedTerms.length) return 0;
          return numPositions(info.getPositions(i));
        }

        public void close() {
          if (DEBUG) System.err.println("TermEnum.close");
        }

        /** Returns a new Term object, minimizing String.intern() overheads. */
        private Term createTerm(Info info, int pos, String text) {
          // Assertion: sortFields has already been called before
          Term template = info.template;
          if (template == null) { // not yet cached?
            String fieldName = (String) sortedFields[pos].getKey();
            template = new Term(fieldName, "");
            info.template = template;
          }
          return template.createTerm(text);
        }

      };
    }

    public TermPositions termPositions() {
      if (DEBUG) System.err.println("MemoryIndexReader.termPositions");

      return new TermPositions() {

        private boolean hasNext;
        private int cursor = 0;
        private ArrayIntList current;

        public void seek(Term term) {
          if (DEBUG) System.err.println(".seek: " + term);
          Info info = getInfo(term.field());
          current = info == null ? null : info.getPositions(term.text());
          hasNext = (current != null);
          cursor = 0;
        }

        public void seek(TermEnum termEnum) {
          if (DEBUG) System.err.println(".seekEnum");
          seek(termEnum.term());
        }

        public int doc() {
          if (DEBUG) System.err.println(".doc");
          return 0;
        }

        public int freq() {
          int freq = current != null ? numPositions(current) : 0;
          if (DEBUG) System.err.println(".freq: " + freq);
          return freq;
        }

        public boolean next() {
          if (DEBUG) System.err.println(".next: " + current + ", oldHasNext=" + hasNext);
          boolean next = hasNext;
          hasNext = false;
          return next;
        }

        public int read(int[] docs, int[] freqs) {
          if (DEBUG) System.err.println(".read: " + docs.length);
          if (!hasNext) return 0;
          hasNext = false;
          docs[0] = 0;
          freqs[0] = freq();
          return 1;
        }

        public boolean skipTo(int target) {
          if (DEBUG) System.err.println(".skipTo: " + target);
          return next();
        }

        public void close() {
          if (DEBUG) System.err.println(".close");
        }

        public int nextPosition() { // implements TermPositions
          int pos = current.get(cursor);
          cursor += stride;
          if (DEBUG) System.err.println(".nextPosition: " + pos);
          return pos;
        }

        /**
         * Not implemented.
         * @throws UnsupportedOperationException
         */
        public int getPayloadLength() {
          throw new UnsupportedOperationException();
        }

        /**
         * Not implemented.
         * @throws UnsupportedOperationException
         */
        public byte[] getPayload(byte[] data, int offset) throws IOException {
          throw new UnsupportedOperationException();
        }

        public boolean isPayloadAvailable() {
          // unsupported
          return false;
        }

      };
    }

    public TermDocs termDocs() {
      if (DEBUG) System.err.println("MemoryIndexReader.termDocs");
      return termPositions();
    }

    public TermFreqVector[] getTermFreqVectors(int docNumber) {
      if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
      TermFreqVector[] vectors = new TermFreqVector[fields.size()];
      // if (vectors.length == 0) return null;
      Iterator iter = fields.keySet().iterator();
      for (int i = 0; i < vectors.length; i++) {
        String fieldName = (String) iter.next();
        vectors[i] = getTermFreqVector(docNumber, fieldName);
      }
      return vectors;
    }

    public void getTermFreqVector(int docNumber, TermVectorMapper mapper)
        throws IOException {
      if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector");

      for (Iterator iterator = fields.keySet().iterator(); iterator.hasNext();) {
        String fieldName = (String) iterator.next();
        getTermFreqVector(docNumber, fieldName, mapper);
      }
    }

    public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper)
        throws IOException {
      if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector");
      final Info info = getInfo(field);
      if (info == null) {
        return;
      }
      info.sortTerms();
      mapper.setExpectations(field, info.sortedTerms.length, stride != 1, true);
      for (int i = info.sortedTerms.length; --i >= 0;) {
        ArrayIntList positions = (ArrayIntList) info.sortedTerms[i].getValue();
        int size = positions.size();

        // offsets are interleaved with positions only if stride != 1;
        // report null offsets otherwise (reading interleaved offsets with
        // stride == 1 would walk past the end of the position list)
        org.apache.lucene.index.TermVectorOffsetInfo[] offsets = null;
        if (stride != 1) {
          offsets = new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];
          for (int k = 0, j = 1; j < size; k++, j += stride) {
            int start = positions.get(j);
            int end = positions.get(j + 1);
            offsets[k] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end);
          }
        }
        mapper.map((String) info.sortedTerms[i].getKey(),
            numPositions(positions), offsets, positions.toArray(stride));
      }
    }

    public TermFreqVector getTermFreqVector(int docNumber, final String fieldName) {
      if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector");
      final Info info = getInfo(fieldName);
      if (info == null) return null; // TODO: or return empty vector impl???
      info.sortTerms();

      return new TermPositionVector() {

        private final Map.Entry[] sortedTerms = info.sortedTerms;

        public String getField() {
          return fieldName;
        }

        public int size() {
          return sortedTerms.length;
        }

        public String[] getTerms() {
          String[] terms = new String[sortedTerms.length];
          for (int i = sortedTerms.length; --i >= 0;) {
            terms[i] = (String) sortedTerms[i].getKey();
          }
          return terms;
        }

        public int[] getTermFrequencies() {
          int[] freqs = new int[sortedTerms.length];
          for (int i = sortedTerms.length; --i >= 0;) {
            freqs[i] = numPositions((ArrayIntList) sortedTerms[i].getValue());
          }
          return freqs;
        }

        public int indexOf(String term) {
          int i = Arrays.binarySearch(sortedTerms, term, termComparator);
          return i >= 0 ? i : -1;
        }

        public int[] indexesOf(String[] terms, int start, int len) {
          int[] indexes = new int[len];
          for (int i = 0; i < len; i++) {
            indexes[i] = indexOf(terms[start++]);
          }
          return indexes;
        }

        // lucene >= 1.4.3
        public int[] getTermPositions(int index) {
          return ((ArrayIntList) sortedTerms[index].getValue()).toArray(stride);
        }

        // lucene >= 1.9 (remove this method for lucene-1.4.3)
        public org.apache.lucene.index.TermVectorOffsetInfo[] getOffsets(int index) {
          if (stride == 1) return null; // no offsets stored

          ArrayIntList positions = (ArrayIntList) sortedTerms[index].getValue();
          int size = positions.size();
          org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
              new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];

          for (int i = 0, j = 1; j < size; i++, j += stride) {
            int start = positions.get(j);
            int end = positions.get(j + 1);
            offsets[i] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end);
          }
          return offsets;
        }

      };
    }

    private Similarity getSimilarity() {
      if (searcher != null) return searcher.getSimilarity();
      return Similarity.getDefault();
    }

    private void setSearcher(Searcher searcher) {
      this.searcher = searcher;
    }

    /** performance hack: cache norms to avoid repeated expensive calculations */
    private byte[] cachedNorms;
    private String cachedFieldName;
    private Similarity cachedSimilarity;

    public byte[] norms(String fieldName) {
      byte[] norms = cachedNorms;
      Similarity sim = getSimilarity();
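      // reference (rather than equals()) comparison is deliberate: field names
      // are assumed interned (see getMemorySize()), which keeps this check cheap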
      if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
        Info info = getInfo(fieldName);
        int numTokens = info != null ? info.numTokens : 0;
        float n = sim.lengthNorm(fieldName, numTokens);
        float boost = info != null ? info.getBoost() : 1.0f;
        n = n * boost; // see DocumentWriter.writeNorms(String segment)
        byte norm = Similarity.encodeNorm(n);
        norms = new byte[] { norm };

        // cache it for future reuse
        cachedNorms = norms;
        cachedFieldName = fieldName;
        cachedSimilarity = sim;
        if (DEBUG)
          System.err.println("MemoryIndexReader.norms: " + fieldName + ":" + n + ":" + norm + ":" + numTokens);
      }
      return norms;
    }

    public void norms(String fieldName, byte[] bytes, int offset) {
      if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName);
      byte[] norms = norms(fieldName);
      System.arraycopy(norms, 0, bytes, offset, norms.length);
    }

    protected void doSetNorm(int doc, String fieldName, byte value) {
      throw new UnsupportedOperationException();
    }

    public int numDocs() {
      if (DEBUG) System.err.println("MemoryIndexReader.numDocs");
      return fields.size() > 0 ? 1 : 0;
    }

    public int maxDoc() {
      if (DEBUG) System.err.println("MemoryIndexReader.maxDoc");
      return 1;
    }

    public Document document(int n) {
      if (DEBUG) System.err.println("MemoryIndexReader.document");
      return new Document(); // there are no stored fields
    }

    public Document document(int n, FieldSelector fieldSelector) throws IOException {
      if (DEBUG) System.err.println("MemoryIndexReader.document");
      return new Document(); // there are no stored fields
    }

    public boolean isDeleted(int n) {
      if (DEBUG) System.err.println("MemoryIndexReader.isDeleted");
      return false;
    }

    public boolean hasDeletions() {
      if (DEBUG) System.err.println("MemoryIndexReader.hasDeletions");
      return false;
    }

    protected void doDelete(int docNum) {
      throw new UnsupportedOperationException();
    }

    protected void doUndeleteAll() {
      throw new UnsupportedOperationException();
    }

    protected void doCommit() {
      if (DEBUG) System.err.println("MemoryIndexReader.doCommit");
    }

    protected void doClose() {
      if (DEBUG) System.err.println("MemoryIndexReader.doClose");
    }
    // lucene >= 1.9 (remove this method for lucene-1.4.3)
    // TODO: when we convert to JDK 1.5, make the return type Set<String>
    public Collection getFieldNames(FieldOption fieldOption) {
      if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
      if (fieldOption == FieldOption.UNINDEXED)
        return Collections.EMPTY_SET;
      if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
        return Collections.EMPTY_SET;
      if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
        return Collections.EMPTY_SET;
      if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
        return Collections.EMPTY_SET;

      return Collections.unmodifiableSet(fields.keySet());
    }
  }

  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  private static final class VM {

    public static final int PTR = is64BitVM() ? 8 : 4;

    // bytes occupied by primitive data types
    public static final int BOOLEAN = 1;
    public static final int BYTE = 1;
    public static final int CHAR = 2;
    public static final int SHORT = 2;
    public static final int INT = 4;
    public static final int LONG = 8;
    public static final int FLOAT = 4;
    public static final int DOUBLE = 8;

    private static final int LOG_PTR = (int) Math.round(log2(PTR));

    /**
     * Object header of any heap allocated Java object.
     * ptr to class, info for monitor, gc, hash, etc.
     */
    // private static final int OBJECT_HEADER = 2*4; // even on 64 bit VMs?
    private static final int OBJECT_HEADER = 2 * PTR;

    /**
     * Modern VMs tend to trade space for time, allocating memory on word
     * boundaries. For example, on a 64 bit VM, the variables of a class with
     * one 32 bit integer and one Java char really consume 8 bytes instead of 6
     * bytes. 2 bytes are spent on padding. Similarly, on a 64 bit VM a
     * java.lang.Integer consumes OBJECT_HEADER + 8 bytes rather than
     * OBJECT_HEADER + 4 bytes.
     */
    private static final boolean IS_WORD_ALIGNED_VM = true;

    private VM() {
    } // not instantiable

    // assumes n > 0
    // 64 bit VM:
    //   0     --> 0*PTR
    //   1..8  --> 1*PTR
    //   9..16 --> 2*PTR
    private static int sizeOf(int n) {
      return IS_WORD_ALIGNED_VM ?
          // ((n-1)/PTR + 1) * PTR : // slow version
          (((n - 1) >> LOG_PTR) + 1) << LOG_PTR : // fast version
          n;
    }

    public static int sizeOfObject(int n) {
      return sizeOf(OBJECT_HEADER + n);
    }

    public static int sizeOfObjectArray(int len) {
      return sizeOfObject(INT + PTR * len);
    }

    public static int sizeOfCharArray(int len) {
      return sizeOfObject(INT + CHAR * len);
    }

    public static int sizeOfIntArray(int len) {
      return sizeOfObject(INT + INT * len);
    }

    public static int sizeOfString(int len) {
      return sizeOfObject(3 * INT + PTR) + sizeOfCharArray(len);
    }

    public static int sizeOfHashMap(int len) {
      return sizeOfObject(4 * PTR + 4 * INT) + sizeOfObjectArray(len)
          + len * sizeOfObject(3 * PTR + INT); // entries
    }

    // note: does not include referenced objects
    public static int sizeOfArrayList(int len) {
      return sizeOfObject(PTR + 2 * INT) + sizeOfObjectArray(len);
    }

    public static int sizeOfArrayIntList(int len) {
      return sizeOfObject(PTR + INT) + sizeOfIntArray(len);
    }

    private static boolean is64BitVM() {
      try {
        int bits = Integer.getInteger("sun.arch.data.model", 0).intValue();
        if (bits != 0) return bits == 64;

        // fallback if sun.arch.data.model isn't available
        return System.getProperty("java.vm.name").toLowerCase().indexOf("64") >= 0;
      } catch (Throwable t) {
        return false; // better safe than sorry (applets, security managers, etc.) ...
      }
    }

    /** logarithm to the base 2. Example: log2(4) == 2, log2(8) == 3 */
    private static double log2(double value) {
      return Math.log(value) / Math.log(2);
    }

  }

}