001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: DTMStringPool.java,v 1.8 2004/02/16 23:06:11 minchau Exp $
018: */
019:
020: package org.apache.xml.dtm.ref;
021:
022: import java.util.Vector;
023:
024: import org.apache.xml.utils.IntVector;
025:
026: /** <p>DTMStringPool is an "interning" mechanism for strings. It will
027: * create a stable 1:1 mapping between a set of string values and a set of
028: * integer index values, so the integers can be used to reliably and
029: * uniquely identify (and when necessary retrieve) the strings.</p>
030: *
031: * <p>Design Priorities:
032: * <ul>
033: * <li>String-to-index lookup speed is critical.</li>
034: * <li>Index-to-String lookup speed is slightly less so.</li>
035: * <li>Threadsafety is not guaranteed at this level.
036: * Enforce that in the application if needed.</li>
037: * <li>Storage efficiency is an issue but not a huge one.
038: * It is expected that string pools won't exceed about 2000 entries.</li>
039: * </ul>
040: * </p>
041: *
042: * <p>Implementation detail: A standard Hashtable is relatively
043: * inefficient when looking up primitive int values, especially when
044: * we're already maintaining an int-to-string vector. So I'm
045: * maintaining a simple hash chain within this class.</p>
046: *
047: * <p>NOTE: There is nothing in the code that has a real dependency upon
048: * String. It would work with any object type that implements reliable
049: * .hashCode() and .equals() operations. The API enforces Strings because
050: * it's safer that way, but this could trivially be turned into a general
051: * ObjectPool if one was needed.</p>
052: *
053: * <p>Status: Passed basic test in main().</p>
054: * */
055: public class DTMStringPool {
056: Vector m_intToString;
057: static final int HASHPRIME = 101;
058: int[] m_hashStart = new int[HASHPRIME];
059: IntVector m_hashChain;
060: public static final int NULL = -1;
061:
062: /**
063: * Create a DTMStringPool using the given chain size
064: *
065: * @param chainSize The size of the hash chain vector
066: */
067: public DTMStringPool(int chainSize) {
068: m_intToString = new Vector();
069: m_hashChain = new IntVector(chainSize);
070: removeAllElements();
071:
072: // -sb Add this to force empty strings to be index 0.
073: stringToIndex("");
074: }
075:
076: public DTMStringPool() {
077: this (512);
078: }
079:
080: public void removeAllElements() {
081: m_intToString.removeAllElements();
082: for (int i = 0; i < HASHPRIME; ++i)
083: m_hashStart[i] = NULL;
084: m_hashChain.removeAllElements();
085: }
086:
087: /** @return string whose value is uniquely identified by this integer index.
088: * @throws java.lang.ArrayIndexOutOfBoundsException
089: * if index doesn't map to a string.
090: * */
091: public String indexToString(int i)
092: throws java.lang.ArrayIndexOutOfBoundsException {
093: if (i == NULL)
094: return null;
095: return (String) m_intToString.elementAt(i);
096: }
097:
098: /** @return integer index uniquely identifying the value of this string. */
099: public int stringToIndex(String s) {
100: if (s == null)
101: return NULL;
102:
103: int hashslot = s.hashCode() % HASHPRIME;
104: if (hashslot < 0)
105: hashslot = -hashslot;
106:
107: // Is it one we already know?
108: int hashlast = m_hashStart[hashslot];
109: int hashcandidate = hashlast;
110: while (hashcandidate != NULL) {
111: if (m_intToString.elementAt(hashcandidate).equals(s))
112: return hashcandidate;
113:
114: hashlast = hashcandidate;
115: hashcandidate = m_hashChain.elementAt(hashcandidate);
116: }
117:
118: // New value. Add to tables.
119: int newIndex = m_intToString.size();
120: m_intToString.addElement(s);
121:
122: m_hashChain.addElement(NULL); // Initialize to no-following-same-hash
123: if (hashlast == NULL) // First for this hash
124: m_hashStart[hashslot] = newIndex;
125: else
126: // Link from previous with same hash
127: m_hashChain.setElementAt(newIndex, hashlast);
128:
129: return newIndex;
130: }
131:
132: /** Command-line unit test driver. This test relies on the fact that
133: * this version of the pool assigns indices consecutively, starting
134: * from zero, as new unique strings are encountered.
135: */
136: public static void main(String[] args) {
137: String[] word = { "Zero", "One", "Two", "Three", "Four",
138: "Five", "Six", "Seven", "Eight", "Nine", "Ten",
139: "Eleven", "Twelve", "Thirteen", "Fourteen", "Fifteen",
140: "Sixteen", "Seventeen", "Eighteen", "Nineteen",
141: "Twenty", "Twenty-One", "Twenty-Two", "Twenty-Three",
142: "Twenty-Four", "Twenty-Five", "Twenty-Six",
143: "Twenty-Seven", "Twenty-Eight", "Twenty-Nine",
144: "Thirty", "Thirty-One", "Thirty-Two", "Thirty-Three",
145: "Thirty-Four", "Thirty-Five", "Thirty-Six",
146: "Thirty-Seven", "Thirty-Eight", "Thirty-Nine" };
147:
148: DTMStringPool pool = new DTMStringPool();
149:
150: System.out
151: .println("If no complaints are printed below, we passed initial test.");
152:
153: for (int pass = 0; pass <= 1; ++pass) {
154: int i;
155:
156: for (i = 0; i < word.length; ++i) {
157: int j = pool.stringToIndex(word[i]);
158: if (j != i)
159: System.out
160: .println("\tMismatch populating pool: assigned "
161: + j + " for create " + i);
162: }
163:
164: for (i = 0; i < word.length; ++i) {
165: int j = pool.stringToIndex(word[i]);
166: if (j != i)
167: System.out
168: .println("\tMismatch in stringToIndex: returned "
169: + j + " for lookup " + i);
170: }
171:
172: for (i = 0; i < word.length; ++i) {
173: String w = pool.indexToString(i);
174: if (!word[i].equals(w))
175: System.out
176: .println("\tMismatch in indexToString: returned"
177: + w + " for lookup " + i);
178: }
179:
180: pool.removeAllElements();
181:
182: System.out.println("\nPass " + pass + " complete\n");
183: } // end pass loop
184: }
185: }
|