001: /**
002: Copyright 2004 Sun Microsystems, Inc. All rights reserved.
003: SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
004: */package com.sun.xml.stream.xerces.util;
005:
006: /**
007: * This class is a symbol table implementation that guarantees that
008: * strings used as identifiers are unique references. Multiple calls
009: * to <code>addSymbol</code> will always return the same string
010: * reference.
011: * <p>
012: * The symbol table performs the same task as <code>String.intern()</code>
013: * with the following differences:
014: * <ul>
015: * <li>
016: * A new string object does not need to be created in order to
017: * retrieve a unique reference. Symbols can be added by using
018: * a series of characters in a character array.
019: * </li>
020: * <li>
021: * Users of the symbol table can provide their own symbol hashing
022: * implementation. For example, a simple string hashing algorithm
023: * may fail to produce a balanced set of hashcodes for symbols
024: * that are <em>mostly</em> unique. Strings with similar leading
025: * characters are especially prone to this poor hashing behavior.
026: * </li>
027: * </ul>
028: *
029: * @see SymbolHash
030: *
031: * @author Andy Clark
032: *
033: * @version $Id: SymbolTable.java,v 1.2 2006/04/01 06:01:41 jeffsuttor Exp $
034: */
public class SymbolTable {

    //
    // Constants
    //

    /** Default table size. */
    protected static final int TABLE_SIZE = 173;

    /**
     * Mask applied to the hash codes computed by this class. It clears the
     * sign bit (and, having only seven hex digits, the four bits below it),
     * so the built-in hash functions never return a negative value. The
     * value is kept exactly as it was so existing hash codes do not change.
     */
    private static final int HASH_MASK = 0x7FFFFFF;

    //
    // Data
    //

    /**
     * Character array of the symbol most recently returned by one of the
     * <code>addSymbol</code> methods; <code>null</code> until the first
     * such call.
     */
    protected char[] symbolAsArray = null;

    /** Buckets. Each bucket is the head of a singly linked list of entries. */
    protected Entry[] fBuckets = null;

    /** Actual table size, i.e. the number of buckets. */
    protected int fTableSize;

    //
    // Constructors
    //

    /** Constructs a symbol table with a default number of buckets. */
    public SymbolTable() {
        this(TABLE_SIZE);
    }

    /**
     * Constructs a symbol table with a specified number of buckets.
     *
     * @param tableSize The number of buckets; must be positive.
     *
     * @throws IllegalArgumentException If <code>tableSize</code> is zero
     *                                  or negative.
     */
    public SymbolTable(int tableSize) {
        // A zero size would make every later lookup divide by zero, and a
        // negative one cannot be allocated at all; reject both up front.
        if (tableSize <= 0) {
            throw new IllegalArgumentException(
                    "tableSize must be positive: " + tableSize);
        }
        fTableSize = tableSize;
        fBuckets = new Entry[fTableSize];
    }

    //
    // Public methods
    //

    /**
     * Adds the specified symbol to the symbol table and returns a
     * reference to the unique symbol. If the symbol already exists,
     * the previous symbol reference is returned instead, in order to
     * guarantee that symbol references remain unique.
     * <p>
     * The character array of the returned symbol is remembered and can
     * be retrieved with {@link #getCharArray()}.
     *
     * @param symbol The new symbol.
     *
     * @return The unique reference for the symbol.
     */
    public String addSymbol(String symbol) {

        final int hash = hash(symbol);
        final int bucket = bucketIndex(hash);

        Entry entry = findEntry(bucket, hash, symbol);
        if (entry == null) {
            // not present yet: prepend a new entry to the bucket's chain
            entry = new Entry(symbol, fBuckets[bucket]);
            entry.hashCode = hash;
            fBuckets[bucket] = entry;
        }
        symbolAsArray = entry.characters;
        return entry.symbol;

    } // addSymbol(String):String

    /**
     * Adds the specified symbol to the symbol table and returns a
     * reference to the unique symbol. If the symbol already exists,
     * the previous symbol reference is returned instead, in order to
     * guarantee that symbol references remain unique.
     * <p>
     * The character array of the returned symbol is remembered and can
     * be retrieved with {@link #getCharArray()}.
     *
     * @param buffer The buffer containing the new symbol.
     * @param offset The offset into the buffer of the new symbol.
     * @param length The length of the new symbol in the buffer.
     *
     * @return The unique reference for the symbol.
     */
    public String addSymbol(char[] buffer, int offset, int length) {

        final int hash = hash(buffer, offset, length);
        final int bucket = bucketIndex(hash);

        Entry entry = findEntry(bucket, hash, buffer, offset, length);
        if (entry == null) {
            // not present yet: prepend a new entry to the bucket's chain
            entry = new Entry(buffer, offset, length, fBuckets[bucket]);
            entry.hashCode = hash;
            fBuckets[bucket] = entry;
        }
        symbolAsArray = entry.characters;
        return entry.symbol;

    } // addSymbol(char[],int,int):String

    /**
     * Returns a hashcode value for the specified symbol. The value
     * returned by this method must be identical to the value returned
     * by the <code>hash(char[],int,int)</code> method when called
     * with the character array that comprises the symbol string.
     *
     * @param symbol The symbol to hash.
     *
     * @return The hash code; this implementation never returns a
     *         negative value.
     */
    public int hash(String symbol) {

        int code = 0;
        final int length = symbol.length();
        for (int i = 0; i < length; i++) {
            code = code * 37 + symbol.charAt(i);
        }
        return code & HASH_MASK;

    } // hash(String):int

    /**
     * Returns a hashcode value for the specified symbol information.
     * The value returned by this method must be identical to the value
     * returned by the <code>hash(String)</code> method when called
     * with the string object created from the symbol information.
     *
     * @param buffer The character buffer containing the symbol.
     * @param offset The offset into the character buffer of the start
     *               of the symbol.
     * @param length The length of the symbol.
     *
     * @return The hash code; this implementation never returns a
     *         negative value.
     */
    public int hash(char[] buffer, int offset, int length) {

        int code = 0;
        for (int i = 0; i < length; i++) {
            code = code * 37 + buffer[offset + i];
        }
        return code & HASH_MASK;

    } // hash(char[],int,int):int

    /**
     * Returns true if the symbol table already contains the specified
     * symbol. The table is not modified.
     *
     * @param symbol The symbol to look for.
     *
     * @return <code>true</code> if the symbol is in the table.
     */
    public boolean containsSymbol(String symbol) {

        final int hash = hash(symbol);
        return findEntry(bucketIndex(hash), hash, symbol) != null;

    } // containsSymbol(String):boolean

    /**
     * Returns true if the symbol table already contains the specified
     * symbol. The table is not modified.
     *
     * @param buffer The buffer containing the symbol to look for.
     * @param offset The offset into the buffer.
     * @param length The length of the symbol in the buffer.
     *
     * @return <code>true</code> if the symbol is in the table.
     */
    public boolean containsSymbol(char[] buffer, int offset, int length) {

        final int hash = hash(buffer, offset, length);
        return findEntry(bucketIndex(hash), hash, buffer, offset, length) != null;

    } // containsSymbol(char[],int,int):boolean

    /**
     * Return previously retrieved symbol as character array.
     * <p>
     * The array returned is the one stored in the table entry, not a
     * copy, so callers must not modify it.
     *
     * @return The characters of the symbol most recently returned by
     *         <code>addSymbol</code>, or <code>null</code> if no symbol
     *         has been added yet.
     */
    public char[] getCharArray() {
        return symbolAsArray;
    }

    //
    // Private methods
    //

    /**
     * Maps a hash code onto a bucket index in the range
     * <code>0 .. fTableSize - 1</code>.
     */
    private int bucketIndex(int hash) {
        // hash() may be overridden and return a negative value; since the
        // remainder operator keeps the sign of its left operand, the sign
        // bit is cleared first so the index can never be negative. For
        // non-negative hash codes this is exactly hash % fTableSize.
        return (hash & 0x7FFFFFFF) % fTableSize;
    }

    /**
     * Searches one bucket's chain for the entry holding the given string
     * symbol; returns <code>null</code> if there is none.
     */
    private Entry findEntry(int bucket, int hash, String symbol) {
        final int length = symbol.length();
        for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
            // cheap length and hash checks first, full comparison last
            if (length == entry.characters.length
                    && hash == entry.hashCode
                    && symbol.regionMatches(0, entry.symbol, 0, length)) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Searches one bucket's chain for the entry whose characters equal the
     * given buffer region; returns <code>null</code> if there is none.
     */
    private Entry findEntry(int bucket, int hash,
                            char[] buffer, int offset, int length) {
        for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
            // cheap length and hash checks first, full comparison last
            if (length == entry.characters.length
                    && hash == entry.hashCode
                    && regionEquals(buffer, offset, entry.characters)) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Returns true if the <code>expected.length</code> characters of
     * <code>buffer</code> starting at <code>offset</code> equal
     * <code>expected</code>.
     */
    private static boolean regionEquals(char[] buffer, int offset, char[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (buffer[offset + i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    //
    // Classes
    //

    /**
     * This class is a symbol table entry. Each entry acts as a node
     * in a linked list.
     */
    protected static final class Entry {

        //
        // Data
        //

        /** Symbol (always an interned string). */
        public String symbol;

        /** Hash code of the symbol, assigned by the table after construction. */
        int hashCode = 0;

        /**
         * Symbol characters. This information is duplicated here for
         * comparison performance.
         */
        public char[] characters;

        /** The next entry. */
        public Entry next;

        //
        // Constructors
        //

        /**
         * Constructs a new entry from the specified symbol and next entry
         * reference. The symbol is interned and its characters are copied
         * into a private array.
         */
        public Entry(String symbol, Entry next) {
            this.symbol = symbol.intern();
            characters = new char[symbol.length()];
            symbol.getChars(0, characters.length, characters, 0);
            this.next = next;
        }

        /**
         * Constructs a new entry from the specified symbol information and
         * next entry reference. The buffer region is copied, so later
         * changes to <code>ch</code> do not affect the entry.
         */
        public Entry(char[] ch, int offset, int length, Entry next) {
            characters = new char[length];
            System.arraycopy(ch, offset, characters, 0, length);
            symbol = new String(characters).intern();
            this.next = next;
        }

    } // class Entry

} // class SymbolTable
|