001: /*
002: * Copyright 2000-2002,2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: //package org.zilonis.org.apache.xerces.util;
018: package org.zilonis.symbol;
019:
020: /**
021: * This class is a symbol table implementation that guarantees that
022: * strings used as identifiers are unique references. Multiple calls
023: * to <code>addSymbol</code> will always return the same string
024: * reference.
025: * <p>
026: * The symbol table performs the same task as <code>String.intern()</code>
027: * with the following differences:
028: * <ul>
029: * <li>
030: * A new string object does not need to be created in order to
031: * retrieve a unique reference. Symbols can be added by using
032: * a series of characters in a character array.
033: * </li>
034: * <li>
035: * Users of the symbol table can provide their own symbol hashing
036: * implementation. For example, a simple string hashing algorithm
037: * may fail to produce a balanced set of hashcodes for symbols
038: * that are <em>mostly</em> unique. Strings with similar leading
039: * characters are especially prone to this poor hashing behavior.
040: * </li>
041: * </ul>
042: *
043: * @see SymbolHash
044: *
045: * @author Andy Clark
046: *
047: * @version $Id: SymbolTable.java,v 1.1 2005/03/01 01:16:14 elevy Exp $
048: */
049: public class SymbolTable {
050:
051: //
052: // Constants
053: //
054:
055: /** Default table size. */
056: protected static final int TABLE_SIZE = 101;
057:
058: //
059: // Data
060: //
061:
062: /** Buckets. */
063: protected Entry[] fBuckets = null;
064:
065: // actual table size
066: protected int fTableSize;
067:
068: //
069: // Constructors
070: //
071:
072: /** Constructs a symbol table with a default number of buckets. */
073: public SymbolTable() {
074: this (TABLE_SIZE);
075: }
076:
077: /** Constructs a symbol table with a specified number of buckets. */
078: public SymbolTable(int tableSize) {
079: fTableSize = tableSize;
080: fBuckets = new Entry[fTableSize];
081: }
082:
083: //
084: // Public methods
085: //
086:
087: /**
088: * Adds the specified symbol to the symbol table and returns a
089: * reference to the unique symbol. If the symbol already exists,
090: * the previous symbol reference is returned instead, in order
091: * guarantee that symbol references remain unique.
092: *
093: * @param symbol The new symbol.
094: */
095: public String addSymbol(String symbol) {
096:
097: // search for identical symbol
098: int bucket = hash(symbol) % fTableSize;
099: int length = symbol.length();
100: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
101: if (length == entry.characters.length) {
102: for (int i = 0; i < length; i++) {
103: if (symbol.charAt(i) != entry.characters[i]) {
104: continue OUTER;
105: }
106: }
107: return entry.symbol;
108: }
109: }
110:
111: // create new entry
112: Entry entry = new Entry(symbol, fBuckets[bucket]);
113: fBuckets[bucket] = entry;
114: return entry.symbol;
115:
116: } // addSymbol(String):String
117:
118: /**
119: * Adds the specified symbol to the symbol table and returns a
120: * reference to the unique symbol. If the symbol already exists,
121: * the previous symbol reference is returned instead, in order
122: * guarantee that symbol references remain unique.
123: *
124: * @param buffer The buffer containing the new symbol.
125: * @param offset The offset into the buffer of the new symbol.
126: * @param length The length of the new symbol in the buffer.
127: */
128: public String addSymbol(char[] buffer, int offset, int length) {
129:
130: // search for identical symbol
131: int bucket = hash(buffer, offset, length) % fTableSize;
132: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
133: if (length == entry.characters.length) {
134: for (int i = 0; i < length; i++) {
135: if (buffer[offset + i] != entry.characters[i]) {
136: continue OUTER;
137: }
138: }
139: return entry.symbol;
140: }
141: }
142:
143: // add new entry
144: Entry entry = new Entry(buffer, offset, length,
145: fBuckets[bucket]);
146: fBuckets[bucket] = entry;
147: return entry.symbol;
148:
149: } // addSymbol(char[],int,int):String
150:
151: /**
152: * Returns a hashcode value for the specified symbol. The value
153: * returned by this method must be identical to the value returned
154: * by the <code>hash(char[],int,int)</code> method when called
155: * with the character array that comprises the symbol string.
156: *
157: * @param symbol The symbol to hash.
158: */
159: public int hash(String symbol) {
160:
161: int code = 0;
162: int length = symbol.length();
163: for (int i = 0; i < length; i++) {
164: code = code * 37 + symbol.charAt(i);
165: }
166: return code & 0x7FFFFFF;
167:
168: } // hash(String):int
169:
170: /**
171: * Returns a hashcode value for the specified symbol information.
172: * The value returned by this method must be identical to the value
173: * returned by the <code>hash(String)</code> method when called
174: * with the string object created from the symbol information.
175: *
176: * @param buffer The character buffer containing the symbol.
177: * @param offset The offset into the character buffer of the start
178: * of the symbol.
179: * @param length The length of the symbol.
180: */
181: public int hash(char[] buffer, int offset, int length) {
182:
183: int code = 0;
184: for (int i = 0; i < length; i++) {
185: code = code * 37 + buffer[offset + i];
186: }
187: return code & 0x7FFFFFF;
188:
189: } // hash(char[],int,int):int
190:
191: /**
192: * Returns true if the symbol table already contains the specified
193: * symbol.
194: *
195: * @param symbol The symbol to look for.
196: */
197: public boolean containsSymbol(String symbol) {
198:
199: // search for identical symbol
200: int bucket = hash(symbol) % fTableSize;
201: int length = symbol.length();
202: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
203: if (length == entry.characters.length) {
204: for (int i = 0; i < length; i++) {
205: if (symbol.charAt(i) != entry.characters[i]) {
206: continue OUTER;
207: }
208: }
209: return true;
210: }
211: }
212:
213: return false;
214:
215: } // containsSymbol(String):boolean
216:
217: /**
218: * Returns true if the symbol table already contains the specified
219: * symbol.
220: *
221: * @param buffer The buffer containing the symbol to look for.
222: * @param offset The offset into the buffer.
223: * @param length The length of the symbol in the buffer.
224: */
225: public boolean containsSymbol(char[] buffer, int offset, int length) {
226:
227: // search for identical symbol
228: int bucket = hash(buffer, offset, length) % fTableSize;
229: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
230: if (length == entry.characters.length) {
231: for (int i = 0; i < length; i++) {
232: if (buffer[offset + i] != entry.characters[i]) {
233: continue OUTER;
234: }
235: }
236: return true;
237: }
238: }
239:
240: return false;
241:
242: } // containsSymbol(char[],int,int):boolean
243:
244: //
245: // Classes
246: //
247:
248: /**
249: * This class is a symbol table entry. Each entry acts as a node
250: * in a linked list.
251: */
252: protected static final class Entry {
253:
254: //
255: // Data
256: //
257:
258: /** Symbol. */
259: public String symbol;
260:
261: /**
262: * Symbol characters. This information is duplicated here for
263: * comparison performance.
264: */
265: public char[] characters;
266:
267: /** The next entry. */
268: public Entry next;
269:
270: //
271: // Constructors
272: //
273:
274: /**
275: * Constructs a new entry from the specified symbol and next entry
276: * reference.
277: */
278: public Entry(String symbol, Entry next) {
279: this .symbol = symbol.intern();
280: characters = new char[symbol.length()];
281: symbol.getChars(0, characters.length, characters, 0);
282: this .next = next;
283: }
284:
285: /**
286: * Constructs a new entry from the specified symbol information and
287: * next entry reference.
288: */
289: public Entry(char[] ch, int offset, int length, Entry next) {
290: characters = new char[length];
291: System.arraycopy(ch, offset, characters, 0, length);
292: symbol = new String(characters).intern();
293: this .next = next;
294: }
295:
296: } // class Entry
297:
298: } // class SymbolTable
|