001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: * ====================================================================
017: *
018: * This software consists of voluntary contributions made by many
019: * individuals on behalf of the Apache Software Foundation and was
020: * originally based on software copyright (c) 1999, International
021: * Business Machines, Inc., http://www.apache.org. For more
022: * information on the Apache Software Foundation, please see
023: * <http://www.apache.org/>.
024: */
025:
026: package org.apache.jasper.xmlparser;
027:
028: /**
029: * This class is a symbol table implementation that guarantees that
030: * strings used as identifiers are unique references. Multiple calls
031: * to <code>addSymbol</code> will always return the same string
032: * reference.
033: * <p>
034: * The symbol table performs the same task as <code>String.intern()</code>
035: * with the following differences:
036: * <ul>
037: * <li>
038: * A new string object does not need to be created in order to
039: * retrieve a unique reference. Symbols can be added by using
040: * a series of characters in a character array.
041: * </li>
042: * <li>
043: * Users of the symbol table can provide their own symbol hashing
044: * implementation. For example, a simple string hashing algorithm
045: * may fail to produce a balanced set of hashcodes for symbols
046: * that are <em>mostly</em> unique. Strings with similar leading
047: * characters are especially prone to this poor hashing behavior.
048: * </li>
049: * </ul>
050: *
051: * @author Andy Clark
052: * @version $Id: SymbolTable.java 467222 2006-10-24 03:17:11Z markt $
053: */
054: public class SymbolTable {
055:
056: //
057: // Constants
058: //
059:
060: /** Default table size. */
061: protected static final int TABLE_SIZE = 101;
062:
063: //
064: // Data
065: //
066:
067: /** Buckets. */
068: protected Entry[] fBuckets = null;
069:
070: // actual table size
071: protected int fTableSize;
072:
073: //
074: // Constructors
075: //
076:
077: /** Constructs a symbol table with a default number of buckets. */
078: public SymbolTable() {
079: this (TABLE_SIZE);
080: }
081:
082: /** Constructs a symbol table with a specified number of buckets. */
083: public SymbolTable(int tableSize) {
084: fTableSize = tableSize;
085: fBuckets = new Entry[fTableSize];
086: }
087:
088: //
089: // Public methods
090: //
091:
092: /**
093: * Adds the specified symbol to the symbol table and returns a
094: * reference to the unique symbol. If the symbol already exists,
095: * the previous symbol reference is returned instead, in order
096: * guarantee that symbol references remain unique.
097: *
098: * @param symbol The new symbol.
099: */
100: public String addSymbol(String symbol) {
101:
102: // search for identical symbol
103: int bucket = hash(symbol) % fTableSize;
104: int length = symbol.length();
105: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
106: if (length == entry.characters.length) {
107: for (int i = 0; i < length; i++) {
108: if (symbol.charAt(i) != entry.characters[i]) {
109: continue OUTER;
110: }
111: }
112: return entry.symbol;
113: }
114: }
115:
116: // create new entry
117: Entry entry = new Entry(symbol, fBuckets[bucket]);
118: fBuckets[bucket] = entry;
119: return entry.symbol;
120:
121: } // addSymbol(String):String
122:
123: /**
124: * Adds the specified symbol to the symbol table and returns a
125: * reference to the unique symbol. If the symbol already exists,
126: * the previous symbol reference is returned instead, in order
127: * guarantee that symbol references remain unique.
128: *
129: * @param buffer The buffer containing the new symbol.
130: * @param offset The offset into the buffer of the new symbol.
131: * @param length The length of the new symbol in the buffer.
132: */
133: public String addSymbol(char[] buffer, int offset, int length) {
134:
135: // search for identical symbol
136: int bucket = hash(buffer, offset, length) % fTableSize;
137: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
138: if (length == entry.characters.length) {
139: for (int i = 0; i < length; i++) {
140: if (buffer[offset + i] != entry.characters[i]) {
141: continue OUTER;
142: }
143: }
144: return entry.symbol;
145: }
146: }
147:
148: // add new entry
149: Entry entry = new Entry(buffer, offset, length,
150: fBuckets[bucket]);
151: fBuckets[bucket] = entry;
152: return entry.symbol;
153:
154: } // addSymbol(char[],int,int):String
155:
156: /**
157: * Returns a hashcode value for the specified symbol. The value
158: * returned by this method must be identical to the value returned
159: * by the <code>hash(char[],int,int)</code> method when called
160: * with the character array that comprises the symbol string.
161: *
162: * @param symbol The symbol to hash.
163: */
164: public int hash(String symbol) {
165:
166: int code = 0;
167: int length = symbol.length();
168: for (int i = 0; i < length; i++) {
169: code = code * 37 + symbol.charAt(i);
170: }
171: return code & 0x7FFFFFF;
172:
173: } // hash(String):int
174:
175: /**
176: * Returns a hashcode value for the specified symbol information.
177: * The value returned by this method must be identical to the value
178: * returned by the <code>hash(String)</code> method when called
179: * with the string object created from the symbol information.
180: *
181: * @param buffer The character buffer containing the symbol.
182: * @param offset The offset into the character buffer of the start
183: * of the symbol.
184: * @param length The length of the symbol.
185: */
186: public int hash(char[] buffer, int offset, int length) {
187:
188: int code = 0;
189: for (int i = 0; i < length; i++) {
190: code = code * 37 + buffer[offset + i];
191: }
192: return code & 0x7FFFFFF;
193:
194: } // hash(char[],int,int):int
195:
196: /**
197: * Returns true if the symbol table already contains the specified
198: * symbol.
199: *
200: * @param symbol The symbol to look for.
201: */
202: public boolean containsSymbol(String symbol) {
203:
204: // search for identical symbol
205: int bucket = hash(symbol) % fTableSize;
206: int length = symbol.length();
207: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
208: if (length == entry.characters.length) {
209: for (int i = 0; i < length; i++) {
210: if (symbol.charAt(i) != entry.characters[i]) {
211: continue OUTER;
212: }
213: }
214: return true;
215: }
216: }
217:
218: return false;
219:
220: } // containsSymbol(String):boolean
221:
222: /**
223: * Returns true if the symbol table already contains the specified
224: * symbol.
225: *
226: * @param buffer The buffer containing the symbol to look for.
227: * @param offset The offset into the buffer.
228: * @param length The length of the symbol in the buffer.
229: */
230: public boolean containsSymbol(char[] buffer, int offset, int length) {
231:
232: // search for identical symbol
233: int bucket = hash(buffer, offset, length) % fTableSize;
234: OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
235: if (length == entry.characters.length) {
236: for (int i = 0; i < length; i++) {
237: if (buffer[offset + i] != entry.characters[i]) {
238: continue OUTER;
239: }
240: }
241: return true;
242: }
243: }
244:
245: return false;
246:
247: } // containsSymbol(char[],int,int):boolean
248:
249: //
250: // Classes
251: //
252:
253: /**
254: * This class is a symbol table entry. Each entry acts as a node
255: * in a linked list.
256: */
257: protected static final class Entry {
258:
259: //
260: // Data
261: //
262:
263: /** Symbol. */
264: public String symbol;
265:
266: /**
267: * Symbol characters. This information is duplicated here for
268: * comparison performance.
269: */
270: public char[] characters;
271:
272: /** The next entry. */
273: public Entry next;
274:
275: //
276: // Constructors
277: //
278:
279: /**
280: * Constructs a new entry from the specified symbol and next entry
281: * reference.
282: */
283: public Entry(String symbol, Entry next) {
284: this .symbol = symbol.intern();
285: characters = new char[symbol.length()];
286: symbol.getChars(0, characters.length, characters, 0);
287: this .next = next;
288: }
289:
290: /**
291: * Constructs a new entry from the specified symbol information and
292: * next entry reference.
293: */
294: public Entry(char[] ch, int offset, int length, Entry next) {
295: characters = new char[length];
296: System.arraycopy(ch, offset, characters, 0, length);
297: symbol = new String(characters).intern();
298: this .next = next;
299: }
300:
301: } // class Entry
302:
303: } // class SymbolTable
|