001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.xerces.util;
019:
020: import java.lang.ref.ReferenceQueue;
021: import java.lang.ref.SoftReference;
022:
023: /**
024: * This symbol table uses SoftReferences to its String entries, which means that table entries
025: * that have no references to them can be garbage collected when memory is needed. Thus, in
026: * documents with very very large numbers of unique strings, using this SymbolTable will prevent
027: * an out of memory error from occuring.
028: *
029: * @see SymbolTable
030: *
031: * @author Peter McCracken, IBM
032: *
033: * @version $Id: SoftReferenceSymbolTable.java 496158 2007-01-14 21:27:15Z mrglavas $
034: */
035: /*
036: * This class extends SymbolTable. Logically, it would make more sense if it and SymbolTable
037: * shared a common interface, because despite almost identical logic, SoftReferenceSymbolTable
038: * shares almost no code with SymbolTable (because of necessary checks for null table entries
039: * in the code). I've chosen to avoid making the interface because we don't want to slow down
040: * the vastly more common case of using the regular SymbolTable. We also don't want to change
041: * SymbolTable, since it's a public class that's probably commonly in use by many applications.
042: * -PJM
043: */
044: public class SoftReferenceSymbolTable extends SymbolTable {
045:
046: /*
047: * This variable masks the fBuckets variable used by SymbolTable.
048: */
049: protected SREntry[] fBuckets = null;
050:
051: private final ReferenceQueue fReferenceQueue;
052:
053: //
054: // Constructors
055: //
056:
057: /**
058: * Constructs a new, empty SymbolTable with the specified initial
059: * capacity and the specified load factor.
060: *
061: * @param initialCapacity the initial capacity of the SymbolTable.
062: * @param loadFactor the load factor of the SymbolTable.
063: * @throws IllegalArgumentException if the initial capacity is less
064: * than zero, or if the load factor is nonpositive.
065: */
066: public SoftReferenceSymbolTable(int initialCapacity,
067: float loadFactor) {
068: /*
069: * Not calling super() because we don't want to initialize the Entry buckets
070: * used by the base class.
071: */
072: if (initialCapacity < 0) {
073: throw new IllegalArgumentException("Illegal Capacity: "
074: + initialCapacity);
075: }
076:
077: if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
078: throw new IllegalArgumentException("Illegal Load: "
079: + loadFactor);
080: }
081:
082: if (initialCapacity == 0) {
083: initialCapacity = 1;
084: }
085:
086: fLoadFactor = loadFactor;
087: fTableSize = initialCapacity;
088: fBuckets = new SREntry[fTableSize];
089: fThreshold = (int) (fTableSize * loadFactor);
090: fCount = 0;
091:
092: fReferenceQueue = new ReferenceQueue();
093: }
094:
095: /**
096: * Constructs a new, empty SymbolTable with the specified initial capacity
097: * and default load factor, which is <tt>0.75</tt>.
098: *
099: * @param initialCapacity the initial capacity of the hashtable.
100: * @throws IllegalArgumentException if the initial capacity is less
101: * than zero.
102: */
103: public SoftReferenceSymbolTable(int initialCapacity) {
104: this (initialCapacity, 0.75f);
105: }
106:
107: /**
108: * Constructs a new, empty SymbolTable with a default initial capacity (101)
109: * and load factor, which is <tt>0.75</tt>.
110: */
111: public SoftReferenceSymbolTable() {
112: this (TABLE_SIZE, 0.75f);
113: }
114:
115: //
116: // Public methods
117: //
118:
119: /**
120: * Adds the specified symbol to the symbol table and returns a
121: * reference to the unique symbol. If the symbol already exists,
122: * the previous symbol reference is returned instead, in order
123: * guarantee that symbol references remain unique.
124: *
125: * @param symbol The new symbol.
126: */
127: public String addSymbol(String symbol) {
128: clean();
129: // search for identical symbol
130: int bucket = hash(symbol) % fTableSize;
131: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
132: SREntryData data = (SREntryData) entry.get();
133: if (data == null) {
134: continue;
135: }
136: if (data.symbol.equals(symbol)) {
137: return data.symbol;
138: }
139: }
140:
141: if (fCount >= fThreshold) {
142: // Rehash the table if the threshold is exceeded
143: rehash();
144: bucket = hash(symbol) % fTableSize;
145: }
146:
147: // add new entry
148: symbol = symbol.intern();
149: SREntry entry = new SREntry(symbol, fBuckets[bucket], bucket,
150: fReferenceQueue);
151: fBuckets[bucket] = entry;
152: ++fCount;
153: return symbol;
154: } // addSymbol(String):String
155:
156: /**
157: * Adds the specified symbol to the symbol table and returns a
158: * reference to the unique symbol. If the symbol already exists,
159: * the previous symbol reference is returned instead, in order
160: * guarantee that symbol references remain unique.
161: *
162: * @param buffer The buffer containing the new symbol.
163: * @param offset The offset into the buffer of the new symbol.
164: * @param length The length of the new symbol in the buffer.
165: */
166: public String addSymbol(char[] buffer, int offset, int length) {
167: clean();
168: // search for identical symbol
169: int bucket = hash(buffer, offset, length) % fTableSize;
170: OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
171: SREntryData data = (SREntryData) entry.get();
172: if (data == null) {
173: continue;
174: }
175: if (length == data.characters.length) {
176: for (int i = 0; i < length; i++) {
177: if (buffer[offset + i] != data.characters[i]) {
178: continue OUTER;
179: }
180: }
181: return data.symbol;
182: }
183: }
184:
185: if (fCount >= fThreshold) {
186: // Rehash the table if the threshold is exceeded
187: rehash();
188: bucket = hash(buffer, offset, length) % fTableSize;
189: }
190:
191: // add new entry
192: String symbol = new String(buffer, offset, length).intern();
193: SREntry entry = new SREntry(symbol, buffer, offset, length,
194: fBuckets[bucket], bucket, fReferenceQueue);
195: fBuckets[bucket] = entry;
196: ++fCount;
197: return symbol;
198: } // addSymbol(char[],int,int):String
199:
200: /**
201: * Increases the capacity of and internally reorganizes this
202: * SymbolTable, in order to accommodate and access its entries more
203: * efficiently. This method is called automatically when the
204: * number of keys in the SymbolTable exceeds this hashtable's capacity
205: * and load factor.
206: */
207: protected void rehash() {
208:
209: int oldCapacity = fBuckets.length;
210: SREntry[] oldTable = fBuckets;
211:
212: int newCapacity = oldCapacity * 2 + 1;
213: SREntry[] newTable = new SREntry[newCapacity];
214:
215: fThreshold = (int) (newCapacity * fLoadFactor);
216: fBuckets = newTable;
217: fTableSize = fBuckets.length;
218:
219: for (int i = oldCapacity; i-- > 0;) {
220: for (SREntry old = oldTable[i]; old != null;) {
221: SREntry e = old;
222: old = old.next;
223:
224: SREntryData data = (SREntryData) e.get();
225: if (data != null) {
226: int index = hash(data.characters, 0,
227: data.characters.length)
228: % newCapacity;
229: if (newTable[index] != null) {
230: newTable[index].prev = e;
231: }
232: e.next = newTable[index];
233: newTable[index] = e;
234: } else {
235: fCount--;
236: }
237: }
238: }
239: }
240:
241: /**
242: * Returns true if the symbol table already contains the specified
243: * symbol.
244: *
245: * @param symbol The symbol to look for.
246: */
247: public boolean containsSymbol(String symbol) {
248:
249: // search for identical symbol
250: int bucket = hash(symbol) % fTableSize;
251: int length = symbol.length();
252: OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
253: SREntryData data = (SREntryData) entry.get();
254: if (data == null) {
255: continue;
256: }
257: if (length == data.characters.length) {
258: for (int i = 0; i < length; i++) {
259: if (symbol.charAt(i) != data.characters[i]) {
260: continue OUTER;
261: }
262: }
263: return true;
264: }
265: }
266:
267: return false;
268:
269: } // containsSymbol(String):boolean
270:
271: /**
272: * Returns true if the symbol table already contains the specified
273: * symbol.
274: *
275: * @param buffer The buffer containing the symbol to look for.
276: * @param offset The offset into the buffer.
277: * @param length The length of the symbol in the buffer.
278: */
279: public boolean containsSymbol(char[] buffer, int offset, int length) {
280:
281: // search for identical symbol
282: int bucket = hash(buffer, offset, length) % fTableSize;
283: OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
284: SREntryData data = (SREntryData) entry.get();
285: if (data == null) {
286: continue;
287: }
288: if (length == data.characters.length) {
289: for (int i = 0; i < length; i++) {
290: if (buffer[offset + i] != data.characters[i]) {
291: continue OUTER;
292: }
293: }
294: return true;
295: }
296: }
297:
298: return false;
299:
300: } // containsSymbol(char[],int,int):boolean
301:
302: private void removeEntry(SREntry entry) {
303: if (entry.next != null) {
304: entry.next.prev = entry.prev;
305: }
306: if (entry.prev != null) {
307: entry.prev.next = entry.next;
308: } else {
309: fBuckets[entry.bucket] = entry.next;
310: }
311: fCount--;
312: }
313:
314: /**
315: * Removes stale symbols from the table.
316: */
317: private void clean() {
318: SREntry entry = (SREntry) fReferenceQueue.poll();
319: while (entry != null) {
320: removeEntry(entry);
321: entry = (SREntry) fReferenceQueue.poll();
322: }
323: }
324:
325: //
326: // Classes
327: //
328:
329: /**
330: * This class is a symbol table entry. Each entry acts as a node
331: * in a doubly-linked list.
332: *
333: * The "SR" stands for SoftReference.
334: */
335: protected static final class SREntry extends SoftReference {
336:
337: /** The next entry. */
338: public SREntry next;
339:
340: /** The previous entry. */
341: public SREntry prev;
342:
343: public int bucket;
344:
345: //
346: // Constructors
347: //
348:
349: /**
350: * Constructs a new entry from the specified symbol and next entry
351: * reference.
352: */
353: public SREntry(String internedSymbol, SREntry next, int bucket,
354: ReferenceQueue q) {
355: super (new SREntryData(internedSymbol), q);
356: initialize(next, bucket);
357: }
358:
359: /**
360: * Constructs a new entry from the specified symbol information and
361: * next entry reference.
362: */
363: public SREntry(String internedSymbol, char[] ch, int offset,
364: int length, SREntry next, int bucket, ReferenceQueue q) {
365: super (new SREntryData(internedSymbol, ch, offset, length),
366: q);
367: initialize(next, bucket);
368: }
369:
370: private void initialize(SREntry next, int bucket) {
371: this .next = next;
372: if (next != null) {
373: next.prev = this ;
374: }
375: this .prev = null;
376: this .bucket = bucket;
377: }
378: } // class Entry
379:
380: protected static final class SREntryData {
381: public final String symbol;
382: public final char[] characters;
383:
384: public SREntryData(String internedSymbol) {
385: this .symbol = internedSymbol;
386: characters = new char[symbol.length()];
387: symbol.getChars(0, characters.length, characters, 0);
388: }
389:
390: public SREntryData(String internedSymbol, char[] ch,
391: int offset, int length) {
392: this .symbol = internedSymbol;
393: characters = new char[length];
394: System.arraycopy(ch, offset, characters, 0, length);
395: }
396: }
397: } // class SoftReferenceSymbolTable
|