001: package uk.org.ponder.stringutil;
002:
003: import uk.org.ponder.hashutil.CRC32;
004:
005: import uk.org.ponder.intutil.IntEHInnaBox;
006:
007: /** This class was intended to provide an atom table of strings represented
008: * as (relatively) opaque integer indices. This allows fast comparison via
009: * comparison of indices, and pre-interning of character data that would
010: * otherwise have required a String to be temporarily allocated.
011: * In general this class was found to be more trouble than it was worth
012: * (despite working correctly), in particular because deletion was never
013: * implemented and because thread-safety issues would probably have
014: * degraded performance more than was initially hoped in any case.
015: */
016:
017: public class StringVat {
018: private static final int PAGE_SIZE = 1024 * 128;
019: private static final int MAX_PAGES = 128;
020:
021: private CRC32 crc = new CRC32();
022: private IntEHInnaBox indexhash = new IntEHInnaBox();
023:
024: char[][] storage = new char[MAX_PAGES][];
025: int last_page = 0;
026: int last_page_pos = 0;
027:
028: StringVat() {
029: storage[0] = new char[PAGE_SIZE];
030: }
031:
032: public static StringVat vat = new StringVat();
033:
034: private void make_space(int length) {
035: if (length > (PAGE_SIZE - last_page_pos)) {
036: ++last_page;
037: storage[last_page] = new char[PAGE_SIZE];
038: last_page_pos = 0;
039: }
040: }
041:
042: public boolean equals(int index, String tocompare) {
043: char[] correct_page = storage[index / PAGE_SIZE];
044: index %= PAGE_SIZE;
045: synchronized (correct_page) {
046: int length = (int) correct_page[index++];
047: if (length != tocompare.length())
048: return false;
049: for (int i = index; i < index + length; ++i) {
050: if (correct_page[i] != tocompare.charAt(i - index))
051: return false;
052: }
053: }
054: return true;
055: }
056:
057: public boolean equals(int index, char[] array, int start, int length) {
058:
059: char[] correct_page = storage[index / PAGE_SIZE];
060: index %= PAGE_SIZE;
061: synchronized (correct_page) {
062: int storedlength = (int) correct_page[index++];
063:
064: if (length != storedlength)
065: return false;
066: for (int i = index; i < index + length; ++i) {
067: if (correct_page[i] != array[i - index + start])
068: return false;
069: }
070: }
071: return true;
072:
073: }
074:
075: public void appendto(CharWrap buf, int index) {
076:
077: char[] correct_page = storage[index / PAGE_SIZE];
078: index %= PAGE_SIZE;
079: synchronized (correct_page) {
080: int length = (int) correct_page[index++];
081:
082: buf.append(correct_page, index, length);
083: }
084: }
085:
086: public void concatto(CharWrap buf, int index1, int index2) {
087:
088: buf.clear();
089: appendto(buf, index1);
090: appendto(buf, index2);
091: }
092:
093: private int findindex(int hash, char[] array, int start, int length) {
094: int offset = -1;
095: // scan through each entry in the hash with the same hashcode
096: while (true) {
097: ++offset;
098: int testindex = indexhash.get(hash, offset);
099: if (testindex == IntEHInnaBox.INVALID_VALUE)
100: break;
101:
102: char[] correct_page = storage[testindex / PAGE_SIZE];
103: testindex %= PAGE_SIZE;
104: synchronized (correct_page) {
105: int testlength = (int) correct_page[testindex++];
106:
107: if (testlength != length)
108: continue;
109: for (int i = testindex; i < testindex + length; ++i) {
110: if (correct_page[i] != array[i - testindex + start])
111: continue;
112: }
113: }
114: // string at index matches required to store
115: return testindex - 1;
116: } // end scanning for
117: return IntEHInnaBox.INVALID_VALUE;
118: }
119:
120: public int findindex(char[] array, int start, int length) {
121: int hash = CRC32.eatquick(array, start, length);
122: return findindex(hash, array, start, length);
123: }
124:
125: public int findindex(String string) {
126: // KEEP IDENTICAL to previous method apart from String
127: int length = string.length();
128: int hash = CRC32.eatquick(string);
129: int offset = -1;
130: // scan through each entry in the hash with the same hashcode
131: while (true) {
132: ++offset;
133: int testindex = indexhash.get(hash, offset);
134: if (testindex == IntEHInnaBox.INVALID_VALUE)
135: break;
136:
137: char[] correct_page = storage[testindex / PAGE_SIZE];
138: testindex %= PAGE_SIZE;
139: synchronized (correct_page) {
140: int testlength = (int) correct_page[testindex++];
141:
142: if (testlength != length)
143: continue;
144: for (int i = testindex; i < testindex + length; ++i) {
145: if (correct_page[i] != string.charAt(i - testindex))
146: continue;
147: }
148: }
149: // string at index matches required to store
150: return testindex - 1;
151: } // end scanning for
152: return IntEHInnaBox.INVALID_VALUE;
153: }
154:
155: public int storeunique(CharWrap wrap) {
156: return storeunique(wrap.storage, wrap.offset, wrap.size);
157: }
158:
159: public int storeunique(char[] array, int start, int length) {
160: int hash = CRC32.eatquick(array, start, length);
161:
162: int findindex = findindex(hash, array, start, length);
163: if (findindex != IntEHInnaBox.INVALID_VALUE)
164: return findindex;
165: int newindex = store(array, start, length);
166: indexhash.put(hash, newindex);
167: return newindex;
168: }
169:
170: public int store(CharWrap wrap) {
171: return store(wrap.storage, wrap.offset, wrap.size);
172: }
173:
174: public int store(char[] array, int start, int length) {
175: make_space(length);
176: char[] correct_page = storage[last_page];
177: int postogo = last_page_pos + last_page * PAGE_SIZE;
178: // System.out.println("Storing length "+ length+" at index "+ last_page_pos);
179: synchronized (correct_page) {
180: correct_page[last_page_pos++] = (char) length;
181: System.arraycopy(array, start, correct_page, last_page_pos,
182: length);
183: }
184: last_page_pos += length;
185: return postogo;
186: }
187:
188: public String toString(int index) {
189: char[] correct_page = storage[index / PAGE_SIZE];
190: index %= PAGE_SIZE;
191: synchronized (correct_page) {
192: int length = (int) correct_page[index++];
193: // System.out.println("Recovered length "+ length+" now at index "+index);
194: String togo = new String(correct_page, index, length);
195: return togo.intern();
196: }
197: }
198:
199: public static void main(String[] argv) {
200: char[] buffer = { 'h', 'e', 'l', 'l', 'o', '1' };
201: int[] indices = new int[7];
202: for (int i = 0; i < 7; ++i) {
203: indices[i] = vat.store(buffer, 0, i);
204: System.out.println("String " + i + " stored "
205: + vat.toString(indices[i]) + " at index "
206: + indices[i]);
207: }
208: for (int i = 0; i < 7; ++i) {
209: System.out.println("Found " + vat.toString(indices[i])
210: + " at index " + indices[i]);
211: }
212: int index1 = vat.storeunique(buffer, 0, 6);
213: int index2 = vat.storeunique(buffer, 0, 6);
214: System.out.println("Storeunique on identicals: " + index1
215: + " and " + index2);
216: System.out.println("Equals: "
217: + vat.equals(index1, buffer, 0, 6));
218: buffer[5] = '2';
219: int index3 = vat.storeunique(buffer, 0, 6);
220: System.out.println("Storeunique on nonidentical: " + index3);
221: System.out.println("Equals: "
222: + vat.equals(index1, buffer, 0, 6));
223:
224: buffer[5] = ' ';
225: CharWrap svb = new CharWrap();
226: svb.append(buffer, 0, 6);
227: char[] buffer2 = { 'w', 'o', 'r', 'l', 'd' };
228: svb.append(buffer2, 0, 5);
229: int nextindex = vat.store(svb);
230: System.out.println("String " + vat.toString(nextindex)
231: + " stored at index " + nextindex);
232: vat.appendto(svb, indices[6]);
233: int finalindex = vat.store(svb);
234: System.out.println("String " + vat.toString(finalindex)
235: + " stored at index " + finalindex);
236: }
237:
238: }
|