001: // kelondroRowSet.java
002: // (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
003: // first published 20.06.2006 on http://www.anomic.de
004: //
005: // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
006: // $LastChangedRevision: 1986 $
007: // $LastChangedBy: orbiter $
008: //
009: // LICENSE
010: //
011: // This program is free software; you can redistribute it and/or modify
012: // it under the terms of the GNU General Public License as published by
013: // the Free Software Foundation; either version 2 of the License, or
014: // (at your option) any later version.
015: //
016: // This program is distributed in the hope that it will be useful,
017: // but WITHOUT ANY WARRANTY; without even the implied warranty of
018: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: // GNU General Public License for more details.
020: //
021: // You should have received a copy of the GNU General Public License
022: // along with this program; if not, write to the Free Software
023: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024:
025: package de.anomic.kelondro;
026:
027: import java.util.Date;
028: import java.util.Iterator;
029: import java.util.List;
030: import java.util.Random;
031:
032: import de.anomic.server.logging.serverLog;
033:
034: public class kelondroRowSet extends kelondroRowCollection implements
035: kelondroIndex {
036:
037: private static final int collectionReSortLimit = 400;
038:
039: private kelondroProfile profile;
040:
041: public kelondroRowSet(kelondroRowSet rs) {
042: super (rs);
043: this .profile = rs.profile;
044: }
045:
046: public kelondroRowSet(kelondroRow rowdef, int objectCount,
047: byte[] cache, int sortBound) {
048: super (rowdef, objectCount, cache, sortBound);
049: assert rowdef.objectOrder != null;
050: this .profile = new kelondroProfile();
051: }
052:
053: public kelondroRowSet(kelondroRow rowdef, int objectCount) {
054: super (rowdef, objectCount);
055: assert rowdef.objectOrder != null;
056: this .profile = new kelondroProfile();
057: }
058:
059: public kelondroRowSet(kelondroRow rowdef,
060: kelondroRow.Entry exportedCollectionRowEnvironment,
061: int columnInEnvironment) {
062: super (rowdef, exportedCollectionRowEnvironment,
063: columnInEnvironment);
064: assert rowdef.objectOrder != null;
065: this .profile = new kelondroProfile();
066: }
067:
068: public void setOrdering(kelondroByteOrder newOrder, int newColumn) {
069: assert newOrder != null;
070: if ((rowdef.objectOrder == null)
071: || (!(rowdef.objectOrder.signature().equals(newOrder
072: .signature())))
073: || (newColumn != rowdef.primaryKeyIndex)) {
074: rowdef.setOrdering(newOrder, newColumn);
075: this .sortBound = 0;
076: }
077: }
078:
079: public void reset() {
080: super .reset();
081: this .profile = new kelondroProfile();
082: }
083:
084: public synchronized boolean has(byte[] key) {
085: return (get(key) != null);
086: }
087:
088: public synchronized kelondroRow.Entry get(byte[] key) {
089: return get(key, 0, key.length);
090: }
091:
092: private kelondroRow.Entry get(byte[] key, int astart, int alength) {
093: long handle = profile.startRead();
094: int index = find(key, astart, alength);
095: kelondroRow.Entry entry = (index >= 0) ? get(index) : null;
096: profile.stopRead(handle);
097: return entry;
098: }
099:
100: public synchronized void putMultiple(List<kelondroRow.Entry> rows) {
101: Iterator<kelondroRow.Entry> i = rows.iterator();
102: while (i.hasNext())
103: put(i.next());
104: }
105:
106: public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) {
107: return put(row);
108: }
109:
110: public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) {
111: assert (entry != null);
112: assert (entry.getPrimaryKeyBytes() != null);
113: //assert (!(serverLog.allZero(entry.getColBytes(super.sortColumn))));
114: long handle = profile.startWrite();
115: int index = -1;
116: kelondroRow.Entry oldentry = null;
117: // when reaching a specific amount of un-sorted entries, re-sort all
118: if ((this .chunkcount - this .sortBound) > collectionReSortLimit) {
119: sort();
120: }
121: index = find(entry.bytes(), (rowdef.primaryKeyIndex < 0) ? 0
122: : super .rowdef.colstart[rowdef.primaryKeyIndex],
123: super .rowdef.primaryKeyLength);
124: if (index < 0) {
125: super .addUnique(entry);
126: } else {
127: oldentry = get(index);
128: set(index, entry);
129: }
130: profile.stopWrite(handle);
131: return oldentry;
132: }
133:
134: private synchronized kelondroRow.Entry remove(byte[] a, int start,
135: int length, boolean keepOrder) {
136: int index = find(a, start, length);
137: if (index < 0)
138: return null;
139: //System.out.println("remove: chunk found at index position (before remove) " + index + ", inset=" + serverLog.arrayList(super.chunkcache, super.rowdef.objectsize() * index, length + 10) + ", searchkey=" + serverLog.arrayList(a, start, length));
140: kelondroRow.Entry entry = super .get(index);
141: super .removeRow(index, keepOrder);
142: //System.out.println("remove: chunk found at index position (after remove) " + index + ", inset=" + serverLog.arrayList(super.chunkcache, super.rowdef.objectsize() * index, length) + ", searchkey=" + serverLog.arrayList(a, start, length));
143: int findagainindex = find(a, start, length);
144: //System.out.println("kelondroRowSet.remove");
145: assert findagainindex < 0 : "remove: chunk found again at index position (after remove) "
146: + findagainindex
147: + ", index(before) = "
148: + index
149: + ", inset="
150: + serverLog.arrayList(super .chunkcache,
151: super .rowdef.objectsize * findagainindex,
152: length)
153: + ", searchkey="
154: + serverLog.arrayList(a, start, length); // check if the remove worked
155: return entry;
156: }
157:
158: public kelondroRow.Entry remove(byte[] a, boolean keepOrder) {
159: return remove(a, 0, a.length, keepOrder);
160: }
161:
162: private int find(byte[] a, int astart, int alength) {
163: // returns the chunknumber; -1 if not found
164:
165: if (rowdef.objectOrder == null)
166: return iterativeSearch(a, astart, alength, 0,
167: this .chunkcount);
168:
169: if ((this .chunkcount - this .sortBound) > (collectionReSortLimit << 1)) {
170: sort();
171: }
172:
173: if ((this .rowdef.objectOrder != null)
174: && (this .rowdef.objectOrder instanceof kelondroBase64Order)
175: && (this .sortBound > 4000)) {
176: // first try to find in sorted area
177: final byte[] compiledPivot = compilePivot(a, astart,
178: alength);
179: int p = binarySearchCompiledPivot(compiledPivot);
180: if (p >= 0)
181: return p;
182:
183: // then find in unsorted area
184: return iterativeSearchCompiledPivot(compiledPivot,
185: this .sortBound, this .chunkcount);
186: } else {
187: // first try to find in sorted area
188: int p = binarySearch(a, astart, alength);
189: if (p >= 0)
190: return p;
191:
192: // then find in unsorted area
193: return iterativeSearch(a, astart, alength, this .sortBound,
194: this .chunkcount);
195: }
196: }
197:
198: private int iterativeSearch(byte[] key, int astart, int alength,
199: int leftBorder, int rightBound) {
200: // returns the chunknumber
201: if (rowdef.objectOrder == null) {
202: for (int i = leftBorder; i < rightBound; i++) {
203: if (match(key, astart, alength, i))
204: return i;
205: }
206: return -1;
207: } else {
208: // we dont do a special handling of kelondroBase64Order here, because tests showed that this produces too much overhead
209: for (int i = leftBorder; i < rightBound; i++) {
210: if (compare(key, astart, alength, i) == 0)
211: return i;
212: }
213: return -1;
214: }
215: }
216:
217: private int iterativeSearchCompiledPivot(byte[] compiledPivot,
218: int leftBorder, int rightBound) {
219: // returns the chunknumber
220: assert (rowdef.objectOrder != null);
221: assert (rowdef.objectOrder instanceof kelondroBase64Order);
222: for (int i = leftBorder; i < rightBound; i++) {
223: if (comparePivot(compiledPivot, i) == 0)
224: return i;
225: }
226: return -1;
227: }
228:
229: private int binarySearch(byte[] key, int astart, int alength) {
230: // returns the exact position of the key if the key exists,
231: // or -1 if the key does not exist
232: assert (rowdef.objectOrder != null);
233: int l = 0;
234: int rbound = this .sortBound;
235: int p = 0;
236: int d;
237: while (l < rbound) {
238: p = l + ((rbound - l) >> 1);
239: d = compare(key, astart, alength, p);
240: if (d == 0)
241: return p;
242: if (d < 0)
243: rbound = p;
244: else
245: l = p + 1;
246: }
247: return -1;
248: }
249:
250: private int binarySearchCompiledPivot(byte[] compiledPivot) {
251: // returns the exact position of the key if the key exists,
252: // or -1 if the key does not exist
253: assert (rowdef.objectOrder != null);
254: assert (rowdef.objectOrder instanceof kelondroBase64Order);
255: int l = 0;
256: int rbound = this .sortBound;
257: int p = 0;
258: int d;
259: while (l < rbound) {
260: p = l + ((rbound - l) >> 1);
261: d = comparePivot(compiledPivot, p);
262: if (d == 0)
263: return p;
264: if (d < 0)
265: rbound = p;
266: else
267: l = p + 1;
268: }
269: return -1;
270: }
271:
272: public int binaryPosition(byte[] key, int astart, int alength) {
273: // returns the exact position of the key if the key exists,
274: // or a position of an entry that is greater than the key if the
275: // key does not exist
276: assert (rowdef.objectOrder != null);
277: int l = 0;
278: int rbound = this .sortBound;
279: int p = 0;
280: int d;
281: while (l < rbound) {
282: p = l + ((rbound - l) >> 1);
283: d = compare(key, astart, alength, p);
284: if (d == 0)
285: return p;
286: if (d < 0)
287: rbound = p;
288: else
289: l = p + 1;
290: }
291: return l;
292: }
293:
294: public kelondroProfile profile() {
295: return profile;
296: }
297:
298: public synchronized Iterator<byte[]> keys() {
299: sort();
300: return super .keys();
301: }
302:
303: public synchronized kelondroCloneableIterator<byte[]> keys(
304: boolean up, byte[] firstKey) {
305: return new keyIterator(up, firstKey);
306: }
307:
308: public class keyIterator implements
309: kelondroCloneableIterator<byte[]> {
310:
311: private boolean up;
312: private byte[] first;
313: private int p, bound;
314:
315: public keyIterator(boolean up, byte[] firstKey) {
316: // see that all elements are sorted
317: sort();
318: this .up = up;
319: this .first = firstKey;
320: this .bound = sortBound;
321: if (first == null) {
322: p = 0;
323: } else {
324: p = binaryPosition(first, 0, first.length); // check this to find bug in DHT selection enumeration
325: //System.out.println("binaryposition for key " + new String(firstKey) + " is " + p);
326: }
327: }
328:
329: public keyIterator clone(Object second) {
330: return new keyIterator(up, (byte[]) second);
331: }
332:
333: public boolean hasNext() {
334: if (p < 0)
335: return false;
336: if (p >= size())
337: return false;
338: if (up) {
339: return p < bound;
340: } else {
341: return p >= 0;
342: }
343: }
344:
345: public byte[] next() {
346: byte[] key = getKey(p);
347: if (up)
348: p++;
349: else
350: p--;
351: return key;
352: }
353:
354: public void remove() {
355: throw new UnsupportedOperationException();
356: }
357: }
358:
359: public synchronized Iterator<kelondroRow.Entry> rows() {
360: // iterates kelondroRow.Entry - type entries
361: sort();
362: return super .rows();
363: }
364:
365: public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(
366: boolean up, byte[] firstKey) {
367: return new rowIterator(up, firstKey);
368: }
369:
370: public class rowIterator implements
371: kelondroCloneableIterator<kelondroRow.Entry> {
372:
373: private boolean up;
374: private byte[] first;
375: private int p, bound;
376:
377: public rowIterator(boolean up, byte[] firstKey) {
378: // see that all elements are sorted
379: sort();
380: this .up = up;
381: this .first = firstKey;
382: this .bound = sortBound;
383: if (first == null) {
384: p = 0;
385: } else {
386: p = binaryPosition(first, 0, first.length); // check this to find bug in DHT selection enumeration
387: //System.out.println("binaryposition for key " + new String(firstKey) + " is " + p);
388: }
389: }
390:
391: public rowIterator clone(Object second) {
392: return new rowIterator(up, (byte[]) second);
393: }
394:
395: public boolean hasNext() {
396: if (p < 0)
397: return false;
398: if (p >= size())
399: return false;
400: if (up) {
401: return p < bound;
402: } else {
403: return p >= 0;
404: }
405: }
406:
407: public kelondroRow.Entry next() {
408: kelondroRow.Entry entry = get(p);
409: if (up)
410: p++;
411: else
412: p--;
413: return entry;
414: }
415:
416: public void remove() {
417: throw new UnsupportedOperationException();
418: }
419: }
420:
421: public static void main(String[] args) {
422: // sort/uniq-test
423: /*
424: kelondroRow rowdef = new kelondroRow("Cardinal key-4 {b256}, byte[] payload-1", kelondroNaturalOrder.naturalOrder, 0);
425: kelondroRowSet rs = new kelondroRowSet(rowdef, 0);
426: Random random = new Random(0);
427: kelondroRow.Entry entry;
428: for (int i = 0; i < 10000000; i++) {
429: entry = rowdef.newEntry();
430: entry.setCol(0, Math.abs(random.nextLong() % 1000000));
431: entry.setCol(1, "a".getBytes());
432: rs.addUnique(entry);
433: }
434: System.out.println("before sort, size = " + rs.size());
435: rs.sort();
436: System.out.println("after sort, before uniq, size = " + rs.size());
437: rs.uniq(10000);
438: System.out.println("after uniq, size = " + rs.size());
439: */
440:
441: String[] test = { "eins", "zwei", "drei", "vier", "fuenf",
442: "sechs", "sieben", "acht", "neun", "zehn" };
443: kelondroRowSet d = new kelondroRowSet(new kelondroRow(
444: "byte[] key-10, Cardinal x-4 {b256}",
445: kelondroNaturalOrder.naturalOrder, 0), 0);
446: d.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
447: for (int ii = 0; ii < test.length; ii++)
448: d.add(test[ii].getBytes());
449: for (int ii = 0; ii < test.length; ii++)
450: d.add(test[ii].getBytes());
451: d.sort();
452: d.remove("fuenf".getBytes(), 0, 5, false);
453: Iterator<kelondroRow.Entry> ii = d.rows();
454: String s;
455: System.out.print("INPUT-ITERATOR: ");
456: kelondroRow.Entry entry;
457: while (ii.hasNext()) {
458: entry = (kelondroRow.Entry) ii.next();
459: s = new String((byte[]) entry.getColBytes(0)).trim();
460: System.out.print(s + ", ");
461: if (s.equals("drei"))
462: ii.remove();
463: }
464: System.out.println("");
465: System.out.println("INPUT-TOSTRING: " + d.toString());
466: d.sort();
467: System.out.println("SORTED : " + d.toString());
468: d.uniq();
469: System.out.println("UNIQ : " + d.toString());
470: d.trim(false);
471: System.out.println("TRIM : " + d.toString());
472:
473: /*
474: // second test
475: c = new kelondroRowSet(new kelondroRow(new int[]{10, 3}));
476: c.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
477: Random rand = new Random(0);
478: long start = System.currentTimeMillis();
479: long t, d = 0;
480: String w;
481: for (long k = 0; k < 60000; k++) {
482: t = System.currentTimeMillis();
483: w = "a" + Long.toString(rand.nextLong());
484: c.add(w.getBytes());
485: if (k % 10000 == 0)
486: System.out.println("added " + k + " entries in " +
487: ((t - start) / 1000) + " seconds, " +
488: (((t - start) > 1000) ? (k / ((t - start) / 1000)) : k) +
489: " entries/second, size = " + c.size());
490: }
491: System.out.println("bevore sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
492: c.shape();
493: System.out.println("after sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
494: c.uniq();
495: System.out.println("after uniq: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
496: System.out.println("RESULT SIZE: " + c.size());
497: System.out.println();
498:
499: // third test
500: c = new kelondroRowSet(new kelondroRow(new int[]{10, 3}), 60000);
501: c.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
502: rand = new Random(0);
503: start = System.currentTimeMillis();
504: d = 0;
505: for (long k = 0; k < 60000; k++) {
506: t = System.currentTimeMillis();
507: w = "a" + Long.toString(rand.nextLong());
508: if (c.get(w.getBytes(), 0, 10) == null) c.add(w.getBytes()); else d++;
509: if (k % 10000 == 0)
510: System.out.println("added " + k + " entries in " +
511: ((t - start) / 1000) + " seconds, " +
512: (((t - start) > 1000) ? (k / ((t - start) / 1000)) : k) +
513: " entries/second, " + d + " double, size = " + c.size() +
514: ", sum = " + (c.size() + d));
515: }
516: System.out.println("RESULT SIZE: " + c.size());
517: */
518: /*
519: // performance test for put
520: long start = System.currentTimeMillis();
521: kelondroRowSet c = new kelondroRowSet(new kelondroRow("byte[] a-12, byte[] b-12"), 0);
522: Random random = new Random(0);
523: byte[] key;
524: for (int i = 0; i < 100000; i++) {
525: key = randomHash(random);
526: c.put(c.rowdef.newEntry(new byte[][]{key, key}));
527: if (i % 1000 == 0) System.out.println(i + " entries. ");
528: }
529: System.out.println("RESULT SIZE: " + c.size());
530: System.out.println("Time: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
531: */
532:
533: // remove test
534: long start = System.currentTimeMillis();
535: kelondroRowSet c = new kelondroRowSet(new kelondroRow(
536: "byte[] a-12, byte[] b-12",
537: kelondroBase64Order.enhancedCoder, 0), 0);
538: byte[] key;
539: int testsize = 5000;
540: byte[][] delkeys = new byte[testsize / 5][];
541: Random random = new Random(0);
542: for (int i = 0; i < testsize; i++) {
543: key = randomHash(random);
544: if (i % 5 != 0)
545: continue;
546: delkeys[i / 5] = key;
547: }
548: random = new Random(0);
549: for (int i = 0; i < testsize; i++) {
550: key = randomHash(random);
551: c.put(c.rowdef.newEntry(new byte[][] { key, key }));
552: if (i % 1000 == 0) {
553: for (int j = 0; j < delkeys.length; j++)
554: c.remove(delkeys[j], true);
555: c.sort();
556: }
557: }
558: for (int j = 0; j < delkeys.length; j++)
559: c.remove(delkeys[j], true);
560: c.sort();
561: random = new Random(0);
562: for (int i = 0; i < testsize; i++) {
563: key = randomHash(random);
564: if (i % 5 == 0)
565: continue;
566: if (c.get(key) == null)
567: System.out.println("missing entry " + new String(key));
568: }
569: c.sort();
570: System.out.println("RESULT SIZE: " + c.size());
571: System.out.println("Time: "
572: + ((System.currentTimeMillis() - start) / 1000)
573: + " seconds");
574: }
575:
576: public static byte[] randomHash(final long r0, final long r1) {
577: // a long can have 64 bit, but a 12-byte hash can have 6 * 12 = 72 bits
578: // so we construct a generic Hash using two long values
579: return (kelondroBase64Order.enhancedCoder.encodeLong(
580: Math.abs(r0), 11).substring(5) + kelondroBase64Order.enhancedCoder
581: .encodeLong(Math.abs(r1), 11).substring(5)).getBytes();
582: }
583:
584: public static byte[] randomHash(Random r) {
585: return randomHash(r.nextLong(), r.nextLong());
586: }
587:
588: public String filename() {
589: return null;
590: }
591:
592: }
|