001: /*-
002: * See the file LICENSE for redistribution information.
003: *
004: * Copyright (c) 2000,2008 Oracle. All rights reserved.
005: *
006: * $Id: TupleInput.java,v 1.29.2.2 2008/01/07 15:14:06 cwl Exp $
007: */
008:
009: package com.sleepycat.bind.tuple;
010:
011: import java.math.BigInteger;
012:
013: import com.sleepycat.util.FastInputStream;
014: import com.sleepycat.util.PackedInteger;
015: import com.sleepycat.util.UtfOps;
016:
017: /**
018: * An <code>InputStream</code> with <code>DataInput</code>-like methods for
019: * reading tuple fields. It is used by <code>TupleBinding</code>.
020: *
021: * <p>This class has many methods that have the same signatures as methods in
022: * the {@link java.io.DataInput} interface. The reason this class does not
023: * implement {@link java.io.DataInput} is because it would break the interface
024: * contract for those methods because of data format differences.</p>
025: *
026: * <p>Signed numbers are stored in the buffer in MSB (most significant byte
027: * first) order with their sign bit (high-order bit) inverted to cause negative
028: * numbers to be sorted first when comparing values as unsigned byte arrays,
029: * as done in a database. Unsigned numbers, including characters, are stored
030: * in MSB order with no change to their sign bit. BigInteger values are stored
031: * with a preceding length having the same sign as the value.</p>
032: *
033: * <p>Strings and character arrays are stored either as a fixed length array of
034: * unicode characters, where the length must be known by the application, or as
035: * a null-terminated UTF byte array.</p>
036: * <ul>
037: * <li>Null strings are UTF encoded as { 0xFF }, which is not allowed in a
038: * standard UTF encoding. This allows null strings, as distinct from empty or
039: * zero length strings, to be represented in a tuple. Using the default
040: * comparator, null strings will be ordered last.</li>
041: * <li>Zero (0x0000) character values are UTF encoded as non-zero values, and
042: * therefore embedded zeros in the string are supported. The sequence { 0xC0,
043: * 0x80 } is used to encode a zero character. This UTF encoding is the same
044: * one used by native Java UTF libraries. However, this encoding of zero does
045: * impact the lexicographical ordering, and zeros will not be sorted first (the
046: * natural order) or last. For all character values other than zero, the
047: * default UTF byte ordering is the same as the Unicode lexicographical
048: * character ordering.</li>
049: * </ul>
050: *
051: * <p>Floats and doubles are stored using two different representations: sorted
052: * representation and integer-bit (IEEE 754) representation. If you use
053: * negative floating point numbers in a key, you should use sorted
054: * representation; alternatively you may use integer-bit representation but you
055: * will need to implement and configure a custom comparator to get correct
056: * numeric ordering for negative numbers.</p>
057: *
058: * <p>To use sorted representation use this set of methods:</p>
059: * <ul>
060: * <li>{@link TupleOutput#writeSortedFloat}</li>
061: * <li>{@link TupleInput#readSortedFloat}</li>
062: * <li>{@link TupleOutput#writeSortedDouble}</li>
063: * <li>{@link TupleInput#readSortedDouble}</li>
064: * </ul>
065: *
066: * <p>To use integer-bit representation use this set of methods:</p>
067: * <ul>
068: * <li>{@link TupleOutput#writeFloat}</li>
069: * <li>{@link TupleInput#readFloat}</li>
070: * <li>{@link TupleOutput#writeDouble}</li>
071: * <li>{@link TupleInput#readDouble}</li>
072: * </ul>
073: *
074: * @author Mark Hayes
075: */
076: public class TupleInput extends FastInputStream {
077:
078: /**
079: * Creates a tuple input object for reading a byte array of tuple data. A
080: * reference to the byte array will be kept by this object (it will not be
081: * copied) and therefore the byte array should not be modified while this
082: * object is in use.
083: *
084: * @param buffer is the byte array to be read and should contain data in
085: * tuple format.
086: */
087: public TupleInput(byte[] buffer) {
088:
089: super (buffer);
090: }
091:
092: /**
093: * Creates a tuple input object for reading a byte array of tuple data at
094: * a given offset for a given length. A reference to the byte array will
095: * be kept by this object (it will not be copied) and therefore the byte
096: * array should not be modified while this object is in use.
097: *
098: * @param buffer is the byte array to be read and should contain data in
099: * tuple format.
100: *
101: * @param offset is the byte offset at which to begin reading.
102: *
103: * @param length is the number of bytes to be read.
104: */
105: public TupleInput(byte[] buffer, int offset, int length) {
106:
107: super (buffer, offset, length);
108: }
109:
110: /**
111: * Creates a tuple input object from the data contained in a tuple output
112: * object. A reference to the tuple output's byte array will be kept by
113: * this object (it will not be copied) and therefore the tuple output
114: * object should not be modified while this object is in use.
115: *
116: * @param output is the tuple output object containing the data to be read.
117: */
118: public TupleInput(TupleOutput output) {
119:
120: super (output.getBufferBytes(), output.getBufferOffset(), output
121: .getBufferLength());
122: }
123:
124: // --- begin DataInput compatible methods ---
125:
126: /**
127: * Reads a null-terminated UTF string from the data buffer and converts
128: * the data from UTF to Unicode.
129: * Reads values that were written using {@link
130: * TupleOutput#writeString(String)}.
131: *
132: * @return the converted string.
133: *
134: * @throws IndexOutOfBoundsException if no null terminating byte is found
135: * in the buffer.
136: *
137: * @throws IllegalArgumentException malformed UTF data is encountered.
138: */
139: public final String readString() throws IndexOutOfBoundsException,
140: IllegalArgumentException {
141:
142: byte[] myBuf = buf;
143: int myOff = off;
144: if (available() >= 2
145: && myBuf[myOff] == TupleOutput.NULL_STRING_UTF_VALUE
146: && myBuf[myOff + 1] == 0) {
147: skip(2);
148: return null;
149: } else {
150: int byteLen = UtfOps.getZeroTerminatedByteLength(myBuf,
151: myOff);
152: skip(byteLen + 1);
153: return UtfOps.bytesToString(myBuf, myOff, byteLen);
154: }
155: }
156:
157: /**
158: * Reads a char (two byte) unsigned value from the buffer.
159: * Reads values that were written using {@link TupleOutput#writeChar}.
160: *
161: * @return the value read from the buffer.
162: *
163: * @throws IndexOutOfBoundsException if not enough bytes are available in
164: * the buffer.
165: */
166: public final char readChar() throws IndexOutOfBoundsException {
167:
168: return (char) readUnsignedShort();
169: }
170:
171: /**
172: * Reads a boolean (one byte) unsigned value from the buffer and returns
173: * true if it is non-zero and false if it is zero.
174: * Reads values that were written using {@link TupleOutput#writeBoolean}.
175: *
176: * @return the value read from the buffer.
177: *
178: * @throws IndexOutOfBoundsException if not enough bytes are available in
179: * the buffer.
180: */
181: public final boolean readBoolean() throws IndexOutOfBoundsException {
182:
183: int c = readFast();
184: if (c < 0) {
185: throw new IndexOutOfBoundsException();
186: }
187: return (c != 0);
188: }
189:
190: /**
191: * Reads a signed byte (one byte) value from the buffer.
192: * Reads values that were written using {@link TupleOutput#writeByte}.
193: *
194: * @return the value read from the buffer.
195: *
196: * @throws IndexOutOfBoundsException if not enough bytes are available in
197: * the buffer.
198: */
199: public final byte readByte() throws IndexOutOfBoundsException {
200:
201: return (byte) (readUnsignedByte() ^ 0x80);
202: }
203:
204: /**
205: * Reads a signed short (two byte) value from the buffer.
206: * Reads values that were written using {@link TupleOutput#writeShort}.
207: *
208: * @return the value read from the buffer.
209: *
210: * @throws IndexOutOfBoundsException if not enough bytes are available in
211: * the buffer.
212: */
213: public final short readShort() throws IndexOutOfBoundsException {
214:
215: return (short) (readUnsignedShort() ^ 0x8000);
216: }
217:
218: /**
219: * Reads a signed int (four byte) value from the buffer.
220: * Reads values that were written using {@link TupleOutput#writeInt}.
221: *
222: * @return the value read from the buffer.
223: *
224: * @throws IndexOutOfBoundsException if not enough bytes are available in
225: * the buffer.
226: */
227: public final int readInt() throws IndexOutOfBoundsException {
228:
229: return (int) (readUnsignedInt() ^ 0x80000000);
230: }
231:
232: /**
233: * Reads a signed long (eight byte) value from the buffer.
234: * Reads values that were written using {@link TupleOutput#writeLong}.
235: *
236: * @return the value read from the buffer.
237: *
238: * @throws IndexOutOfBoundsException if not enough bytes are available in
239: * the buffer.
240: */
241: public final long readLong() throws IndexOutOfBoundsException {
242:
243: return readUnsignedLong() ^ 0x8000000000000000L;
244: }
245:
246: /**
247: * Reads a signed float (four byte) value from the buffer.
248: * Reads values that were written using {@link TupleOutput#writeFloat}.
249: * <code>Float.intBitsToFloat</code> is used to convert the signed int
250: * value.
251: *
252: * <p><em>Note:</em> This method operations on byte array values that by
253: * default (without a custom comparator) do <em>not</em> sort correctly for
254: * negative values. Only non-negative values are sorted correctly by
255: * default. To sort all values correctly by default, use {@link
256: * #readSortedFloat}.</p>
257: *
258: * @return the value read from the buffer.
259: *
260: * @throws IndexOutOfBoundsException if not enough bytes are available in
261: * the buffer.
262: */
263: public final float readFloat() throws IndexOutOfBoundsException {
264:
265: return Float.intBitsToFloat((int) readUnsignedInt());
266: }
267:
268: /**
269: * Reads a signed double (eight byte) value from the buffer.
270: * Reads values that were written using {@link TupleOutput#writeDouble}.
271: * <code>Double.longBitsToDouble</code> is used to convert the signed long
272: * value.
273: *
274: * <p><em>Note:</em> This method operations on byte array values that by
275: * default (without a custom comparator) do <em>not</em> sort correctly for
276: * negative values. Only non-negative values are sorted correctly by
277: * default. To sort all values correctly by default, use {@link
278: * #readSortedDouble}.</p>
279: *
280: * @return the value read from the buffer.
281: *
282: * @throws IndexOutOfBoundsException if not enough bytes are available in
283: * the buffer.
284: */
285: public final double readDouble() throws IndexOutOfBoundsException {
286:
287: return Double.longBitsToDouble(readUnsignedLong());
288: }
289:
290: /**
291: * Reads a signed float (four byte) value from the buffer, with support
292: * for correct default sorting of all values.
293: * Reads values that were written using {@link
294: * TupleOutput#writeSortedFloat}.
295: *
296: * <p><code>Float.intBitsToFloat</code> and the following bit
297: * manipulations are used to convert the stored representation to a signed
298: * float value.</p>
299: * <pre>
300: * int val = ... // get stored bits
301: * val ^= (val < 0) ? 0x80000000 : 0xffffffff;
302: * return Float.intBitsToFloat(val);
303: * </pre>
304: *
305: * @return the value read from the buffer.
306: *
307: * @throws IndexOutOfBoundsException if not enough bytes are available in
308: * the buffer.
309: */
310: public final float readSortedFloat()
311: throws IndexOutOfBoundsException {
312:
313: int val = (int) readUnsignedInt();
314: val ^= (val < 0) ? 0x80000000 : 0xffffffff;
315: return Float.intBitsToFloat(val);
316: }
317:
318: /**
319: * Reads a signed double (eight byte) value from the buffer, with support
320: * for correct default sorting of all values.
321: * Reads values that were written using {@link
322: * TupleOutput#writeSortedDouble}.
323: *
324: * <p><code>Float.longBitsToDouble</code> and the following bit
325: * manipulations are used to convert the stored representation to a signed
326: * double value.</p>
327: * <pre>
328: * int val = ... // get stored bits
329: val ^= (val < 0) ? 0x8000000000000000L : 0xffffffffffffffffL;
330: return Double.longBitsToDouble(val);
331: * </pre>
332: *
333: * @return the value read from the buffer.
334: *
335: * @throws IndexOutOfBoundsException if not enough bytes are available in
336: * the buffer.
337: */
338: public final double readSortedDouble()
339: throws IndexOutOfBoundsException {
340:
341: long val = readUnsignedLong();
342: val ^= (val < 0) ? 0x8000000000000000L : 0xffffffffffffffffL;
343: return Double.longBitsToDouble(val);
344: }
345:
346: /**
347: * Reads an unsigned byte (one byte) value from the buffer.
348: * Reads values that were written using {@link
349: * TupleOutput#writeUnsignedByte}.
350: *
351: * @return the value read from the buffer.
352: *
353: * @throws IndexOutOfBoundsException if not enough bytes are available in
354: * the buffer.
355: */
356: public final int readUnsignedByte()
357: throws IndexOutOfBoundsException {
358:
359: int c = readFast();
360: if (c < 0) {
361: throw new IndexOutOfBoundsException();
362: }
363: return c;
364: }
365:
366: /**
367: * Reads an unsigned short (two byte) value from the buffer.
368: * Reads values that were written using {@link
369: * TupleOutput#writeUnsignedShort}.
370: *
371: * @return the value read from the buffer.
372: *
373: * @throws IndexOutOfBoundsException if not enough bytes are available in
374: * the buffer.
375: */
376: public final int readUnsignedShort()
377: throws IndexOutOfBoundsException {
378:
379: int c1 = readFast();
380: int c2 = readFast();
381: if ((c1 | c2) < 0) {
382: throw new IndexOutOfBoundsException();
383: }
384: return ((c1 << 8) | c2);
385: }
386:
387: // --- end DataInput compatible methods ---
388:
389: /**
390: * Reads an unsigned int (four byte) value from the buffer.
391: * Reads values that were written using {@link
392: * TupleOutput#writeUnsignedInt}.
393: *
394: * @return the value read from the buffer.
395: *
396: * @throws IndexOutOfBoundsException if not enough bytes are available in
397: * the buffer.
398: */
399: public final long readUnsignedInt()
400: throws IndexOutOfBoundsException {
401:
402: long c1 = readFast();
403: long c2 = readFast();
404: long c3 = readFast();
405: long c4 = readFast();
406: if ((c1 | c2 | c3 | c4) < 0) {
407: throw new IndexOutOfBoundsException();
408: }
409: return ((c1 << 24) | (c2 << 16) | (c3 << 8) | c4);
410: }
411:
412: /**
413: * This method is private since an unsigned long cannot be treated as
414: * such in Java, nor converted to a BigInteger of the same value.
415: */
416: private final long readUnsignedLong()
417: throws IndexOutOfBoundsException {
418:
419: long c1 = readFast();
420: long c2 = readFast();
421: long c3 = readFast();
422: long c4 = readFast();
423: long c5 = readFast();
424: long c6 = readFast();
425: long c7 = readFast();
426: long c8 = readFast();
427: if ((c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8) < 0) {
428: throw new IndexOutOfBoundsException();
429: }
430: return ((c1 << 56) | (c2 << 48) | (c3 << 40) | (c4 << 32)
431: | (c5 << 24) | (c6 << 16) | (c7 << 8) | c8);
432: }
433:
434: /**
435: * Reads the specified number of bytes from the buffer, converting each
436: * unsigned byte value to a character of the resulting string.
437: * Reads values that were written using {@link TupleOutput#writeBytes}.
438: * Only characters with values below 0x100 may be read using this method.
439: *
440: * @param length is the number of bytes to be read.
441: *
442: * @return the value read from the buffer.
443: *
444: * @throws IndexOutOfBoundsException if not enough bytes are available in
445: * the buffer.
446: */
447: public final String readBytes(int length)
448: throws IndexOutOfBoundsException {
449:
450: StringBuffer buf = new StringBuffer(length);
451: for (int i = 0; i < length; i++) {
452: int c = readFast();
453: if (c < 0) {
454: throw new IndexOutOfBoundsException();
455: }
456: buf.append((char) c);
457: }
458: return buf.toString();
459: }
460:
461: /**
462: * Reads the specified number of characters from the buffer, converting
463: * each two byte unsigned value to a character of the resulting string.
464: * Reads values that were written using {@link TupleOutput#writeChars}.
465: *
466: * @param length is the number of characters to be read.
467: *
468: * @return the value read from the buffer.
469: *
470: * @throws IndexOutOfBoundsException if not enough bytes are available in
471: * the buffer.
472: */
473: public final String readChars(int length)
474: throws IndexOutOfBoundsException {
475:
476: StringBuffer buf = new StringBuffer(length);
477: for (int i = 0; i < length; i++) {
478: buf.append(readChar());
479: }
480: return buf.toString();
481: }
482:
483: /**
484: * Reads the specified number of bytes from the buffer, converting each
485: * unsigned byte value to a character of the resulting array.
486: * Reads values that were written using {@link TupleOutput#writeBytes}.
487: * Only characters with values below 0x100 may be read using this method.
488: *
489: * @param chars is the array to receive the data and whose length is used
490: * to determine the number of bytes to be read.
491: *
492: * @throws IndexOutOfBoundsException if not enough bytes are available in
493: * the buffer.
494: */
495: public final void readBytes(char[] chars)
496: throws IndexOutOfBoundsException {
497:
498: for (int i = 0; i < chars.length; i++) {
499: int c = readFast();
500: if (c < 0) {
501: throw new IndexOutOfBoundsException();
502: }
503: chars[i] = (char) c;
504: }
505: }
506:
507: /**
508: * Reads the specified number of characters from the buffer, converting
509: * each two byte unsigned value to a character of the resulting array.
510: * Reads values that were written using {@link TupleOutput#writeChars}.
511: *
512: * @param chars is the array to receive the data and whose length is used
513: * to determine the number of characters to be read.
514: *
515: * @throws IndexOutOfBoundsException if not enough bytes are available in
516: * the buffer.
517: */
518: public final void readChars(char[] chars)
519: throws IndexOutOfBoundsException {
520:
521: for (int i = 0; i < chars.length; i++) {
522: chars[i] = readChar();
523: }
524: }
525:
526: /**
527: * Reads the specified number of UTF characters string from the data
528: * buffer and converts the data from UTF to Unicode.
529: * Reads values that were written using {@link
530: * TupleOutput#writeString(char[])}.
531: *
532: * @param length is the number of characters to be read.
533: *
534: * @return the converted string.
535: *
536: * @throws IndexOutOfBoundsException if no null terminating byte is found
537: * in the buffer.
538: *
539: * @throws IllegalArgumentException malformed UTF data is encountered.
540: */
541: public final String readString(int length)
542: throws IndexOutOfBoundsException, IllegalArgumentException {
543:
544: char[] chars = new char[length];
545: readString(chars);
546: return new String(chars);
547: }
548:
549: /**
550: * Reads the specified number of UTF characters string from the data
551: * buffer and converts the data from UTF to Unicode.
552: * Reads values that were written using {@link
553: * TupleOutput#writeString(char[])}.
554: *
555: * @param chars is the array to receive the data and whose length is used
556: * to determine the number of characters to be read.
557: *
558: * @throws IndexOutOfBoundsException if no null terminating byte is found
559: * in the buffer.
560: *
561: * @throws IllegalArgumentException malformed UTF data is encountered.
562: */
563: public final void readString(char[] chars)
564: throws IndexOutOfBoundsException, IllegalArgumentException {
565:
566: off = UtfOps.bytesToChars(buf, off, chars, 0, chars.length,
567: false);
568: }
569:
570: /**
571: * Returns the byte length of a null-terminated UTF string in the data
572: * buffer, including the terminator. Used with string values that were
573: * written using {@link TupleOutput#writeString(String)}.
574: *
575: * @throws IndexOutOfBoundsException if no null terminating byte is found
576: * in the buffer.
577: *
578: * @throws IllegalArgumentException malformed UTF data is encountered.
579: */
580: public final int getStringByteLength()
581: throws IndexOutOfBoundsException, IllegalArgumentException {
582:
583: if (available() >= 2
584: && buf[off] == TupleOutput.NULL_STRING_UTF_VALUE
585: && buf[off + 1] == 0) {
586: return 2;
587: } else {
588: return UtfOps.getZeroTerminatedByteLength(buf, off) + 1;
589: }
590: }
591:
592: /**
593: * Reads a packed integer. Note that packed integers are not appropriate
594: * for sorted values (keys) unless a custom comparator is used.
595: *
596: * @see PackedInteger
597: */
598: public final int readPackedInt() {
599:
600: int len = PackedInteger.getReadIntLength(buf, off);
601: int val = PackedInteger.readInt(buf, off);
602:
603: off += len;
604: return val;
605: }
606:
607: /**
608: * Returns the byte length of a packed integer.
609: *
610: * @see PackedInteger
611: */
612: public final int getPackedIntByteLength() {
613: return PackedInteger.getReadIntLength(buf, off);
614: }
615:
616: /**
617: * Reads a {@code BigInteger}.
618: *
619: * @see TupleOutput#writeBigInteger
620: */
621: public final BigInteger readBigInteger() {
622: int len = readShort();
623: if (len < 0) {
624: len = (-len);
625: }
626: byte[] a = new byte[len];
627: a[0] = readByte();
628: readFast(a, 1, a.length - 1);
629: return new BigInteger(a);
630: }
631:
632: /**
633: * Returns the byte length of a {@code BigInteger}.
634: *
635: * @see TupleOutput#writeBigInteger
636: */
637: public final int getBigIntegerByteLength() {
638: int saveOff = off;
639: int len = readShort();
640: off = saveOff;
641: if (len < 0) {
642: len = (-len);
643: }
644: return len + 2;
645: }
646: }
|