001: /***** BEGIN LICENSE BLOCK *****
002: * Version: CPL 1.0/GPL 2.0/LGPL 2.1
003: *
004: * The contents of this file are subject to the Common Public
005: * License Version 1.0 (the "License"); you may not use this file
006: * except in compliance with the License. You may obtain a copy of
007: * the License at http://www.eclipse.org/legal/cpl-v10.html
008: *
009: * Software distributed under the License is distributed on an "AS
010: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
011: * implied. See the License for the specific language governing
012: * rights and limitations under the License.
013: *
014: * Copyright (C) 2007 Charles O Nutter <headius@headius.com>
015: * Copyright (C) 2007 Nick Sieger <nicksieger@gmail.com>
016: * Copyright (C) 2007 Ola Bini <ola@ologix.com>
017: * Copyright (C) 2007 William N Dortch <bill.dortch@gmail.com>
018: *
019: * Alternatively, the contents of this file may be used under the terms of
020: * either of the GNU General Public License Version 2 or later (the "GPL"),
021: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
022: * in which case the provisions of the GPL or the LGPL are applicable instead
023: * of those above. If you wish to allow use of your version of this file only
024: * under the terms of either the GPL or the LGPL, and not to allow others to
025: * use your version of this file under the terms of the CPL, indicate your
026: * decision by deleting the provisions above and replace them with the notice
027: * and other provisions required by the GPL or the LGPL. If you do not delete
028: * the provisions above, a recipient may use your version of this file under
029: * the terms of any one of the CPL, the GPL or the LGPL.
030: ***** END LICENSE BLOCK *****/package org.jruby.util;
031:
032: import java.io.Serializable;
033: import java.io.UnsupportedEncodingException;
034:
035: /**
036: *
037: * @author headius
038: */
039: public final class ByteList implements Comparable, CharSequence,
040: Serializable {
041: private static final long serialVersionUID = -1286166947275543731L;
042:
043: public static final byte[] NULL_ARRAY = new byte[0];
044: public static final ByteList EMPTY_BYTELIST = new ByteList(0);
045:
046: public byte[] bytes;
047: public int begin;
048: public int realSize;
049:
050: int hash;
051: boolean validHash = false;
052: String stringValue;
053:
054: private static final int DEFAULT_SIZE = 4;
055: private static final double FACTOR = 1.5;
056:
057: /** Creates a new instance of ByteList */
058: public ByteList() {
059: this (DEFAULT_SIZE);
060: }
061:
062: public ByteList(int size) {
063: bytes = new byte[size];
064: realSize = 0;
065: }
066:
067: public ByteList(byte[] wrap) {
068: this (wrap, true);
069: }
070:
071: public ByteList(byte[] wrap, boolean copy) {
072: if (wrap == null)
073: throw new NullPointerException(
074: "Invalid argument: constructing with null array");
075: if (copy) {
076: bytes = (byte[]) wrap.clone();
077: } else {
078: bytes = wrap;
079: }
080: realSize = wrap.length;
081: }
082:
083: public ByteList(ByteList wrap) {
084: this (wrap.bytes, wrap.begin, wrap.realSize);
085: }
086:
087: public ByteList(byte[] wrap, int index, int len) {
088: this (wrap, index, len, true);
089: }
090:
091: public ByteList(byte[] wrap, int index, int len, boolean copy) {
092: if (wrap == null)
093: throw new NullPointerException(
094: "Invalid argument: constructing with null array");
095: if (copy || index != 0) {
096: bytes = new byte[len];
097: System.arraycopy(wrap, index, bytes, 0, len);
098: } else {
099: bytes = wrap;
100: }
101: realSize = len;
102: }
103:
104: public ByteList(ByteList wrap, int index, int len) {
105: this (wrap.bytes, wrap.begin + index, len);
106: }
107:
108: private ByteList(boolean flag) {
109: }
110:
111: public void delete(int start, int len) {
112: realSize -= len;
113: System.arraycopy(bytes, start + len, bytes, start, realSize);
114: }
115:
116: public ByteList append(byte b) {
117: grow(1);
118: bytes[realSize++] = b;
119: return this ;
120: }
121:
122: public ByteList append(int b) {
123: append((byte) b);
124: return this ;
125: }
126:
127: public Object clone() {
128: return dup();
129: }
130:
131: public ByteList dup() {
132: return dup(realSize);
133: }
134:
135: public ByteList dup(int length) {
136: ByteList dup = new ByteList(false);
137: dup.bytes = new byte[length];
138: System.arraycopy(bytes, begin, dup.bytes, 0, realSize);
139: dup.realSize = realSize;
140: dup.begin = 0;
141:
142: dup.validHash = validHash;
143: dup.hash = hash;
144: dup.stringValue = stringValue;
145:
146: return dup;
147: }
148:
149: public ByteList makeShared(int index, int len) {
150: ByteList shared = new ByteList(false);
151: shared.bytes = bytes;
152: shared.realSize = len;
153: shared.begin = begin + index;
154: return shared;
155: }
156:
157: public void view(int index, int len) {
158: realSize = len;
159: begin = begin + index;
160: }
161:
162: public void unshare() {
163: unshare(realSize);
164: }
165:
166: public void unshare(int length) {
167: byte[] newBytes = new byte[length];
168: System.arraycopy(bytes, begin, newBytes, 0, realSize);
169: bytes = newBytes;
170: begin = 0;
171: }
172:
173: public void invalidate() {
174: validHash = false;
175: stringValue = null;
176: }
177:
178: public void prepend(byte b) {
179: grow(1);
180: System.arraycopy(bytes, 0, bytes, 1, realSize);
181: bytes[0] = b;
182: realSize++;
183: }
184:
185: public void append(byte[] moreBytes) {
186: grow(moreBytes.length);
187: System.arraycopy(moreBytes, 0, bytes, realSize,
188: moreBytes.length);
189: realSize += moreBytes.length;
190: }
191:
192: public void append(ByteList moreBytes) {
193: append(moreBytes.bytes, moreBytes.begin, moreBytes.realSize);
194: }
195:
196: public void append(ByteList moreBytes, int index, int len) {
197: append(moreBytes.bytes, moreBytes.begin + index, len);
198: }
199:
200: public void append(byte[] moreBytes, int start, int len) {
201: grow(len);
202: System.arraycopy(moreBytes, start, bytes, realSize, len);
203: realSize += len;
204: }
205:
206: public void realloc(int length) {
207: byte tmp[] = new byte[length];
208: System.arraycopy(bytes, 0, tmp, 0, realSize);
209: bytes = tmp;
210: }
211:
212: public int length() {
213: return realSize;
214: }
215:
216: public void length(int newLength) {
217: grow(newLength - realSize);
218: realSize = newLength;
219: }
220:
221: public int get(int index) {
222: if (index >= realSize)
223: throw new IndexOutOfBoundsException();
224: return bytes[begin + index];
225: }
226:
227: public void set(int index, int b) {
228: if (index >= realSize)
229: throw new IndexOutOfBoundsException();
230: bytes[begin + index] = (byte) b;
231: }
232:
233: public void replace(byte[] newBytes) {
234: if (newBytes == null)
235: throw new NullPointerException(
236: "Invalid argument: replacing with null array");
237: this .bytes = newBytes;
238: realSize = newBytes.length;
239: }
240:
241: /**
242: * Unsafe version of replace(int,int,ByteList). The contract is that these
243: * unsafe versions will not make sure thet beg and len indices are correct.
244: */
245: public void unsafeReplace(int beg, int len, ByteList nbytes) {
246: unsafeReplace(beg, len, nbytes.bytes, nbytes.begin,
247: nbytes.realSize);
248: }
249:
250: /**
251: * Unsafe version of replace(int,int,byte[]). The contract is that these
252: * unsafe versions will not make sure thet beg and len indices are correct.
253: */
254: public void unsafeReplace(int beg, int len, byte[] buf) {
255: unsafeReplace(beg, len, buf, 0, buf.length);
256: }
257:
258: /**
259: * Unsafe version of replace(int,int,byte[],int,int). The contract is that these
260: * unsafe versions will not make sure thet beg and len indices are correct.
261: */
262: public void unsafeReplace(int beg, int len, byte[] nbytes,
263: int index, int count) {
264: grow(count - len);
265: int newSize = realSize + count - len;
266: System.arraycopy(bytes, beg + len, bytes, beg + count, realSize
267: - (len + beg));
268: System.arraycopy(nbytes, index, bytes, beg, count);
269: realSize = newSize;
270: }
271:
272: public void replace(int beg, int len, ByteList nbytes) {
273: replace(beg, len, nbytes.bytes, nbytes.begin, nbytes.realSize);
274: }
275:
276: public void replace(int beg, int len, byte[] buf) {
277: replace(beg, len, buf, 0, buf.length);
278: }
279:
280: public void replace(int beg, int len, byte[] nbytes, int index,
281: int count) {
282: if (len - beg > realSize)
283: throw new IndexOutOfBoundsException();
284: unsafeReplace(beg, len, nbytes, index, count);
285: }
286:
287: public void insert(int index, int b) {
288: if (index >= realSize)
289: throw new IndexOutOfBoundsException();
290: grow(1);
291: System.arraycopy(bytes, index, bytes, index + 1, realSize
292: - index);
293: bytes[index] = (byte) b;
294: realSize++;
295: }
296:
297: public int indexOf(int c) {
298: return indexOf(c, 0);
299: }
300:
301: public int indexOf(final int c, int pos) {
302: // not sure if this is checked elsewhere,
303: // didn't see it in RubyString. RubyString does
304: // cast to char, so c will be >= 0.
305: if (c > 255)
306: return -1;
307: final byte b = (byte) (c & 0xFF);
308: final int size = begin + realSize;
309: final byte[] buf = bytes;
310: pos += begin;
311: for (; pos < size && buf[pos] != b; pos++)
312: ;
313: return pos < size ? pos - begin : -1;
314: }
315:
316: public int indexOf(ByteList find) {
317: return indexOf(find, 0);
318: }
319:
320: public int indexOf(ByteList find, int i) {
321: return indexOf(bytes, begin, realSize, find.bytes, find.begin,
322: find.realSize, i);
323: }
324:
325: static int indexOf(byte[] source, int sourceOffset,
326: int sourceCount, byte[] target, int targetOffset,
327: int targetCount, int fromIndex) {
328: if (fromIndex >= sourceCount)
329: return (targetCount == 0 ? sourceCount : -1);
330: if (fromIndex < 0)
331: fromIndex = 0;
332: if (targetCount == 0)
333: return fromIndex;
334:
335: byte first = target[targetOffset];
336: int max = sourceOffset + (sourceCount - targetCount);
337:
338: for (int i = sourceOffset + fromIndex; i <= max; i++) {
339: if (source[i] != first)
340: while (++i <= max && source[i] != first)
341: ;
342:
343: if (i <= max) {
344: int j = i + 1;
345: int end = j + targetCount - 1;
346: for (int k = targetOffset + 1; j < end
347: && source[j] == target[k]; j++, k++)
348: ;
349:
350: if (j == end)
351: return i - sourceOffset;
352: }
353: }
354: return -1;
355: }
356:
357: public int lastIndexOf(int c) {
358: return lastIndexOf(c, realSize - 1);
359: }
360:
361: public int lastIndexOf(final int c, int pos) {
362: // not sure if this is checked elsewhere,
363: // didn't see it in RubyString. RubyString does
364: // cast to char, so c will be >= 0.
365: if (c > 255)
366: return -1;
367: final byte b = (byte) (c & 0xFF);
368: final int size = begin + realSize;
369: pos += begin;
370: final byte[] buf = bytes;
371: if (pos >= size) {
372: pos = size;
373: } else {
374: pos++;
375: }
376: for (; --pos >= begin && buf[pos] != b;)
377: ;
378: return pos - begin;
379: }
380:
381: public int lastIndexOf(ByteList find) {
382: return lastIndexOf(find, realSize);
383: }
384:
385: public int lastIndexOf(ByteList find, int pos) {
386: return lastIndexOf(bytes, begin, realSize, find.bytes,
387: find.begin, find.realSize, pos);
388: }
389:
390: static int lastIndexOf(byte[] source, int sourceOffset,
391: int sourceCount, byte[] target, int targetOffset,
392: int targetCount, int fromIndex) {
393: int rightIndex = sourceCount - targetCount;
394: if (fromIndex < 0)
395: return -1;
396: if (fromIndex > rightIndex)
397: fromIndex = rightIndex;
398: if (targetCount == 0)
399: return fromIndex;
400:
401: int strLastIndex = targetOffset + targetCount - 1;
402: byte strLastChar = target[strLastIndex];
403: int min = sourceOffset + targetCount - 1;
404: int i = min + fromIndex;
405:
406: startSearchForLastChar: while (true) {
407: while (i >= min && source[i] != strLastChar)
408: i--;
409: if (i < min)
410: return -1;
411: int j = i - 1;
412: int start = j - (targetCount - 1);
413: int k = strLastIndex - 1;
414:
415: while (j > start) {
416: if (source[j--] != target[k--]) {
417: i--;
418: continue startSearchForLastChar;
419: }
420: }
421: return start - sourceOffset + 1;
422: }
423: }
424:
425: public boolean equals(Object other) {
426: if (other instanceof ByteList)
427: return equal((ByteList) other);
428: return false;
429: }
430:
431: public boolean equal(ByteList other) {
432: if (other == this )
433: return true;
434: if (validHash && other.validHash && hash != other.hash)
435: return false;
436: int first;
437: int last;
438: byte[] buf;
439: if ((last = realSize) == other.realSize) {
440: // scanning from front and back simultaneously, meeting in
441: // the middle. the object is to get a mismatch as quickly as
442: // possible. alternatives might be: scan from the middle outward
443: // (not great because it won't pick up common variations at the
444: // ends until late) or sample odd bytes forward and even bytes
445: // backward (I like this one, but it's more expensive for
446: // strings that are equal; see sample_equals below).
447: for (buf = bytes, first = -1; --last > first
448: && buf[begin + last] == other.bytes[other.begin
449: + last]
450: && ++first < last
451: && buf[begin + first] == other.bytes[other.begin
452: + first];)
453: ;
454: return first >= last;
455: }
456: return false;
457: }
458:
459: // an alternative to the new version of equals, should
460: // detect inequality faster (in many cases), but is slow
461: // in the case of equal values (all bytes visited), due to
462: // using n+=2, n-=2 vs. ++n, --n while iterating over the array.
463: public boolean sample_equals(Object other) {
464: if (other == this )
465: return true;
466: if (other instanceof ByteList) {
467: ByteList b = (ByteList) other;
468: int first;
469: int last;
470: int size;
471: byte[] buf;
472: if ((size = realSize) == b.realSize) {
473: // scanning from front and back simultaneously, sampling odd
474: // bytes on the forward iteration and even bytes on the
475: // reverse iteration. the object is to get a mismatch as quickly
476: // as possible.
477: for (buf = bytes, first = -1, last = (size + 1) & ~1; (last -= 2) >= 0
478: && buf[begin + last] == b.bytes[b.begin + last]
479: && (first += 2) < size
480: && buf[begin + first] == b.bytes[b.begin
481: + first];)
482: ;
483: return last < 0 || first == size;
484: }
485: }
486: return false;
487: }
488:
489: /**
490: * This comparison matches MRI comparison of Strings (rb_str_cmp).
491: * I wish we had memcmp right now...
492: */
493: public int compareTo(Object other) {
494: return cmp((ByteList) other);
495: }
496:
497: public int cmp(final ByteList other) {
498: if (other == this )
499: return 0;
500: final int size = realSize;
501: final int len = Math.min(size, other.realSize);
502: int offset = -1;
503: // a bit of VM/JIT weirdness here: though in most cases
504: // performance is improved if array references are kept in
505: // a local variable (saves an instruction per access, as I
506: // [slightly] understand it), in some cases, when two (or more?)
507: // arrays are being accessed, the member reference is actually
508: // faster. this is one of those cases...
509: for (; ++offset < len
510: && bytes[begin + offset] == other.bytes[other.begin
511: + offset];)
512: ;
513: if (offset < len) {
514: return (bytes[begin + offset] & 0xFF) > (other.bytes[other.begin
515: + offset] & 0xFF) ? 1 : -1;
516: }
517: return size == other.realSize ? 0 : size == len ? -1 : 1;
518: }
519:
520: /**
521: * Returns the internal byte array. This is unsafe unless you know what you're
522: * doing. But it can improve performance for byte-array operations that
523: * won't change the array.
524: *
525: * @return the internal byte array
526: */
527: public byte[] unsafeBytes() {
528: return bytes;
529: }
530:
531: public byte[] bytes() {
532: byte[] newBytes = new byte[realSize];
533: System.arraycopy(bytes, begin, newBytes, 0, realSize);
534: return newBytes;
535: }
536:
537: public int begin() {
538: return begin;
539: }
540:
541: private void grow(int increaseRequested) {
542: if (increaseRequested < 0) {
543: return;
544: }
545: int newSize = realSize + increaseRequested;
546: if (bytes.length < newSize) {
547: byte[] newBytes = new byte[(int) (newSize * FACTOR)];
548: System.arraycopy(bytes, 0, newBytes, 0, realSize);
549: bytes = newBytes;
550: }
551: }
552:
553: public int hashCode() {
554: if (validHash)
555: return hash;
556:
557: int key = 0;
558: int index = begin;
559: final int end = begin + realSize;
560: while (index < end) {
561: // equivalent of: key = key * 65599 + byte;
562: key = ((key << 16) + (key << 6) - key)
563: + (int) (bytes[index++]); // & 0xFF ?
564: }
565: key = key + (key >> 5);
566: validHash = true;
567: return hash = key;
568: }
569:
570: /**
571: * Remembers toString value, which is expensive for StringBuffer.
572: *
573: * @return an ISO-8859-1 representation of the byte list
574: */
575: public String toString() {
576: try {
577: if (stringValue == null)
578: stringValue = toString("ISO-8859-1");
579: return stringValue;
580: } catch (UnsupportedEncodingException uee) {
581: throw new RuntimeException(
582: "ISO-8859-1 encoding should never fail; report this at www.jruby.org");
583: }
584: }
585:
586: public String toUtf8String() {
587: // TODO: no caching? :(
588: try {
589: return toString("UTF-8");
590: } catch (UnsupportedEncodingException uee) {
591: throw new RuntimeException(
592: "UTF-8 encoding should never fail; report this at www.jruby.org");
593: }
594: }
595:
596: public String toString(String encoding)
597: throws UnsupportedEncodingException {
598: return new String(this .bytes, begin, realSize, encoding);
599: }
600:
601: public static ByteList create(CharSequence s) {
602: return new ByteList(plain(s), false);
603: }
604:
605: public static byte[] plain(CharSequence s) {
606: if (s instanceof String) {
607: try {
608: return ((String) s).getBytes("ISO8859-1");
609: } catch (Exception e) {
610: //FALLTHROUGH
611: }
612: }
613: byte[] bytes = new byte[s.length()];
614: for (int i = 0; i < bytes.length; i++) {
615: bytes[i] = (byte) s.charAt(i);
616: }
617: return bytes;
618: }
619:
620: public static byte[] plain(char[] s) {
621: byte[] bytes = new byte[s.length];
622: for (int i = 0; i < s.length; i++) {
623: bytes[i] = (byte) s[i];
624: }
625: return bytes;
626: }
627:
628: public static char[] plain(byte[] b, int start, int length) {
629: char[] chars = new char[length];
630: for (int i = 0; i < length; i++) {
631: chars[i] = (char) (b[start + i] & 0xFF);
632: }
633: return chars;
634: }
635:
636: public static char[] plain(byte[] b) {
637: char[] chars = new char[b.length];
638: for (int i = 0; i < b.length; i++) {
639: chars[i] = (char) (b[i] & 0xFF);
640: }
641: return chars;
642: }
643:
644: public char charAt(int ix) {
645: return (char) (this .bytes[begin + ix] & 0xFF);
646: }
647:
648: public CharSequence subSequence(int start, int end) {
649: return new ByteList(this, start, end - start);
650: }
651: }
|