001: /*
002: * Modified by Nabh Information Systems, Inc.
003: * Modifications (c) 2006 Nabh Information Systems, Inc.
004: *
005: * Copyright 1999-2004 The Apache Software Foundation
006: *
007: * Licensed under the Apache License, Version 2.0 (the "License");
008: * you may not use this file except in compliance with the License.
009: * You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019: package com.nabhinc.util.md;
020:
021: import java.io.IOException;
022: import java.io.Serializable;
023:
024: /*
025: * In a server it is very important to be able to operate on
026: * the original byte[] without converting everything to chars.
027: * Some protocols are ASCII only, and some allow different
028: * non-UNICODE encodings. The encoding is not known beforehand,
029: * and can even change during the execution of the protocol.
030: * ( for example a multipart message may have parts with different
031: * encoding )
032: *
033: * For HTTP it is not very clear how the encoding of RequestURI
034: * and mime values can be determined, but it is a great advantage
035: * to be able to parse the request without converting to string.
036: */
037:
038: // TODO: This class could either extend ByteBuffer, or better a ByteBuffer inside
039: // this way it could provide the search/etc on ByteBuffer, as a helper.
040: /**
041: * This class is used to represent a chunk of bytes, and
042: * utilities to manipulate byte[].
043: *
044: * The buffer can be modified and used for both input and output.
045: *
046: * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel or ByteOutputChannel,
047: * which will be used when the buffer is empty ( on input ) or filled ( on output ).
048: * For output, it can also grow. This operating mode is selected by calling setLimit() or
049: * allocate(initial, limit) with limit != -1.
050: *
051: * Various search and append method are defined - similar with String and StringBuffer, but
052: * operating on bytes.
053: *
054: * This is important because it allows processing the http headers directly on the received bytes,
055: * without converting to chars and Strings until the strings are needed. In addition, the charset
056: * is determined later, from headers or user code.
057: *
058: *
059: * @author dac@sun.com
060: * @author James Todd [gonzo@sun.com]
061: * @author Costin Manolache
062: * @author Remy Maucherat
063: */
064: public final class ByteChunk implements Cloneable, Serializable {
065:
066: /** Input interface, used when the buffer is emptiy
067: *
068: * Same as java.nio.channel.ReadableByteChannel
069: */
070: public static interface ByteInputChannel {
071: /**
072: * Read new bytes ( usually the internal conversion buffer ).
073: * The implementation is allowed to ignore the parameters,
074: * and mutate the chunk if it wishes to implement its own buffering.
075: */
076: public int realReadBytes(byte cbuf[], int off, int len)
077: throws IOException;
078: }
079:
080: /** Same as java.nio.channel.WrittableByteChannel.
081: */
082: public static interface ByteOutputChannel {
083: /**
084: * Send the bytes ( usually the internal conversion buffer ).
085: * Expect 8k output if the buffer is full.
086: */
087: public void realWriteBytes(byte cbuf[], int off, int len)
088: throws IOException;
089: }
090:
091: // --------------------
092:
093: /** Default encoding used to convert to strings. It should be UTF8,
094: as most standards seem to converge, but the servlet API requires
095: 8859_1, and this object is used mostly for servlets.
096: */
097: public static final String DEFAULT_CHARACTER_ENCODING = "ISO-8859-1";
098:
099: // byte[]
100: private byte[] buff;
101:
102: private int start = 0;
103: private int end;
104:
105: private String enc;
106:
107: private boolean isSet = false; // XXX
108:
109: // How much can it grow, when data is added
110: private int limit = -1;
111:
112: private ByteInputChannel in = null;
113: private ByteOutputChannel out = null;
114:
115: private boolean isOutput = false;
116: private boolean optimizedWrite = true;
117:
118: /**
119: * Creates a new, uninitialized ByteChunk object.
120: */
121: public ByteChunk() {
122: }
123:
124: public ByteChunk(int initial) {
125: allocate(initial, -1);
126: }
127:
128: //--------------------
129: public ByteChunk getClone() {
130: try {
131: return (ByteChunk) this .clone();
132: } catch (Exception ex) {
133: return null;
134: }
135: }
136:
137: public boolean isNull() {
138: return !isSet; // buff==null;
139: }
140:
141: /**
142: * Resets the message buff to an uninitialized state.
143: */
144: public void recycle() {
145: // buff = null;
146: enc = null;
147: start = 0;
148: end = 0;
149: isSet = false;
150: }
151:
152: public void reset() {
153: buff = null;
154: }
155:
156: // -------------------- Setup --------------------
157:
158: public void allocate(int initial, int limit) {
159: isOutput = true;
160: if (buff == null || buff.length < initial) {
161: buff = new byte[initial];
162: }
163: this .limit = limit;
164: start = 0;
165: end = 0;
166: isSet = true;
167: }
168:
169: /**
170: * Sets the message bytes to the specified subarray of bytes.
171: *
172: * @param b the ascii bytes
173: * @param off the start offset of the bytes
174: * @param len the length of the bytes
175: */
176: public void setBytes(byte[] b, int off, int len) {
177: buff = b;
178: start = off;
179: end = start + len;
180: isSet = true;
181: }
182:
183: public void setOptimizedWrite(boolean optimizedWrite) {
184: this .optimizedWrite = optimizedWrite;
185: }
186:
187: public void setEncoding(String enc) {
188: this .enc = enc;
189: }
190:
191: public String getEncoding() {
192: if (enc == null)
193: enc = DEFAULT_CHARACTER_ENCODING;
194: return enc;
195: }
196:
197: /**
198: * Returns the message bytes.
199: */
200: public byte[] getBytes() {
201: return getBuffer();
202: }
203:
204: /**
205: * Returns the message bytes.
206: */
207: public byte[] getBuffer() {
208: return buff;
209: }
210:
211: /**
212: * Returns the start offset of the bytes.
213: * For output this is the end of the buffer.
214: */
215: public int getStart() {
216: return start;
217: }
218:
219: public int getOffset() {
220: return start;
221: }
222:
223: public void setOffset(int off) {
224: if (end < off)
225: end = off;
226: start = off;
227: }
228:
229: /**
230: * Returns the length of the bytes.
231: * XXX need to clean this up
232: */
233: public int getLength() {
234: return end - start;
235: }
236:
237: /** Maximum amount of data in this buffer.
238: *
239: * If -1 or not set, the buffer will grow undefinitely.
240: * Can be smaller than the current buffer size ( which will not shrink ).
241: * When the limit is reached, the buffer will be flushed ( if out is set )
242: * or throw exception.
243: */
244: public void setLimit(int limit) {
245: this .limit = limit;
246: }
247:
248: public int getLimit() {
249: return limit;
250: }
251:
252: /**
253: * When the buffer is empty, read the data from the input channel.
254: */
255: public void setByteInputChannel(ByteInputChannel in) {
256: this .in = in;
257: }
258:
259: /** When the buffer is full, write the data to the output channel.
260: * Also used when large amount of data is appended.
261: *
262: * If not set, the buffer will grow to the limit.
263: */
264: public void setByteOutputChannel(ByteOutputChannel out) {
265: this .out = out;
266: }
267:
268: public int getEnd() {
269: return end;
270: }
271:
272: public void setEnd(int i) {
273: end = i;
274: }
275:
276: // -------------------- Adding data to the buffer --------------------
277: /** Append a char, by casting it to byte. This IS NOT intended for unicode.
278: *
279: * @param c
280: * @throws IOException
281: */
282: public void append(char c) throws IOException {
283: append((byte) c);
284: }
285:
286: public void append(byte b) throws IOException {
287: makeSpace(1);
288:
289: // couldn't make space
290: if (limit > 0 && end >= limit) {
291: flushBuffer();
292: }
293: buff[end++] = b;
294: }
295:
296: public void append(ByteChunk src) throws IOException {
297: append(src.getBytes(), src.getStart(), src.getLength());
298: }
299:
300: /** Add data to the buffer
301: */
302: public void append(byte src[], int off, int len) throws IOException {
303: // will grow, up to limit
304: makeSpace(len);
305:
306: // if we don't have limit: makeSpace can grow as it wants
307: if (limit < 0) {
308: // assert: makeSpace made enough space
309: System.arraycopy(src, off, buff, end, len);
310: end += len;
311: return;
312: }
313:
314: // Optimize on a common case.
315: // If the buffer is empty and the source is going to fill up all the
316: // space in buffer, may as well write it directly to the output,
317: // and avoid an extra copy
318: if (optimizedWrite && len == limit && end == start
319: && out != null) {
320: out.realWriteBytes(src, off, len);
321: return;
322: }
323: // if we have limit and we're below
324: if (len <= limit - end) {
325: // makeSpace will grow the buffer to the limit,
326: // so we have space
327: System.arraycopy(src, off, buff, end, len);
328: end += len;
329: return;
330: }
331:
332: // need more space than we can afford, need to flush
333: // buffer
334:
335: // the buffer is already at ( or bigger than ) limit
336:
337: // We chunk the data into slices fitting in the buffer limit, although
338: // if the data is written directly if it doesn't fit
339:
340: int avail = limit - end;
341: System.arraycopy(src, off, buff, end, avail);
342: end += avail;
343:
344: flushBuffer();
345:
346: int remain = len - avail;
347:
348: while (remain > (limit - end)) {
349: out.realWriteBytes(src, (off + len) - remain, limit - end);
350: remain = remain - (limit - end);
351: }
352:
353: System.arraycopy(src, (off + len) - remain, buff, end, remain);
354: end += remain;
355:
356: }
357:
358: // -------------------- Removing data from the buffer --------------------
359:
360: public int substract() throws IOException {
361:
362: if ((end - start) == 0) {
363: if (in == null)
364: return -1;
365: int n = in.realReadBytes(buff, 0, buff.length);
366: if (n < 0)
367: return -1;
368: }
369:
370: return (buff[start++] & 0xFF);
371:
372: }
373:
374: public int substract(ByteChunk src) throws IOException {
375:
376: if ((end - start) == 0) {
377: if (in == null)
378: return -1;
379: int n = in.realReadBytes(buff, 0, buff.length);
380: if (n < 0)
381: return -1;
382: }
383:
384: int len = getLength();
385: src.append(buff, start, len);
386: start = end;
387: return len;
388:
389: }
390:
391: public int substract(byte src[], int off, int len)
392: throws IOException {
393:
394: if ((end - start) == 0) {
395: if (in == null)
396: return -1;
397: int n = in.realReadBytes(buff, 0, buff.length);
398: if (n < 0)
399: return -1;
400: }
401:
402: int n = len;
403: if (len > getLength()) {
404: n = getLength();
405: }
406: System.arraycopy(buff, start, src, off, n);
407: start += n;
408: return n;
409:
410: }
411:
412: /** Send the buffer to the sink. Called by append() when the limit is reached.
413: * You can also call it explicitely to force the data to be written.
414: *
415: * @throws IOException
416: */
417: public void flushBuffer() throws IOException {
418: //assert out!=null
419: if (out == null) {
420: throw new IOException("Buffer overflow, no sink " + limit
421: + " " + buff.length);
422: }
423: out.realWriteBytes(buff, start, end - start);
424: end = start;
425: }
426:
427: /** Make space for len chars. If len is small, allocate
428: * a reserve space too. Never grow bigger than limit.
429: */
430: private void makeSpace(int count) {
431: byte[] tmp = null;
432:
433: int newSize;
434: int desiredSize = end + count;
435:
436: // Can't grow above the limit
437: if (limit > 0 && desiredSize > limit) {
438: desiredSize = limit;
439: }
440:
441: if (buff == null) {
442: if (desiredSize < 256)
443: desiredSize = 256; // take a minimum
444: buff = new byte[desiredSize];
445: }
446:
447: // limit < buf.length ( the buffer is already big )
448: // or we already have space XXX
449: if (desiredSize <= buff.length) {
450: return;
451: }
452: // grow in larger chunks
453: if (desiredSize < 2 * buff.length) {
454: newSize = buff.length * 2;
455: if (limit > 0 && newSize > limit)
456: newSize = limit;
457: tmp = new byte[newSize];
458: } else {
459: newSize = buff.length * 2 + count;
460: if (limit > 0 && newSize > limit)
461: newSize = limit;
462: tmp = new byte[newSize];
463: }
464:
465: System.arraycopy(buff, start, tmp, 0, end - start);
466: buff = tmp;
467: tmp = null;
468: end = end - start;
469: start = 0;
470: }
471:
472: // -------------------- Conversion and getters --------------------
473:
474: public String toString() {
475: if (null == buff) {
476: return null;
477: } else if (end - start == 0) {
478: return "";
479: }
480: return StringCache.toString(this );
481: }
482:
483: public String toStringInternal() {
484: String strValue = null;
485: try {
486: if (enc == null)
487: enc = DEFAULT_CHARACTER_ENCODING;
488: strValue = new String(buff, start, end - start, enc);
489: /*
490: Does not improve the speed too much on most systems,
491: it's safer to use the "clasical" new String().
492:
493: Most overhead is in creating char[] and copying,
494: the internal implementation of new String() is very close to
495: what we do. The decoder is nice for large buffers and if
496: we don't go to String ( so we can take advantage of reduced GC)
497:
498: // Method is commented out, in:
499: return B2CConverter.decodeString( enc );
500: */
501: } catch (java.io.UnsupportedEncodingException e) {
502: // Use the platform encoding in that case; the usage of a bad
503: // encoding will have been logged elsewhere already
504: strValue = new String(buff, start, end - start);
505: }
506: return strValue;
507: }
508:
509: public int getInt() {
510: return Ascii.parseInt(buff, start, end - start);
511: }
512:
513: public long getLong() {
514: return Ascii.parseLong(buff, start, end - start);
515: }
516:
517: // -------------------- equals --------------------
518:
519: /**
520: * Compares the message bytes to the specified String object.
521: * @param s the String to compare
522: * @return true if the comparison succeeded, false otherwise
523: */
524: public boolean equals(String s) {
525: // XXX ENCODING - this only works if encoding is UTF8-compat
526: // ( ok for tomcat, where we compare ascii - header names, etc )!!!
527:
528: byte[] b = buff;
529: int blen = end - start;
530: if (b == null || blen != s.length()) {
531: return false;
532: }
533: int boff = start;
534: for (int i = 0; i < blen; i++) {
535: if (b[boff++] != s.charAt(i)) {
536: return false;
537: }
538: }
539: return true;
540: }
541:
542: /**
543: * Compares the message bytes to the specified String object.
544: * @param s the String to compare
545: * @return true if the comparison succeeded, false otherwise
546: */
547: public boolean equalsIgnoreCase(String s) {
548: byte[] b = buff;
549: int blen = end - start;
550: if (b == null || blen != s.length()) {
551: return false;
552: }
553: int boff = start;
554: for (int i = 0; i < blen; i++) {
555: if (Ascii.toLower(b[boff++]) != Ascii.toLower(s.charAt(i))) {
556: return false;
557: }
558: }
559: return true;
560: }
561:
562: public boolean equals(ByteChunk bb) {
563: return equals(bb.getBytes(), bb.getStart(), bb.getLength());
564: }
565:
566: public boolean equals(byte b2[], int off2, int len2) {
567: byte b1[] = buff;
568: if (b1 == null && b2 == null)
569: return true;
570:
571: int len = end - start;
572: if (len2 != len || b1 == null || b2 == null)
573: return false;
574:
575: int off1 = start;
576:
577: while (len-- > 0) {
578: if (b1[off1++] != b2[off2++]) {
579: return false;
580: }
581: }
582: return true;
583: }
584:
585: public boolean equals(CharChunk cc) {
586: return equals(cc.getChars(), cc.getStart(), cc.getLength());
587: }
588:
589: public boolean equals(char c2[], int off2, int len2) {
590: // XXX works only for enc compatible with ASCII/UTF !!!
591: byte b1[] = buff;
592: if (c2 == null && b1 == null)
593: return true;
594:
595: if (b1 == null || c2 == null || end - start != len2) {
596: return false;
597: }
598: int off1 = start;
599: int len = end - start;
600:
601: while (len-- > 0) {
602: if ((char) b1[off1++] != c2[off2++]) {
603: return false;
604: }
605: }
606: return true;
607: }
608:
609: /**
610: * Returns true if the message bytes starts with the specified string.
611: * @param s the string
612: */
613: public boolean startsWith(String s) {
614: // Works only if enc==UTF
615: byte[] b = buff;
616: int blen = s.length();
617: if (b == null || blen > end - start) {
618: return false;
619: }
620: int boff = start;
621: for (int i = 0; i < blen; i++) {
622: if (b[boff++] != s.charAt(i)) {
623: return false;
624: }
625: }
626: return true;
627: }
628:
629: /* Returns true if the message bytes start with the specified byte array */
630: public boolean startsWith(byte[] b2) {
631: byte[] b1 = buff;
632: if (b1 == null && b2 == null) {
633: return true;
634: }
635:
636: int len = end - start;
637: if (b1 == null || b2 == null || b2.length > len) {
638: return false;
639: }
640: for (int i = start, j = 0; i < end && j < b2.length;) {
641: if (b1[i++] != b2[j++])
642: return false;
643: }
644: return true;
645: }
646:
647: /**
648: * Returns true if the message bytes starts with the specified string.
649: * @param s the string
650: * @param pos The position
651: */
652: public boolean startsWithIgnoreCase(String s, int pos) {
653: byte[] b = buff;
654: int len = s.length();
655: if (b == null || len + pos > end - start) {
656: return false;
657: }
658: int off = start + pos;
659: for (int i = 0; i < len; i++) {
660: if (Ascii.toLower(b[off++]) != Ascii.toLower(s.charAt(i))) {
661: return false;
662: }
663: }
664: return true;
665: }
666:
667: public int indexOf(String src, int srcOff, int srcLen, int myOff) {
668: char first = src.charAt(srcOff);
669:
670: // Look for first char
671: int srcEnd = srcOff + srcLen;
672:
673: for (int i = myOff + start; i <= (end - srcLen); i++) {
674: if (buff[i] != first)
675: continue;
676: // found first char, now look for a match
677: int myPos = i + 1;
678: for (int srcPos = srcOff + 1; srcPos < srcEnd;) {
679: if (buff[myPos++] != src.charAt(srcPos++))
680: break;
681: if (srcPos == srcEnd)
682: return i - start; // found it
683: }
684: }
685: return -1;
686: }
687:
688: // -------------------- Hash code --------------------
689:
690: // normal hash.
691: public int hash() {
692: return hashBytes(buff, start, end - start);
693: }
694:
695: // hash ignoring case
696: public int hashIgnoreCase() {
697: return hashBytesIC(buff, start, end - start);
698: }
699:
700: private static int hashBytes(byte buff[], int start, int bytesLen) {
701: int max = start + bytesLen;
702: byte bb[] = buff;
703: int code = 0;
704: for (int i = start; i < max; i++) {
705: code = code * 37 + bb[i];
706: }
707: return code;
708: }
709:
710: private static int hashBytesIC(byte bytes[], int start, int bytesLen) {
711: int max = start + bytesLen;
712: byte bb[] = bytes;
713: int code = 0;
714: for (int i = start; i < max; i++) {
715: code = code * 37 + Ascii.toLower(bb[i]);
716: }
717: return code;
718: }
719:
720: /**
721: * Returns true if the message bytes starts with the specified string.
722: * @param c the character
723: * @param starting The start position
724: */
725: public int indexOf(char c, int starting) {
726: int ret = indexOf(buff, start + starting, end, c);
727: return (ret >= start) ? ret - start : -1;
728: }
729:
730: public static int indexOf(byte bytes[], int off, int end, char qq) {
731: // Works only for UTF
732: while (off < end) {
733: byte b = bytes[off];
734: if (b == qq)
735: return off;
736: off++;
737: }
738: return -1;
739: }
740:
741: /** Find a character, no side effects.
742: * @return index of char if found, -1 if not
743: */
744: public static int findChar(byte buf[], int start, int end, char c) {
745: byte b = (byte) c;
746: int offset = start;
747: while (offset < end) {
748: if (buf[offset] == b) {
749: return offset;
750: }
751: offset++;
752: }
753: return -1;
754: }
755:
756: /** Find a character, no side effects.
757: * @return index of char if found, -1 if not
758: */
759: public static int findChars(byte buf[], int start, int end,
760: byte c[]) {
761: int clen = c.length;
762: int offset = start;
763: while (offset < end) {
764: for (int i = 0; i < clen; i++)
765: if (buf[offset] == c[i]) {
766: return offset;
767: }
768: offset++;
769: }
770: return -1;
771: }
772:
773: /** Find the first character != c
774: * @return index of char if found, -1 if not
775: */
776: public static int findNotChars(byte buf[], int start, int end,
777: byte c[]) {
778: int clen = c.length;
779: int offset = start;
780: boolean found;
781:
782: while (offset < end) {
783: found = true;
784: for (int i = 0; i < clen; i++) {
785: if (buf[offset] == c[i]) {
786: found = false;
787: break;
788: }
789: }
790: if (found) { // buf[offset] != c[0..len]
791: return offset;
792: }
793: offset++;
794: }
795: return -1;
796: }
797:
798: /**
799: * Convert specified String to a byte array. This ONLY WORKS for ascii, UTF chars will be truncated.
800: *
801: * @param value to convert to byte array
802: * @return the byte array value
803: */
804: public static final byte[] convertToBytes(String value) {
805: byte[] result = new byte[value.length()];
806: for (int i = 0; i < value.length(); i++) {
807: result[i] = (byte) value.charAt(i);
808: }
809: return result;
810: }
811:
812: }
|