001: /*
002: * Copyright 1999-2004 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.tomcat.util.buf;
018:
019: import java.io.IOException;
020: import java.io.Serializable;
021:
022: /*
023: * In a server it is very important to be able to operate on
024: * the original byte[] without converting everything to chars.
025: * Some protocols are ASCII only, and some allow different
026: * non-UNICODE encodings. The encoding is not known beforehand,
027: * and can even change during the execution of the protocol.
028: * ( for example a multipart message may have parts with different
029: * encoding )
030: *
031: * For HTTP it is not very clear how the encoding of RequestURI
032: * and mime values can be determined, but it is a great advantage
033: * to be able to parse the request without converting to string.
034: */
035:
036: /**
037: * This class is used to represent a chunk of bytes, and
038: * utilities to manipulate byte[].
039: *
040: * The buffer can be modified and used for both input and output.
041: *
042: * @author dac@sun.com
043: * @author James Todd [gonzo@sun.com]
044: * @author Costin Manolache
045: * @author Remy Maucherat
046: */
047: public final class ByteChunk implements Cloneable, Serializable {
048:
049: // Input interface, used when the buffer is emptied.
050: public static interface ByteInputChannel {
051: /**
052: * Read new bytes ( usually the internal conversion buffer ).
053: * The implementation is allowed to ignore the parameters,
054: * and mutate the chunk if it wishes to implement its own buffering.
055: */
056: public int realReadBytes(byte cbuf[], int off, int len)
057: throws IOException;
058: }
059:
060: // Output interface, used when the buffer is filled.
061: public static interface ByteOutputChannel {
062: /**
063: * Send the bytes ( usually the internal conversion buffer ).
064: * Expect 8k output if the buffer is full.
065: */
066: public void realWriteBytes(byte cbuf[], int off, int len)
067: throws IOException;
068: }
069:
070: // --------------------
071:
072: /** Default encoding used to convert to strings. It should be UTF8,
073: as most standards seem to converge, but the servlet API requires
074: 8859_1, and this object is used mostly for servlets.
075: */
076: public static final String DEFAULT_CHARACTER_ENCODING = "ISO-8859-1";
077:
078: // byte[]
079: private byte[] buff;
080:
081: private int start = 0;
082: private int end;
083:
084: private String enc;
085:
086: private boolean isSet = false; // XXX
087:
088: // How much can it grow, when data is added
089: private int limit = -1;
090:
091: private ByteInputChannel in = null;
092: private ByteOutputChannel out = null;
093:
094: private boolean isOutput = false;
095: private boolean optimizedWrite = true;
096:
097: /**
098: * Creates a new, uninitialized ByteChunk object.
099: */
100: public ByteChunk() {
101: }
102:
103: public ByteChunk(int initial) {
104: allocate(initial, -1);
105: }
106:
107: //--------------------
108: public ByteChunk getClone() {
109: try {
110: return (ByteChunk) this .clone();
111: } catch (Exception ex) {
112: return null;
113: }
114: }
115:
116: public boolean isNull() {
117: return !isSet; // buff==null;
118: }
119:
120: /**
121: * Resets the message buff to an uninitialized state.
122: */
123: public void recycle() {
124: // buff = null;
125: enc = null;
126: start = 0;
127: end = 0;
128: isSet = false;
129: }
130:
131: public void reset() {
132: buff = null;
133: }
134:
135: // -------------------- Setup --------------------
136:
137: public void allocate(int initial, int limit) {
138: isOutput = true;
139: if (buff == null || buff.length < initial) {
140: buff = new byte[initial];
141: }
142: this .limit = limit;
143: start = 0;
144: end = 0;
145: isSet = true;
146: }
147:
148: /**
149: * Sets the message bytes to the specified subarray of bytes.
150: *
151: * @param b the ascii bytes
152: * @param off the start offset of the bytes
153: * @param len the length of the bytes
154: */
155: public void setBytes(byte[] b, int off, int len) {
156: buff = b;
157: start = off;
158: end = start + len;
159: isSet = true;
160: }
161:
162: public void setOptimizedWrite(boolean optimizedWrite) {
163: this .optimizedWrite = optimizedWrite;
164: }
165:
166: public void setEncoding(String enc) {
167: this .enc = enc;
168: }
169:
170: /**
171: * Returns the message bytes.
172: */
173: public byte[] getBytes() {
174: return getBuffer();
175: }
176:
177: /**
178: * Returns the message bytes.
179: */
180: public byte[] getBuffer() {
181: return buff;
182: }
183:
184: /**
185: * Returns the start offset of the bytes.
186: * For output this is the end of the buffer.
187: */
188: public int getStart() {
189: return start;
190: }
191:
192: public int getOffset() {
193: return getStart();
194: }
195:
196: public void setOffset(int off) {
197: if (end < off)
198: end = off;
199: start = off;
200: }
201:
202: /**
203: * Returns the length of the bytes.
204: * XXX need to clean this up
205: */
206: public int getLength() {
207: return end - start;
208: }
209:
210: /** Maximum amount of data in this buffer.
211: *
212: * If -1 or not set, the buffer will grow undefinitely.
213: * Can be smaller than the current buffer size ( which will not shrink ).
214: * When the limit is reached, the buffer will be flushed ( if out is set )
215: * or throw exception.
216: */
217: public void setLimit(int limit) {
218: this .limit = limit;
219: }
220:
221: public int getLimit() {
222: return limit;
223: }
224:
225: /**
226: * When the buffer is empty, read the data from the input channel.
227: */
228: public void setByteInputChannel(ByteInputChannel in) {
229: this .in = in;
230: }
231:
232: /** When the buffer is full, write the data to the output channel.
233: * Also used when large amount of data is appended.
234: *
235: * If not set, the buffer will grow to the limit.
236: */
237: public void setByteOutputChannel(ByteOutputChannel out) {
238: this .out = out;
239: }
240:
241: public int getEnd() {
242: return end;
243: }
244:
245: public void setEnd(int i) {
246: end = i;
247: }
248:
249: // -------------------- Adding data to the buffer --------------------
250: public void append(char c) throws IOException {
251: append((byte) c);
252: }
253:
254: public void append(byte b) throws IOException {
255: makeSpace(1);
256:
257: // couldn't make space
258: if (limit > 0 && end >= limit) {
259: flushBuffer();
260: }
261: buff[end++] = b;
262: }
263:
264: public void append(ByteChunk src) throws IOException {
265: append(src.getBytes(), src.getStart(), src.getLength());
266: }
267:
268: /** Add data to the buffer
269: */
270: public void append(byte src[], int off, int len) throws IOException {
271: // will grow, up to limit
272: makeSpace(len);
273:
274: // if we don't have limit: makeSpace can grow as it wants
275: if (limit < 0) {
276: // assert: makeSpace made enough space
277: System.arraycopy(src, off, buff, end, len);
278: end += len;
279: return;
280: }
281:
282: // Optimize on a common case.
283: // If the buffer is empty and the source is going to fill up all the
284: // space in buffer, may as well write it directly to the output,
285: // and avoid an extra copy
286: if (optimizedWrite && len == limit && end == start) {
287: out.realWriteBytes(src, off, len);
288: return;
289: }
290: // if we have limit and we're below
291: if (len <= limit - end) {
292: // makeSpace will grow the buffer to the limit,
293: // so we have space
294: System.arraycopy(src, off, buff, end, len);
295: end += len;
296: return;
297: }
298:
299: // need more space than we can afford, need to flush
300: // buffer
301:
302: // the buffer is already at ( or bigger than ) limit
303:
304: // We chunk the data into slices fitting in the buffer limit, although
305: // if the data is written directly if it doesn't fit
306:
307: int avail = limit - end;
308: System.arraycopy(src, off, buff, end, avail);
309: end += avail;
310:
311: flushBuffer();
312:
313: int remain = len - avail;
314:
315: while (remain > (limit - end)) {
316: out.realWriteBytes(src, (off + len) - remain, limit - end);
317: remain = remain - (limit - end);
318: }
319:
320: System.arraycopy(src, (off + len) - remain, buff, end, remain);
321: end += remain;
322:
323: }
324:
325: // -------------------- Removing data from the buffer --------------------
326:
327: public int substract() throws IOException {
328:
329: if ((end - start) == 0) {
330: if (in == null)
331: return -1;
332: int n = in.realReadBytes(buff, 0, buff.length);
333: if (n < 0)
334: return -1;
335: }
336:
337: return (buff[start++] & 0xFF);
338:
339: }
340:
341: public int substract(ByteChunk src) throws IOException {
342:
343: if ((end - start) == 0) {
344: if (in == null)
345: return -1;
346: int n = in.realReadBytes(buff, 0, buff.length);
347: if (n < 0)
348: return -1;
349: }
350:
351: int len = getLength();
352: src.append(buff, start, len);
353: start = end;
354: return len;
355:
356: }
357:
358: public int substract(byte src[], int off, int len)
359: throws IOException {
360:
361: if ((end - start) == 0) {
362: if (in == null)
363: return -1;
364: int n = in.realReadBytes(buff, 0, buff.length);
365: if (n < 0)
366: return -1;
367: }
368:
369: int n = len;
370: if (len > getLength()) {
371: n = getLength();
372: }
373: System.arraycopy(buff, start, src, off, n);
374: start += n;
375: return n;
376:
377: }
378:
379: public void flushBuffer() throws IOException {
380: //assert out!=null
381: if (out == null) {
382: throw new IOException("Buffer overflow, no sink " + limit
383: + " " + buff.length);
384: }
385: out.realWriteBytes(buff, start, end - start);
386: end = start;
387: }
388:
389: /** Make space for len chars. If len is small, allocate
390: * a reserve space too. Never grow bigger than limit.
391: */
392: private void makeSpace(int count) {
393: byte[] tmp = null;
394:
395: int newSize;
396: int desiredSize = end + count;
397:
398: // Can't grow above the limit
399: if (limit > 0 && desiredSize > limit) {
400: desiredSize = limit;
401: }
402:
403: if (buff == null) {
404: if (desiredSize < 256)
405: desiredSize = 256; // take a minimum
406: buff = new byte[desiredSize];
407: }
408:
409: // limit < buf.length ( the buffer is already big )
410: // or we already have space XXX
411: if (desiredSize <= buff.length) {
412: return;
413: }
414: // grow in larger chunks
415: if (desiredSize < 2 * buff.length) {
416: newSize = buff.length * 2;
417: if (limit > 0 && newSize > limit)
418: newSize = limit;
419: tmp = new byte[newSize];
420: } else {
421: newSize = buff.length * 2 + count;
422: if (limit > 0 && newSize > limit)
423: newSize = limit;
424: tmp = new byte[newSize];
425: }
426:
427: System.arraycopy(buff, start, tmp, 0, end - start);
428: buff = tmp;
429: tmp = null;
430: end = end - start;
431: start = 0;
432: }
433:
434: // -------------------- Conversion and getters --------------------
435:
436: public String toString() {
437: if (null == buff) {
438: return null;
439: }
440: String strValue = null;
441: try {
442: if (enc == null)
443: enc = DEFAULT_CHARACTER_ENCODING;
444: return new String(buff, start, end - start, enc);
445: /*
446: Does not improve the speed too much on most systems,
447: it's safer to use the "clasical" new String().
448:
449: Most overhead is in creating char[] and copying,
450: the internal implementation of new String() is very close to
451: what we do. The decoder is nice for large buffers and if
452: we don't go to String ( so we can take advantage of reduced GC)
453:
454: // Method is commented out, in:
455: return B2CConverter.decodeString( enc );
456: */
457: } catch (java.io.IOException e) {
458: return null; // XXX
459: }
460: }
461:
462: public int getInt() {
463: return Ascii.parseInt(buff, start, end - start);
464: }
465:
466: public long getLong() {
467: return Ascii.parseLong(buff, start, end - start);
468: }
469:
470: // -------------------- equals --------------------
471:
472: /**
473: * Compares the message bytes to the specified String object.
474: * @param s the String to compare
475: * @return true if the comparison succeeded, false otherwise
476: */
477: public boolean equals(String s) {
478: // XXX ENCODING - this only works if encoding is UTF8-compat
479: // ( ok for tomcat, where we compare ascii - header names, etc )!!!
480:
481: byte[] b = buff;
482: int blen = end - start;
483: if (b == null || blen != s.length()) {
484: return false;
485: }
486: int boff = start;
487: for (int i = 0; i < blen; i++) {
488: if (b[boff++] != s.charAt(i)) {
489: return false;
490: }
491: }
492: return true;
493: }
494:
495: /**
496: * Compares the message bytes to the specified String object.
497: * @param s the String to compare
498: * @return true if the comparison succeeded, false otherwise
499: */
500: public boolean equalsIgnoreCase(String s) {
501: byte[] b = buff;
502: int blen = end - start;
503: if (b == null || blen != s.length()) {
504: return false;
505: }
506: int boff = start;
507: for (int i = 0; i < blen; i++) {
508: if (Ascii.toLower(b[boff++]) != Ascii.toLower(s.charAt(i))) {
509: return false;
510: }
511: }
512: return true;
513: }
514:
515: public boolean equals(ByteChunk bb) {
516: return equals(bb.getBytes(), bb.getStart(), bb.getLength());
517: }
518:
519: public boolean equals(byte b2[], int off2, int len2) {
520: byte b1[] = buff;
521: if (b1 == null && b2 == null)
522: return true;
523:
524: int len = end - start;
525: if (len2 != len || b1 == null || b2 == null)
526: return false;
527:
528: int off1 = start;
529:
530: while (len-- > 0) {
531: if (b1[off1++] != b2[off2++]) {
532: return false;
533: }
534: }
535: return true;
536: }
537:
538: public boolean equals(CharChunk cc) {
539: return equals(cc.getChars(), cc.getStart(), cc.getLength());
540: }
541:
542: public boolean equals(char c2[], int off2, int len2) {
543: // XXX works only for enc compatible with ASCII/UTF !!!
544: byte b1[] = buff;
545: if (c2 == null && b1 == null)
546: return true;
547:
548: if (b1 == null || c2 == null || end - start != len2) {
549: return false;
550: }
551: int off1 = start;
552: int len = end - start;
553:
554: while (len-- > 0) {
555: if ((char) b1[off1++] != c2[off2++]) {
556: return false;
557: }
558: }
559: return true;
560: }
561:
562: /**
563: * Returns true if the message bytes starts with the specified string.
564: * @param s the string
565: */
566: public boolean startsWith(String s) {
567: // Works only if enc==UTF
568: byte[] b = buff;
569: int blen = s.length();
570: if (b == null || blen > end - start) {
571: return false;
572: }
573: int boff = start;
574: for (int i = 0; i < blen; i++) {
575: if (b[boff++] != s.charAt(i)) {
576: return false;
577: }
578: }
579: return true;
580: }
581:
582: /* Returns true if the message bytes start with the specified byte array */
583: public boolean startsWith(byte[] b2) {
584: byte[] b1 = buff;
585: if (b1 == null && b2 == null) {
586: return true;
587: }
588:
589: int len = end - start;
590: if (b1 == null || b2 == null || b2.length > len) {
591: return false;
592: }
593: for (int i = start, j = 0; i < end && j < b2.length;) {
594: if (b1[i++] != b2[j++])
595: return false;
596: }
597: return true;
598: }
599:
600: /**
601: * Returns true if the message bytes starts with the specified string.
602: * @param s the string
603: */
604: public boolean startsWithIgnoreCase(String s, int pos) {
605: byte[] b = buff;
606: int len = s.length();
607: if (b == null || len + pos > end - start) {
608: return false;
609: }
610: int off = start + pos;
611: for (int i = 0; i < len; i++) {
612: if (Ascii.toLower(b[off++]) != Ascii.toLower(s.charAt(i))) {
613: return false;
614: }
615: }
616: return true;
617: }
618:
619: public int indexOf(String src, int srcOff, int srcLen, int myOff) {
620: char first = src.charAt(srcOff);
621:
622: // Look for first char
623: int srcEnd = srcOff + srcLen;
624:
625: for (int i = myOff + start; i <= (end - srcLen); i++) {
626: if (buff[i] != first)
627: continue;
628: // found first char, now look for a match
629: int myPos = i + 1;
630: for (int srcPos = srcOff + 1; srcPos < srcEnd;) {
631: if (buff[myPos++] != src.charAt(srcPos++))
632: break;
633: if (srcPos == srcEnd)
634: return i - start; // found it
635: }
636: }
637: return -1;
638: }
639:
640: // -------------------- Hash code --------------------
641:
642: // normal hash.
643: public int hash() {
644: return hashBytes(buff, start, end - start);
645: }
646:
647: // hash ignoring case
648: public int hashIgnoreCase() {
649: return hashBytesIC(buff, start, end - start);
650: }
651:
652: private static int hashBytes(byte buff[], int start, int bytesLen) {
653: int max = start + bytesLen;
654: byte bb[] = buff;
655: int code = 0;
656: for (int i = start; i < max; i++) {
657: code = code * 37 + bb[i];
658: }
659: return code;
660: }
661:
662: private static int hashBytesIC(byte bytes[], int start, int bytesLen) {
663: int max = start + bytesLen;
664: byte bb[] = bytes;
665: int code = 0;
666: for (int i = start; i < max; i++) {
667: code = code * 37 + Ascii.toLower(bb[i]);
668: }
669: return code;
670: }
671:
672: /**
673: * Returns true if the message bytes starts with the specified string.
674: * @param s the string
675: */
676: public int indexOf(char c, int starting) {
677: int ret = indexOf(buff, start + starting, end, c);
678: return (ret >= start) ? ret - start : -1;
679: }
680:
681: public static int indexOf(byte bytes[], int off, int end, char qq) {
682: // Works only for UTF
683: while (off < end) {
684: byte b = bytes[off];
685: if (b == qq)
686: return off;
687: off++;
688: }
689: return -1;
690: }
691:
692: /** Find a character, no side effects.
693: * @returns index of char if found, -1 if not
694: */
695: public static int findChar(byte buf[], int start, int end, char c) {
696: byte b = (byte) c;
697: int offset = start;
698: while (offset < end) {
699: if (buf[offset] == b) {
700: return offset;
701: }
702: offset++;
703: }
704: return -1;
705: }
706:
707: /** Find a character, no side effects.
708: * @returns index of char if found, -1 if not
709: */
710: public static int findChars(byte buf[], int start, int end,
711: byte c[]) {
712: int clen = c.length;
713: int offset = start;
714: while (offset < end) {
715: for (int i = 0; i < clen; i++)
716: if (buf[offset] == c[i]) {
717: return offset;
718: }
719: offset++;
720: }
721: return -1;
722: }
723:
724: /** Find the first character != c
725: * @returns index of char if found, -1 if not
726: */
727: public static int findNotChars(byte buf[], int start, int end,
728: byte c[]) {
729: int clen = c.length;
730: int offset = start;
731: boolean found;
732:
733: while (offset < end) {
734: found = true;
735: for (int i = 0; i < clen; i++) {
736: if (buf[offset] == c[i]) {
737: found = false;
738: break;
739: }
740: }
741: if (found) { // buf[offset] != c[0..len]
742: return offset;
743: }
744: offset++;
745: }
746: return -1;
747: }
748:
749: }
|