001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.tomcat.util.buf;
019:
020: import java.io.IOException;
021: import java.io.Serializable;
022:
023: /*
024: * In a server it is very important to be able to operate on
025: * the original byte[] without converting everything to chars.
026: * Some protocols are ASCII only, and some allow different
027: * non-UNICODE encodings. The encoding is not known beforehand,
028: * and can even change during the execution of the protocol.
029: * ( for example a multipart message may have parts with different
030: * encoding )
031: *
032: * For HTTP it is not very clear how the encoding of RequestURI
033: * and mime values can be determined, but it is a great advantage
034: * to be able to parse the request without converting to string.
035: */
036:
// TODO: This class could either extend ByteBuffer or, better, wrap a ByteBuffer internally;
// that way it could provide the search/etc. operations on a ByteBuffer, as a helper.
039: /**
040: * This class is used to represent a chunk of bytes, and
041: * utilities to manipulate byte[].
042: *
043: * The buffer can be modified and used for both input and output.
044: *
045: * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel or ByteOutputChannel,
046: * which will be used when the buffer is empty ( on input ) or filled ( on output ).
047: * For output, it can also grow. This operating mode is selected by calling setLimit() or
048: * allocate(initial, limit) with limit != -1.
049: *
050: * Various search and append method are defined - similar with String and StringBuffer, but
051: * operating on bytes.
052: *
053: * This is important because it allows processing the http headers directly on the received bytes,
054: * without converting to chars and Strings until the strings are needed. In addition, the charset
055: * is determined later, from headers or user code.
056: *
057: *
058: * @author dac@sun.com
059: * @author James Todd [gonzo@sun.com]
060: * @author Costin Manolache
061: * @author Remy Maucherat
062: */
063: public final class ByteChunk implements Cloneable, Serializable {
064:
065: /** Input interface, used when the buffer is emptiy
066: *
067: * Same as java.nio.channel.ReadableByteChannel
068: */
069: public static interface ByteInputChannel {
070: /**
071: * Read new bytes ( usually the internal conversion buffer ).
072: * The implementation is allowed to ignore the parameters,
073: * and mutate the chunk if it wishes to implement its own buffering.
074: */
075: public int realReadBytes(byte cbuf[], int off, int len)
076: throws IOException;
077: }
078:
079: /** Same as java.nio.channel.WrittableByteChannel.
080: */
081: public static interface ByteOutputChannel {
082: /**
083: * Send the bytes ( usually the internal conversion buffer ).
084: * Expect 8k output if the buffer is full.
085: */
086: public void realWriteBytes(byte cbuf[], int off, int len)
087: throws IOException;
088: }
089:
090: // --------------------
091:
092: /** Default encoding used to convert to strings. It should be UTF8,
093: as most standards seem to converge, but the servlet API requires
094: 8859_1, and this object is used mostly for servlets.
095: */
096: public static final String DEFAULT_CHARACTER_ENCODING = "ISO-8859-1";
097:
098: // byte[]
099: private byte[] buff;
100:
101: private int start = 0;
102: private int end;
103:
104: private String enc;
105:
106: private boolean isSet = false; // XXX
107:
108: // How much can it grow, when data is added
109: private int limit = -1;
110:
111: private ByteInputChannel in = null;
112: private ByteOutputChannel out = null;
113:
114: private boolean isOutput = false;
115: private boolean optimizedWrite = true;
116:
117: /**
118: * Creates a new, uninitialized ByteChunk object.
119: */
120: public ByteChunk() {
121: }
122:
123: public ByteChunk(int initial) {
124: allocate(initial, -1);
125: }
126:
127: //--------------------
128: public ByteChunk getClone() {
129: try {
130: return (ByteChunk) this .clone();
131: } catch (Exception ex) {
132: return null;
133: }
134: }
135:
136: public boolean isNull() {
137: return !isSet; // buff==null;
138: }
139:
140: /**
141: * Resets the message buff to an uninitialized state.
142: */
143: public void recycle() {
144: // buff = null;
145: enc = null;
146: start = 0;
147: end = 0;
148: isSet = false;
149: }
150:
151: public void reset() {
152: buff = null;
153: }
154:
155: // -------------------- Setup --------------------
156:
157: public void allocate(int initial, int limit) {
158: isOutput = true;
159: if (buff == null || buff.length < initial) {
160: buff = new byte[initial];
161: }
162: this .limit = limit;
163: start = 0;
164: end = 0;
165: isSet = true;
166: }
167:
168: /**
169: * Sets the message bytes to the specified subarray of bytes.
170: *
171: * @param b the ascii bytes
172: * @param off the start offset of the bytes
173: * @param len the length of the bytes
174: */
175: public void setBytes(byte[] b, int off, int len) {
176: buff = b;
177: start = off;
178: end = start + len;
179: isSet = true;
180: }
181:
182: public void setOptimizedWrite(boolean optimizedWrite) {
183: this .optimizedWrite = optimizedWrite;
184: }
185:
186: public void setEncoding(String enc) {
187: this .enc = enc;
188: }
189:
190: public String getEncoding() {
191: if (enc == null)
192: enc = DEFAULT_CHARACTER_ENCODING;
193: return enc;
194: }
195:
196: /**
197: * Returns the message bytes.
198: */
199: public byte[] getBytes() {
200: return getBuffer();
201: }
202:
203: /**
204: * Returns the message bytes.
205: */
206: public byte[] getBuffer() {
207: return buff;
208: }
209:
210: /**
211: * Returns the start offset of the bytes.
212: * For output this is the end of the buffer.
213: */
214: public int getStart() {
215: return start;
216: }
217:
218: public int getOffset() {
219: return start;
220: }
221:
222: public void setOffset(int off) {
223: if (end < off)
224: end = off;
225: start = off;
226: }
227:
228: /**
229: * Returns the length of the bytes.
230: * XXX need to clean this up
231: */
232: public int getLength() {
233: return end - start;
234: }
235:
236: /** Maximum amount of data in this buffer.
237: *
238: * If -1 or not set, the buffer will grow undefinitely.
239: * Can be smaller than the current buffer size ( which will not shrink ).
240: * When the limit is reached, the buffer will be flushed ( if out is set )
241: * or throw exception.
242: */
243: public void setLimit(int limit) {
244: this .limit = limit;
245: }
246:
247: public int getLimit() {
248: return limit;
249: }
250:
251: /**
252: * When the buffer is empty, read the data from the input channel.
253: */
254: public void setByteInputChannel(ByteInputChannel in) {
255: this .in = in;
256: }
257:
258: /** When the buffer is full, write the data to the output channel.
259: * Also used when large amount of data is appended.
260: *
261: * If not set, the buffer will grow to the limit.
262: */
263: public void setByteOutputChannel(ByteOutputChannel out) {
264: this .out = out;
265: }
266:
267: public int getEnd() {
268: return end;
269: }
270:
271: public void setEnd(int i) {
272: end = i;
273: }
274:
275: // -------------------- Adding data to the buffer --------------------
276: /** Append a char, by casting it to byte. This IS NOT intended for unicode.
277: *
278: * @param c
279: * @throws IOException
280: */
281: public void append(char c) throws IOException {
282: append((byte) c);
283: }
284:
285: public void append(byte b) throws IOException {
286: makeSpace(1);
287:
288: // couldn't make space
289: if (limit > 0 && end >= limit) {
290: flushBuffer();
291: }
292: buff[end++] = b;
293: }
294:
295: public void append(ByteChunk src) throws IOException {
296: append(src.getBytes(), src.getStart(), src.getLength());
297: }
298:
299: /** Add data to the buffer
300: */
301: public void append(byte src[], int off, int len) throws IOException {
302: // will grow, up to limit
303: makeSpace(len);
304:
305: // if we don't have limit: makeSpace can grow as it wants
306: if (limit < 0) {
307: // assert: makeSpace made enough space
308: System.arraycopy(src, off, buff, end, len);
309: end += len;
310: return;
311: }
312:
313: // Optimize on a common case.
314: // If the buffer is empty and the source is going to fill up all the
315: // space in buffer, may as well write it directly to the output,
316: // and avoid an extra copy
317: if (optimizedWrite && len == limit && end == start
318: && out != null) {
319: out.realWriteBytes(src, off, len);
320: return;
321: }
322: // if we have limit and we're below
323: if (len <= limit - end) {
324: // makeSpace will grow the buffer to the limit,
325: // so we have space
326: System.arraycopy(src, off, buff, end, len);
327: end += len;
328: return;
329: }
330:
331: // need more space than we can afford, need to flush
332: // buffer
333:
334: // the buffer is already at ( or bigger than ) limit
335:
336: // We chunk the data into slices fitting in the buffer limit, although
337: // if the data is written directly if it doesn't fit
338:
339: int avail = limit - end;
340: System.arraycopy(src, off, buff, end, avail);
341: end += avail;
342:
343: flushBuffer();
344:
345: int remain = len - avail;
346:
347: while (remain > (limit - end)) {
348: out.realWriteBytes(src, (off + len) - remain, limit - end);
349: remain = remain - (limit - end);
350: }
351:
352: System.arraycopy(src, (off + len) - remain, buff, end, remain);
353: end += remain;
354:
355: }
356:
357: // -------------------- Removing data from the buffer --------------------
358:
359: public int substract() throws IOException {
360:
361: if ((end - start) == 0) {
362: if (in == null)
363: return -1;
364: int n = in.realReadBytes(buff, 0, buff.length);
365: if (n < 0)
366: return -1;
367: }
368:
369: return (buff[start++] & 0xFF);
370:
371: }
372:
373: public int substract(ByteChunk src) throws IOException {
374:
375: if ((end - start) == 0) {
376: if (in == null)
377: return -1;
378: int n = in.realReadBytes(buff, 0, buff.length);
379: if (n < 0)
380: return -1;
381: }
382:
383: int len = getLength();
384: src.append(buff, start, len);
385: start = end;
386: return len;
387:
388: }
389:
390: public int substract(byte src[], int off, int len)
391: throws IOException {
392:
393: if ((end - start) == 0) {
394: if (in == null)
395: return -1;
396: int n = in.realReadBytes(buff, 0, buff.length);
397: if (n < 0)
398: return -1;
399: }
400:
401: int n = len;
402: if (len > getLength()) {
403: n = getLength();
404: }
405: System.arraycopy(buff, start, src, off, n);
406: start += n;
407: return n;
408:
409: }
410:
411: /** Send the buffer to the sink. Called by append() when the limit is reached.
412: * You can also call it explicitely to force the data to be written.
413: *
414: * @throws IOException
415: */
416: public void flushBuffer() throws IOException {
417: //assert out!=null
418: if (out == null) {
419: throw new IOException("Buffer overflow, no sink " + limit
420: + " " + buff.length);
421: }
422: out.realWriteBytes(buff, start, end - start);
423: end = start;
424: }
425:
426: /** Make space for len chars. If len is small, allocate
427: * a reserve space too. Never grow bigger than limit.
428: */
429: private void makeSpace(int count) {
430: byte[] tmp = null;
431:
432: int newSize;
433: int desiredSize = end + count;
434:
435: // Can't grow above the limit
436: if (limit > 0 && desiredSize > limit) {
437: desiredSize = limit;
438: }
439:
440: if (buff == null) {
441: if (desiredSize < 256)
442: desiredSize = 256; // take a minimum
443: buff = new byte[desiredSize];
444: }
445:
446: // limit < buf.length ( the buffer is already big )
447: // or we already have space XXX
448: if (desiredSize <= buff.length) {
449: return;
450: }
451: // grow in larger chunks
452: if (desiredSize < 2 * buff.length) {
453: newSize = buff.length * 2;
454: if (limit > 0 && newSize > limit)
455: newSize = limit;
456: tmp = new byte[newSize];
457: } else {
458: newSize = buff.length * 2 + count;
459: if (limit > 0 && newSize > limit)
460: newSize = limit;
461: tmp = new byte[newSize];
462: }
463:
464: System.arraycopy(buff, start, tmp, 0, end - start);
465: buff = tmp;
466: tmp = null;
467: end = end - start;
468: start = 0;
469: }
470:
471: // -------------------- Conversion and getters --------------------
472:
473: public String toString() {
474: if (null == buff) {
475: return null;
476: } else if (end - start == 0) {
477: return "";
478: }
479: return StringCache.toString(this );
480: }
481:
482: public String toStringInternal() {
483: String strValue = null;
484: try {
485: if (enc == null)
486: enc = DEFAULT_CHARACTER_ENCODING;
487: strValue = new String(buff, start, end - start, enc);
488: /*
489: Does not improve the speed too much on most systems,
490: it's safer to use the "clasical" new String().
491:
492: Most overhead is in creating char[] and copying,
493: the internal implementation of new String() is very close to
494: what we do. The decoder is nice for large buffers and if
495: we don't go to String ( so we can take advantage of reduced GC)
496:
497: // Method is commented out, in:
498: return B2CConverter.decodeString( enc );
499: */
500: } catch (java.io.UnsupportedEncodingException e) {
501: // Use the platform encoding in that case; the usage of a bad
502: // encoding will have been logged elsewhere already
503: strValue = new String(buff, start, end - start);
504: }
505: return strValue;
506: }
507:
508: public int getInt() {
509: return Ascii.parseInt(buff, start, end - start);
510: }
511:
512: public long getLong() {
513: return Ascii.parseLong(buff, start, end - start);
514: }
515:
516: // -------------------- equals --------------------
517:
518: /**
519: * Compares the message bytes to the specified String object.
520: * @param s the String to compare
521: * @return true if the comparison succeeded, false otherwise
522: */
523: public boolean equals(String s) {
524: // XXX ENCODING - this only works if encoding is UTF8-compat
525: // ( ok for tomcat, where we compare ascii - header names, etc )!!!
526:
527: byte[] b = buff;
528: int blen = end - start;
529: if (b == null || blen != s.length()) {
530: return false;
531: }
532: int boff = start;
533: for (int i = 0; i < blen; i++) {
534: if (b[boff++] != s.charAt(i)) {
535: return false;
536: }
537: }
538: return true;
539: }
540:
541: /**
542: * Compares the message bytes to the specified String object.
543: * @param s the String to compare
544: * @return true if the comparison succeeded, false otherwise
545: */
546: public boolean equalsIgnoreCase(String s) {
547: byte[] b = buff;
548: int blen = end - start;
549: if (b == null || blen != s.length()) {
550: return false;
551: }
552: int boff = start;
553: for (int i = 0; i < blen; i++) {
554: if (Ascii.toLower(b[boff++]) != Ascii.toLower(s.charAt(i))) {
555: return false;
556: }
557: }
558: return true;
559: }
560:
561: public boolean equals(ByteChunk bb) {
562: return equals(bb.getBytes(), bb.getStart(), bb.getLength());
563: }
564:
565: public boolean equals(byte b2[], int off2, int len2) {
566: byte b1[] = buff;
567: if (b1 == null && b2 == null)
568: return true;
569:
570: int len = end - start;
571: if (len2 != len || b1 == null || b2 == null)
572: return false;
573:
574: int off1 = start;
575:
576: while (len-- > 0) {
577: if (b1[off1++] != b2[off2++]) {
578: return false;
579: }
580: }
581: return true;
582: }
583:
584: public boolean equals(CharChunk cc) {
585: return equals(cc.getChars(), cc.getStart(), cc.getLength());
586: }
587:
588: public boolean equals(char c2[], int off2, int len2) {
589: // XXX works only for enc compatible with ASCII/UTF !!!
590: byte b1[] = buff;
591: if (c2 == null && b1 == null)
592: return true;
593:
594: if (b1 == null || c2 == null || end - start != len2) {
595: return false;
596: }
597: int off1 = start;
598: int len = end - start;
599:
600: while (len-- > 0) {
601: if ((char) b1[off1++] != c2[off2++]) {
602: return false;
603: }
604: }
605: return true;
606: }
607:
608: /**
609: * Returns true if the message bytes starts with the specified string.
610: * @param s the string
611: */
612: public boolean startsWith(String s) {
613: // Works only if enc==UTF
614: byte[] b = buff;
615: int blen = s.length();
616: if (b == null || blen > end - start) {
617: return false;
618: }
619: int boff = start;
620: for (int i = 0; i < blen; i++) {
621: if (b[boff++] != s.charAt(i)) {
622: return false;
623: }
624: }
625: return true;
626: }
627:
628: /* Returns true if the message bytes start with the specified byte array */
629: public boolean startsWith(byte[] b2) {
630: byte[] b1 = buff;
631: if (b1 == null && b2 == null) {
632: return true;
633: }
634:
635: int len = end - start;
636: if (b1 == null || b2 == null || b2.length > len) {
637: return false;
638: }
639: for (int i = start, j = 0; i < end && j < b2.length;) {
640: if (b1[i++] != b2[j++])
641: return false;
642: }
643: return true;
644: }
645:
646: /**
647: * Returns true if the message bytes starts with the specified string.
648: * @param s the string
649: * @param pos The position
650: */
651: public boolean startsWithIgnoreCase(String s, int pos) {
652: byte[] b = buff;
653: int len = s.length();
654: if (b == null || len + pos > end - start) {
655: return false;
656: }
657: int off = start + pos;
658: for (int i = 0; i < len; i++) {
659: if (Ascii.toLower(b[off++]) != Ascii.toLower(s.charAt(i))) {
660: return false;
661: }
662: }
663: return true;
664: }
665:
666: public int indexOf(String src, int srcOff, int srcLen, int myOff) {
667: char first = src.charAt(srcOff);
668:
669: // Look for first char
670: int srcEnd = srcOff + srcLen;
671:
672: for (int i = myOff + start; i <= (end - srcLen); i++) {
673: if (buff[i] != first)
674: continue;
675: // found first char, now look for a match
676: int myPos = i + 1;
677: for (int srcPos = srcOff + 1; srcPos < srcEnd;) {
678: if (buff[myPos++] != src.charAt(srcPos++))
679: break;
680: if (srcPos == srcEnd)
681: return i - start; // found it
682: }
683: }
684: return -1;
685: }
686:
687: // -------------------- Hash code --------------------
688:
689: // normal hash.
690: public int hash() {
691: return hashBytes(buff, start, end - start);
692: }
693:
694: // hash ignoring case
695: public int hashIgnoreCase() {
696: return hashBytesIC(buff, start, end - start);
697: }
698:
699: private static int hashBytes(byte buff[], int start, int bytesLen) {
700: int max = start + bytesLen;
701: byte bb[] = buff;
702: int code = 0;
703: for (int i = start; i < max; i++) {
704: code = code * 37 + bb[i];
705: }
706: return code;
707: }
708:
709: private static int hashBytesIC(byte bytes[], int start, int bytesLen) {
710: int max = start + bytesLen;
711: byte bb[] = bytes;
712: int code = 0;
713: for (int i = start; i < max; i++) {
714: code = code * 37 + Ascii.toLower(bb[i]);
715: }
716: return code;
717: }
718:
719: /**
720: * Returns true if the message bytes starts with the specified string.
721: * @param c the character
722: * @param starting The start position
723: */
724: public int indexOf(char c, int starting) {
725: int ret = indexOf(buff, start + starting, end, c);
726: return (ret >= start) ? ret - start : -1;
727: }
728:
729: public static int indexOf(byte bytes[], int off, int end, char qq) {
730: // Works only for UTF
731: while (off < end) {
732: byte b = bytes[off];
733: if (b == qq)
734: return off;
735: off++;
736: }
737: return -1;
738: }
739:
740: /** Find a character, no side effects.
741: * @return index of char if found, -1 if not
742: */
743: public static int findChar(byte buf[], int start, int end, char c) {
744: byte b = (byte) c;
745: int offset = start;
746: while (offset < end) {
747: if (buf[offset] == b) {
748: return offset;
749: }
750: offset++;
751: }
752: return -1;
753: }
754:
755: /** Find a character, no side effects.
756: * @return index of char if found, -1 if not
757: */
758: public static int findChars(byte buf[], int start, int end,
759: byte c[]) {
760: int clen = c.length;
761: int offset = start;
762: while (offset < end) {
763: for (int i = 0; i < clen; i++)
764: if (buf[offset] == c[i]) {
765: return offset;
766: }
767: offset++;
768: }
769: return -1;
770: }
771:
772: /** Find the first character != c
773: * @return index of char if found, -1 if not
774: */
775: public static int findNotChars(byte buf[], int start, int end,
776: byte c[]) {
777: int clen = c.length;
778: int offset = start;
779: boolean found;
780:
781: while (offset < end) {
782: found = true;
783: for (int i = 0; i < clen; i++) {
784: if (buf[offset] == c[i]) {
785: found = false;
786: break;
787: }
788: }
789: if (found) { // buf[offset] != c[0..len]
790: return offset;
791: }
792: offset++;
793: }
794: return -1;
795: }
796:
797: /**
798: * Convert specified String to a byte array. This ONLY WORKS for ascii, UTF chars will be truncated.
799: *
800: * @param value to convert to byte array
801: * @return the byte array value
802: */
803: public static final byte[] convertToBytes(String value) {
804: byte[] result = new byte[value.length()];
805: for (int i = 0; i < value.length(); i++) {
806: result[i] = (byte) value.charAt(i);
807: }
808: return result;
809: }
810:
811: }
|