001: /*
002: * Copyright 2001-2005 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package net.myvietnam.mvncore.web.fileupload;
017:
018: import java.io.ByteArrayOutputStream;
019: import java.io.IOException;
020: import java.io.InputStream;
021: import java.io.OutputStream;
022: import java.io.UnsupportedEncodingException;
023:
/**
 * <p> Low level API for processing file uploads.
 *
 * <p> This class can be used to process data streams conforming to MIME
 * 'multipart' format as defined in
 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Arbitrarily
 * large amounts of data in the stream can be processed under constant
 * memory usage.
 *
 * <p> The format of the stream is defined in the following way:<br>
 *
 * <code>
 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 *   encapsulation := delimiter body CRLF<br>
 *   delimiter := "--" boundary CRLF<br>
 *   close-delimiter := "--" boundary "--"<br>
 *   preamble := &lt;ignore&gt;<br>
 *   epilogue := &lt;ignore&gt;<br>
 *   body := header-part CRLF body-data<br>
 *   header-part := 1*header CRLF<br>
 *   header := header-name ":" header-value<br>
 *   header-name := &lt;printable ascii characters except ":"&gt;<br>
 *   header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 *   body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>Note that body-data can contain another multipart entity. There
 * is limited support for single pass processing of such nested
 * streams. The nested stream is <strong>required</strong> to have a
 * boundary token of the same length as the parent stream (see {@link
 * #setBoundary(byte[])}).
 *
 * <p>Here is an example of usage of this class.<br>
 *
 * <pre>
 *    try {
 *        MultipartStream multipartStream = new MultipartStream(input,
 *                                                              boundary);
 *        boolean nextPart = multipartStream.skipPreamble();
 *        OutputStream output;
 *        while (nextPart) {
 *            String headers = multipartStream.readHeaders();
 *            // process headers
 *            // create some output stream
 *            multipartStream.readBodyData(output);
 *            nextPart = multipartStream.readBoundary();
 *        }
 *    } catch (MultipartStream.MalformedStreamException e) {
 *        // the stream failed to follow required syntax
 *    } catch (IOException e) {
 *        // a read or write error occurred
 *    }
 * </pre>
 *
 * <p>Instances keep mutable parsing state (buffer, head and tail indices)
 * without any synchronization, so a single instance must not be shared
 * between threads without external locking.
 *
 * @author <a href="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a href="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartStream.java,v 1.2 2006/02/12 04:43:11 minhnn Exp $
 */
public class MultipartStream {

    // ----------------------------------------------------- Manifest constants

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The maximum length of <code>header-part</code> that will be
     * processed (10 kilobytes = 10240 bytes).
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;

    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;

    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };

    /**
     * A byte sequence that follows a delimiter that will be
     * followed by an encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = { CR, LF };

    /**
     * A byte sequence that follows a delimiter of the last
     * encapsulation in the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = { DASH, DASH };

    /**
     * A byte sequence that precedes a boundary (<code>CRLF--</code>).
     */
    protected static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };

    /**
     * The number of bytes, over and above the boundary size, to use for the
     * keep region.
     */
    private static final int KEEP_REGION_PAD = 3;

    // ----------------------------------------------------------- Data members

    /**
     * The input stream from which data is read.
     */
    private InputStream input;

    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order
     * to detect delimiters reliably.
     */
    private int keepRegion;

    /**
     * The byte sequence that partitions the stream
     * (<code>CRLF--</code> followed by the boundary token).
     */
    private byte[] boundary;

    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;

    /**
     * The index of first valid character in the buffer.
     * <br>
     * 0 &lt;= head &lt; bufSize
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1.
     * <br>
     * 0 &lt;= tail &lt;= bufSize
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. It leaves the input stream, boundary and buffer
     * unset, so an instance created this way cannot parse anything by itself.
     *
     * @see #MultipartStream(InputStream, byte[], int)
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream() {
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a custom size buffer.
     *
     * <p> Note that the buffer must be at least big enough to contain the
     * boundary string, plus 4 characters for CR/LF and double dash, plus at
     * least one byte of data.  Too small a buffer size setting will degrade
     * performance.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     * @param bufSize  The size of the buffer to be used, in bytes.
     *
     * @throws IllegalArgumentException if <code>bufSize</code> is smaller
     *                 than the boundary length plus 5 bytes; such a buffer
     *                 cannot make progress while scanning for a delimiter.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream(InputStream input, byte[] boundary, int bufSize) {
        int delimiterLength = boundary.length + BOUNDARY_PREFIX.length;
        // Enforce the documented minimum up front: with a smaller buffer the
        // body readers would issue zero-length reads and never terminate.
        if (bufSize < delimiterLength + 1) {
            throw new IllegalArgumentException(
                    "The buffer size specified for the MultipartStream is "
                    + "too small");
        }

        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundary = new byte[delimiterLength];
        this.boundaryLength = delimiterLength;
        this.keepRegion = boundary.length + KEEP_REGION_PAD;
        System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0,
                BOUNDARY_PREFIX.length);
        System.arraycopy(boundary, 0, this.boundary,
                BOUNDARY_PREFIX.length, boundary.length);

        head = 0;
        tail = 0;
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a default size buffer.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     *
     * @throws IOException when an error occurs.
     * @throws IllegalArgumentException if the default buffer is too small
     *                 for the given boundary.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[], int)
     */
    public MultipartStream(InputStream input, byte[] boundary)
            throws IOException {
        this(input, boundary, DEFAULT_BUFSIZE);
    }

    // --------------------------------------------------------- Public methods

    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding() {
        return headerEncoding;
    }

    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @param encoding The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding) {
        headerEncoding = encoding;
    }

    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * @return The next byte from the input stream.
     *
     * @throws IOException if there is no more data available.
     */
    public byte readByte() throws IOException {
        // Buffer depleted ?
        if (head == tail) {
            // Mark the buffer empty before refilling so that a failed refill
            // leaves head == tail; every later call then retries the read
            // (and fails again at end of stream) instead of returning stale
            // bytes.
            head = 0;
            tail = 0;
            int bytesRead = input.read(buffer, 0, bufSize);
            if (bytesRead == -1) {
                // No more data available.
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
        }
        return buffer[head++];
    }

    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * <p>The buffer is expected to be positioned at the start of a boundary,
     * as left by {@link #readBodyData(OutputStream)} or
     * {@link #discardBodyData()}.
     *
     * @return <code>true</code> if there are more encapsulations in
     *         this stream; <code>false</code> otherwise.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly or
     *                                  fails to follow required syntax.
     */
    public boolean readBoundary() throws MalformedStreamException {
        byte[] marker = new byte[2];

        head += boundaryLength;
        try {
            marker[0] = readByte();
            if (marker[0] == LF) {
                // Work around IE5 Mac bug with input type=image.
                // Because the boundary delimiter, not including the trailing
                // CRLF, must not appear within any file (RFC 2046, section
                // 5.1.1), we know the missing CR is due to a buggy browser
                // rather than a file containing something similar to a
                // boundary.
                return true;
            }
            marker[1] = readByte();
        } catch (IOException e) {
            throw streamEndedUnexpectedly(e);
        }

        // The syntax check is deliberately outside the try block:
        // MalformedStreamException is itself an IOException, so throwing it
        // inside would replace its message with "Stream ended unexpectedly".
        if (arrayequals(marker, STREAM_TERMINATOR, 2)) {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2)) {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }

    /**
     * <p>Changes the boundary token used for partitioning the stream.
     *
     * <p>This method allows single pass processing of nested multipart
     * streams.
     *
     * <p>The boundary token of the nested stream is <code>required</code>
     * to be of the same length as the boundary token in parent stream.
     *
     * <p>Restoring the parent stream boundary token after processing of a
     * nested stream is left to the application.
     *
     * @param boundary The boundary to be used for parsing of the nested
     *                 stream.
     *
     * @throws IllegalBoundaryException if the <code>boundary</code>
     *                                  has a different length than the one
     *                                  being currently parsed.
     */
    public void setBoundary(byte[] boundary)
            throws IllegalBoundaryException {
        if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        // Overwrite only the token; the CRLF-- prefix stays in place.
        System.arraycopy(boundary, 0, this.boundary,
                BOUNDARY_PREFIX.length, boundary.length);
    }

    /**
     * <p>Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     *
     * <p>Headers are returned verbatim to the input stream, including the
     * trailing <code>CRLF</code> marker. Parsing is left to the
     * application.
     *
     * <p>Only the first {@link #HEADER_PART_SIZE_MAX} bytes are retained;
     * anything beyond that is still consumed from the stream up to the
     * terminating <code>CRLFCRLF</code> but is silently dropped from the
     * returned string.
     *
     * @return The <code>header-part</code> of the current encapsulation.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly.
     */
    public String readHeaders() throws MalformedStreamException {
        // Bytes are collected raw and decoded once at the end so that
        // multi-byte characters are not split.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        int matched = 0;
        int size = 0;
        while (matched < HEADER_SEPARATOR.length) {
            byte b;
            try {
                b = readByte();
            } catch (IOException e) {
                throw streamEndedUnexpectedly(e);
            }
            size++;
            if (b == HEADER_SEPARATOR[matched]) {
                matched++;
            } else if (b == HEADER_SEPARATOR[0]) {
                // A CR that breaks a partial match may itself start the
                // separator (e.g. CR CR LF CR LF), so restart at 1, not 0.
                matched = 1;
            } else {
                matched = 0;
            }
            if (size <= HEADER_PART_SIZE_MAX) {
                baos.write(b);
            }
        }

        if (headerEncoding == null) {
            return baos.toString();
        }
        try {
            return baos.toString(headerEncoding);
        } catch (UnsupportedEncodingException e) {
            // Fall back to platform default if specified encoding is not
            // supported.
            return baos.toString();
        }
    }

    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the
     * output <code>Stream</code>.
     *
     * <p>Arbitrary large amounts of data can be processed by this
     * method using a constant size buffer. (see {@link
     * #MultipartStream(InputStream,byte[],int) constructor}).
     *
     * <p>If the stream ends before a boundary is found, everything read so
     * far is still written to <code>output</code> and flushed before the
     * exception is thrown.
     *
     * @param output The <code>Stream</code> to write data into.
     *
     * @return the amount of data written, as an <code>int</code> byte count.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly.
     * @throws IOException              if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        int total = transferBodyData(output);
        output.flush();
        return total;
    }

    /**
     * <p> Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded.
     *
     * @throws MalformedStreamException if the stream ends unexpectedly.
     * @throws IOException              if an i/o error occurs.
     */
    public int discardBodyData() throws MalformedStreamException,
            IOException {
        return transferBodyData(null);
    }

    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found in
     *         the stream.
     *
     * @throws IOException if an i/o error occurs.
     */
    public boolean skipPreamble() throws IOException {
        // First delimiter may be not preceded with a CRLF, so temporarily
        // shift the stored delimiter left to drop its leading CRLF.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (MalformedStreamException e) {
            // No well-formed first delimiter: report "no encapsulation".
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = CR;
            boundary[1] = LF;
        }
    }

    /**
     * Compares <code>count</code> first bytes in the arrays
     * <code>a</code> and <code>b</code>. Both arrays must hold at least
     * <code>count</code> elements.
     *
     * @param a     The first array to compare.
     * @param b     The second array to compare.
     * @param count How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     *         <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a, byte[] b, int count) {
        for (int i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value The value to find.
     * @param pos   The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     *         <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value, int pos) {
        for (int i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the
     *         beginning of the <code>buffer</code>, or <code>-1</code> if
     *         not found.
     */
    protected int findSeparator() {
        // Last index at which a complete delimiter still fits before tail.
        int maxpos = tail - boundaryLength;
        int first = head;
        while (first <= maxpos) {
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos) {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                    && buffer[first + match] == boundary[match]) {
                match++;
            }
            if (match == boundaryLength) {
                return first;
            }
            first++;
        }
        return -1;
    }

    /**
     * Returns a string representation of this object.
     *
     * <p>The boundary is shown as the token only, without the internal
     * <code>CRLF--</code> prefix, with each byte mapped to the character of
     * the same code (ISO-8859-1).
     *
     * @return The string representation of this object.
     */
    public String toString() {
        StringBuffer text = new StringBuffer("boundary='");
        if (boundary == null) {
            text.append("null");
        } else {
            for (int i = BOUNDARY_PREFIX.length; i < boundary.length; i++) {
                text.append((char) (boundary[i] & 0xFF));
            }
        }
        text.append("'\nbufSize=");
        text.append(bufSize);
        return text.toString();
    }

    // -------------------------------------------------------- Private helpers

    /**
     * Consumes <code>body-data</code> up to (not including) the next
     * delimiter, copying it to <code>output</code> when one is given.
     * On return <code>head</code> points at the delimiter.
     *
     * @param output The sink for the data, or <code>null</code> to discard.
     *
     * @return The number of body bytes consumed.
     *
     * @throws MalformedStreamException if the stream ends before a
     *                                  delimiter is found.
     * @throws IOException              if an i/o error occurs.
     */
    private int transferBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        int total = 0;
        while (true) {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1) {
                // Emit the rest of the data before the boundary.
                int count = pos - head;
                if (output != null) {
                    output.write(buffer, head, count);
                }
                total += count;
                head = pos;
                return total;
            }

            // Keep back enough trailing bytes that a delimiter straddling
            // two reads can still be detected after the refill.
            int available = tail - head;
            int pad = Math.min(available, keepRegion);
            int count = available - pad;
            if (output != null) {
                output.write(buffer, head, count);
            }
            total += count;

            // Move the kept data to the beginning of the buffer.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);

            // Refill buffer with new data.
            head = 0;
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1) {
                // The last pad amount is left in the buffer. Boundary
                // can't be in there so emit the data we have, leave the
                // buffer empty and signal an error condition.
                if (output != null) {
                    output.write(buffer, 0, pad);
                    output.flush();
                }
                tail = 0;
                throw new MalformedStreamException(
                        "Stream ended unexpectedly");
            }
            tail = pad + bytesRead;
        }
    }

    /**
     * Builds the exception reported when the underlying stream runs out (or
     * fails) in the middle of a syntactic element, keeping the original
     * failure as its cause.
     *
     * @param cause The exception raised by the underlying read.
     *
     * @return The exception to throw.
     */
    private static MalformedStreamException streamEndedUnexpectedly(
            IOException cause) {
        MalformedStreamException e =
                new MalformedStreamException("Stream ended unexpectedly");
        if (cause != null) {
            e.initCause(cause);
        }
        return e;
    }

    // ------------------------------------------------------ Exception classes

    /**
     * Thrown to indicate that the input stream fails to follow the
     * required syntax.
     */
    public static class MalformedStreamException extends IOException {
        /**
         * Constructs a <code>MalformedStreamException</code> with no
         * detail message.
         */
        public MalformedStreamException() {
            super();
        }

        /**
         * Constructs a <code>MalformedStreamException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(String message) {
            super(message);
        }
    }

    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public static class IllegalBoundaryException extends IOException {
        /**
         * Constructs an <code>IllegalBoundaryException</code> with no
         * detail message.
         */
        public IllegalBoundaryException() {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public IllegalBoundaryException(String message) {
            super(message);
        }
    }
}
|