001: /*
002: * Copyright 2001-2004 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package net.jforum.util.legacy.commons.fileupload;
017:
018: import java.io.ByteArrayOutputStream;
019: import java.io.IOException;
020: import java.io.InputStream;
021: import java.io.OutputStream;
022: import java.io.Serializable;
023: import java.io.UnsupportedEncodingException;
024:
/**
 * <p> Low level API for processing file uploads.
 *
 * <p> This class can be used to process data streams conforming to MIME
 * 'multipart' format as defined in
 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Arbitrarily
 * large amounts of data in the stream can be processed under constant
 * memory usage.
 *
 * <p> The format of the stream is defined in the following way:<br>
 *
 * <code>
 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 *   encapsulation := delimiter body CRLF<br>
 *   delimiter := "--" boundary CRLF<br>
 *   close-delimiter := "--" boundary "--"<br>
 *   preamble := &lt;ignore&gt;<br>
 *   epilogue := &lt;ignore&gt;<br>
 *   body := header-part CRLF body-data<br>
 *   header-part := 1*header CRLF<br>
 *   header := header-name ":" header-value<br>
 *   header-name := &lt;printable ascii characters except ":"&gt;<br>
 *   header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 *   body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>Note that body-data can contain another multipart entity. There
 * is limited support for single pass processing of such nested
 * streams. The nested stream is <strong>required</strong> to have a
 * boundary token of the same length as the parent stream (see {@link
 * #setBoundary(byte[])}).
 *
 * <p>Here is an example of usage of this class.<br>
 *
 * <pre>
 *    try {
 *        MultipartStream multipartStream = new MultipartStream(input,
 *                                                              boundary);
 *        boolean nextPart = multipartStream.skipPreamble();
 *        OutputStream output;
 *        while(nextPart) {
 *            String headers = multipartStream.readHeaders();
 *            // process headers
 *            // create some output stream
 *            multipartStream.readBodyData(output);
 *            nextPart = multipartStream.readBoundary();
 *        }
 *    } catch(MultipartStream.MalformedStreamException e) {
 *          // the stream failed to follow required syntax
 *    } catch(IOException e) {
 *          // a read or write error occurred
 *    }
 *
 * </pre>
 *
 * @author <a href="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a href="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartStream.java,v 1.4 2007/04/12 02:11:54 rafaelsteil Exp $
 */
public class MultipartStream {

    // ----------------------------------------------------- Manifest constants

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The maximum length of <code>header-part</code> that will be
     * processed (10 kilobytes = 10240 bytes). Header bytes beyond this
     * limit are still consumed from the stream but are not returned by
     * {@link #readHeaders()}.
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;

    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;

    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };

    /**
     * A byte sequence that follows a delimiter that will be
     * followed by an encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = { CR, LF };

    /**
     * A byte sequence that follows a delimiter of the last
     * encapsulation in the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = { DASH, DASH };

    /**
     * Number of bytes (<code>CRLF--</code>) stored in front of the boundary
     * token inside {@link #boundary}.
     */
    private static final int BOUNDARY_PREFIX_LENGTH = 4;

    // ----------------------------------------------------------- Data members

    /**
     * The input stream from which data is read.
     */
    private InputStream input;

    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     * Temporarily two bytes shorter while {@link #skipPreamble()} runs.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order
     * to detect delimiters reliably.
     */
    private int keepRegion;

    /**
     * The byte sequence that partitions the stream
     * (<code>CRLF--</code> followed by the boundary token).
     */
    private byte[] boundary;

    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;

    /**
     * The index of first valid character in the buffer.
     * <br>
     * 0 &lt;= head &lt; bufSize
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1.
     * <br>
     * 0 &lt;= tail &lt;= bufSize
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. It initializes nothing: the input stream, the
     * boundary and the buffer all stay unset, and this class offers no
     * method to supply an input stream afterwards.
     *
     * @see #MultipartStream(InputStream, byte[], int)
     * @see #MultipartStream(InputStream, byte[])
     *
     */
    public MultipartStream() {
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a custom size buffer.
     *
     * <p> Note that the buffer must be at least big enough to contain the
     * boundary string, plus 4 characters for CR/LF and double dash, plus at
     * least one byte of data.  Too small a buffer size setting will degrade
     * performance.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     * @param bufSize  The size of the buffer to be used, in bytes.
     *
     * @throws IllegalArgumentException if <code>bufSize</code> is smaller
     *         than <code>boundary.length + 5</code>; with such a buffer the
     *         body-reading loops could never make progress.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[])
     *
     */
    public MultipartStream(InputStream input, byte[] boundary, int bufSize) {
        int delimiterLength = boundary.length + BOUNDARY_PREFIX_LENGTH;
        // The buffer has to hold a complete delimiter plus at least one byte
        // of data. Otherwise the refill in the body loops is asked to read
        // zero bytes, gets zero back, and spins forever.
        if (bufSize < delimiterLength + 1) {
            throw new IllegalArgumentException(
                    "The buffer size specified for the MultipartStream is too small");
        }

        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundary = new byte[delimiterLength];
        this.boundaryLength = delimiterLength;
        this.keepRegion = delimiterLength - 1;
        this.boundary[0] = CR;
        this.boundary[1] = LF;
        this.boundary[2] = DASH;
        this.boundary[3] = DASH;
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX_LENGTH,
                boundary.length);

        head = 0;
        tail = 0;
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a default size buffer
     * ({@link #DEFAULT_BUFSIZE} bytes).
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     *
     * @throws IllegalArgumentException if the boundary does not fit into the
     *         default buffer.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[], int)
     *
     */
    public MultipartStream(InputStream input, byte[] boundary) {
        this(input, boundary, DEFAULT_BUFSIZE);
    }

    // --------------------------------------------------------- Public methods

    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding() {
        return headerEncoding;
    }

    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @param encoding The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding) {
        headerEncoding = encoding;
    }

    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * @return The next byte from the input stream.
     *
     * @exception IOException if there is no more data available.
     */
    public byte readByte() throws IOException {
        // Buffer depleted ?
        if (head == tail) {
            head = 0;
            // Refill.
            int bytesRead = input.read(buffer, 0, bufSize);
            if (bytesRead == -1) {
                // Leave the buffer empty rather than storing -1 in tail;
                // otherwise a later call would see head != tail and hand out
                // a stale byte instead of reporting the end of the stream.
                tail = 0;
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
        }
        return buffer[head++];
    }

    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * @return <code>true</code> if there are more encapsulations in
     *         this stream; <code>false</code> otherwise.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly or
     *                                     fails to follow required syntax.
     */
    public boolean readBoundary() throws MalformedStreamException {
        byte[] marker = new byte[2];

        head += boundaryLength;
        try {
            marker[0] = readByte();
            if (marker[0] == LF) {
                // Work around IE5 Mac bug with input type=image.
                // Because the boundary delimiter, not including the trailing
                // CRLF, must not appear within any file (RFC 2046, section
                // 5.1.1), we know the missing CR is due to a buggy browser
                // rather than a file containing something similar to a
                // boundary.
                return true;
            }
            marker[1] = readByte();
        } catch (IOException e) {
            throw streamEndedUnexpectedly(e);
        }

        // The syntax check is done outside the try block on purpose:
        // MalformedStreamException is itself an IOException, so throwing it
        // inside would get it caught above and reported with the wrong
        // message.
        if (arrayequals(marker, STREAM_TERMINATOR, 2)) {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2)) {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }

    /**
     * <p>Changes the boundary token used for partitioning the stream.
     *
     * <p>This method allows single pass processing of nested multipart
     * streams.
     *
     * <p>The boundary token of the nested stream is <code>required</code>
     * to be of the same length as the boundary token in parent stream.
     *
     * <p>Restoring the parent stream boundary token after processing of a
     * nested stream is left to the application.
     *
     * @param boundary The boundary to be used for parsing of the nested
     *                 stream.
     *
     * @exception IllegalBoundaryException if the <code>boundary</code>
     *                                     has a different length than the one
     *                                     being currently parsed.
     */
    public void setBoundary(byte[] boundary) throws IllegalBoundaryException {
        if (boundary.length != boundaryLength - BOUNDARY_PREFIX_LENGTH) {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        // Only the token is replaced; the CRLF-- prefix stays in place.
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX_LENGTH,
                boundary.length);
    }

    /**
     * <p>Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     *
     * <p>Headers are returned verbatim to the input stream, including the
     * trailing <code>CRLF</code> marker. Parsing is left to the
     * application.
     *
     * <p>Only the first {@link #HEADER_PART_SIZE_MAX} bytes are returned.
     * Any further header bytes are read from the stream, up to and
     * including the terminating <code>CRLFCRLF</code>, but silently dropped.
     *
     * @return The <code>header-part</code> of the current encapsulation,
     *         decoded with the {@link #getHeaderEncoding() header encoding}
     *         or, if that is unset or unsupported, the platform default.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     */
    public String readHeaders() throws MalformedStreamException {
        // Number of leading HEADER_SEPARATOR bytes matched so far.
        int matched = 0;
        // Bytes are collected first and decoded at the end
        // to support multi-byte characters.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        int stored = 0;
        while (matched < HEADER_SEPARATOR.length) {
            byte b;
            try {
                b = readByte();
            } catch (IOException e) {
                throw streamEndedUnexpectedly(e);
            }
            if (b == HEADER_SEPARATOR[matched]) {
                matched++;
            } else if (b == HEADER_SEPARATOR[0]) {
                // A mismatching CR may itself start the separator, e.g. in
                // "CR CR LF CR LF"; restart the match at 1 instead of 0.
                matched = 1;
            } else {
                matched = 0;
            }
            // The counter stops at the cap so it cannot overflow.
            if (stored < HEADER_PART_SIZE_MAX) {
                baos.write(b);
                stored++;
            }
        }

        if (headerEncoding == null) {
            return baos.toString();
        }
        try {
            return baos.toString(headerEncoding);
        } catch (UnsupportedEncodingException e) {
            // Fall back to platform default if specified encoding is not
            // supported.
            return baos.toString();
        }
    }

    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the
     * output <code>Stream</code>.
     *
     * <p>Arbitrary large amounts of data can be processed by this
     * method using a constant size buffer. (see {@link
     * #MultipartStream(InputStream,byte[],int) constructor}).
     *
     * <p>If the stream ends before a boundary is found, the data read so
     * far is still written and flushed to <code>output</code> before the
     * exception is thrown.
     *
     * @param output The <code>Stream</code> to write data into.
     *
     * @return the amount of data written.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException              if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        if (output == null) {
            // The shared helper treats null as "discard"; this method has
            // always required a real stream.
            throw new NullPointerException("output");
        }
        return processBodyData(output);
    }

    /**
     * <p> Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException              if an i/o error occurs.
     */
    public int discardBodyData() throws MalformedStreamException,
            IOException {
        return processBodyData(null);
    }

    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found in
     *         the stream.
     *
     * @exception IOException if an i/o error occurs.
     */
    public boolean skipPreamble() throws IOException {
        // First delimiter may be not preceded with a CRLF, so search for
        // "--boundary" only: shift the delimiter two bytes to the left.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (MalformedStreamException e) {
            // Deliberately not propagated: a stream without a well-formed
            // first delimiter simply contains no encapsulation.
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = CR;
            boundary[1] = LF;
        }
    }

    /**
     * Compares <code>count</code> first bytes in the arrays
     * <code>a</code> and <code>b</code>.
     *
     * @param a     The first array to compare.
     * @param b     The second array to compare.
     * @param count How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     *         <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a, byte[] b, int count) {
        for (int i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value The value to find.
     * @param pos   The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     *         <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value, int pos) {
        for (int i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the
     *         beginning of the <code>buffer</code>, or <code>-1</code> if
     *         not found.
     */
    protected int findSeparator() {
        // Last index at which a complete boundary still fits before tail.
        int maxpos = tail - boundaryLength;
        int first = head;
        while (first <= maxpos) {
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos) {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                    && buffer[first + match] == boundary[match]) {
                match++;
            }
            if (match == boundaryLength) {
                return first;
            }
            first++;
        }
        return -1;
    }

    /**
     * Returns a string representation of this object.
     *
     * @return The boundary token (without the internal <code>CRLF--</code>
     *         prefix) and the buffer size.
     */
    public String toString() {
        StringBuffer sbTemp = new StringBuffer();
        sbTemp.append("boundary='");
        sbTemp.append(boundaryAsString());
        sbTemp.append("'\nbufSize=");
        sbTemp.append(bufSize);
        return sbTemp.toString();
    }

    // -------------------------------------------------------- Private helpers

    /**
     * Consumes <code>body-data</code> up to, but not including, the next
     * boundary. The data is copied to <code>output</code>, or discarded when
     * <code>output</code> is <code>null</code>.
     *
     * @param output The stream to copy the data to; may be <code>null</code>.
     *
     * @return The number of body bytes consumed.
     *
     * @exception MalformedStreamException if the stream ends before a
     *                                     boundary is found.
     * @exception IOException              if an i/o error occurs.
     */
    private int processBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        int total = 0;
        while (true) {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1) {
                // Hand over the rest of the data before the boundary.
                int remaining = pos - head;
                if (output != null) {
                    output.write(buffer, head, remaining);
                    output.flush();
                }
                head = pos;
                return total + remaining;
            }

            // The last keepRegion bytes might be the start of a boundary
            // that continues in the next read, so they stay in the buffer.
            int available = tail - head;
            int pad = Math.min(available, keepRegion);
            int chunk = available - pad;

            // Hand over the data that certainly belongs to the body-data.
            if (output != null) {
                output.write(buffer, head, chunk);
            }
            total += chunk;

            // Move the kept data to the beginning of the buffer. The indices
            // are updated before reading so they stay consistent even if
            // the read below fails.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
            head = 0;
            tail = pad;

            // Refill buffer with new data.
            // [pprrrrrrr]
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1) {
                // The last pad amount is left in the buffer.
                // Boundary can't be in there so hand over the
                // data we have and signal an error condition.
                if (output != null) {
                    output.write(buffer, 0, pad);
                    output.flush();
                }
                tail = 0;
                throw new MalformedStreamException(
                        "Stream ended unexpectedly");
            }
            tail = pad + bytesRead;
        }
    }

    /**
     * Builds the exception that reports a premature end of the stream,
     * keeping the original failure as its cause.
     *
     * @param cause The i/o failure that was hit while reading.
     *
     * @return The exception to throw.
     */
    private static MalformedStreamException streamEndedUnexpectedly(
            IOException cause) {
        MalformedStreamException e =
                new MalformedStreamException("Stream ended unexpectedly");
        e.initCause(cause);
        return e;
    }

    /**
     * Renders the boundary token for {@link #toString()}.
     *
     * @return The token decoded as ISO-8859-1, or <code>"null"</code> if no
     *         boundary has been set.
     */
    private String boundaryAsString() {
        if (boundary == null) {
            return "null";
        }
        int tokenLength = boundary.length - BOUNDARY_PREFIX_LENGTH;
        try {
            // ISO-8859-1 maps every byte to exactly one character.
            return new String(boundary, BOUNDARY_PREFIX_LENGTH, tokenLength,
                    "ISO-8859-1");
        } catch (UnsupportedEncodingException e) {
            // Fall back to the platform default charset.
            return new String(boundary, BOUNDARY_PREFIX_LENGTH, tokenLength);
        }
    }

    // ------------------------------------------------------ Nested exceptions

    /**
     * Thrown to indicate that the input stream fails to follow the
     * required syntax.
     */
    public static class MalformedStreamException extends IOException
            implements Serializable {
        /**
         * Constructs a <code>MalformedStreamException</code> with no
         * detail message.
         */
        public MalformedStreamException() {
            super();
        }

        /**
         * Constructs a <code>MalformedStreamException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(String message) {
            super(message);
        }
    }

    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public static class IllegalBoundaryException extends IOException
            implements Serializable {
        /**
         * Constructs an <code>IllegalBoundaryException</code> with no
         * detail message.
         */
        public IllegalBoundaryException() {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public IllegalBoundaryException(String message) {
            super(message);
        }
    }

    // ------------------------------------------------------ Debugging methods

    // These are the methods that were used to debug this stuff.
    /*

    // Dump data.
    protected void dump()
    {
        System.out.println("01234567890");
        byte[] temp = new byte[buffer.length];
        for(int i=0; i<buffer.length; i++)
        {
            if (buffer[i] == 0x0D || buffer[i] == 0x0A)
            {
                temp[i] = 0x21;
            }
            else
            {
                temp[i] = buffer[i];
            }
        }
        System.out.println(new String(temp));
        int i;
        for (i=0; i<head; i++)
            System.out.print(" ");
        System.out.println("h");
        for (i=0; i<tail; i++)
            System.out.print(" ");
        System.out.println("t");
        System.out.flush();
    }

    // Main routine, for testing purposes only.
    //
    // @param args A String[] with the command line arguments.
    // @exception Exception, a generic exception.
    public static void main( String[] args )
        throws Exception
    {
        File boundaryFile = new File("boundary.dat");
        int boundarySize = (int)boundaryFile.length();
        byte[] boundary = new byte[boundarySize];
        FileInputStream input = new FileInputStream(boundaryFile);
        input.read(boundary,0,boundarySize);

        input = new FileInputStream("multipart.dat");
        MultipartStream chunks = new MultipartStream(input, boundary);

        int i = 0;
        String header;
        OutputStream output;
        boolean nextChunk = chunks.skipPreamble();
        while (nextChunk)
        {
            header = chunks.readHeaders();
            System.out.println("!"+header+"!");
            System.out.println("wrote part"+i+".dat");
            output = new FileOutputStream("part"+(i++)+".dat");
            chunks.readBodyData(output);
            nextChunk = chunks.readBoundary();
        }
    }

    */
}
|