001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.tomcat.util.http.fileupload;
019:
020: import java.io.ByteArrayOutputStream;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.OutputStream;
024: import java.io.UnsupportedEncodingException;
025:
/**
 * <p> Low level API for processing file uploads.
 *
 * <p> This class can be used to process data streams conforming to MIME
 * 'multipart' format as defined in
 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Arbitrarily
 * large amounts of data in the stream can be processed under constant
 * memory usage.
 *
 * <p> The format of the stream is defined in the following way:<br>
 *
 * <code>
 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 *   encapsulation := delimiter body CRLF<br>
 *   delimiter := "--" boundary CRLF<br>
 *   close-delimiter := "--" boundary "--"<br>
 *   preamble := &lt;ignore&gt;<br>
 *   epilogue := &lt;ignore&gt;<br>
 *   body := header-part CRLF body-data<br>
 *   header-part := 1*header CRLF<br>
 *   header := header-name ":" header-value<br>
 *   header-name := &lt;printable ascii characters except ":"&gt;<br>
 *   header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 *   body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>Note that body-data can contain another multipart entity. There
 * is limited support for single pass processing of such nested
 * streams. The nested stream is <strong>required</strong> to have a
 * boundary token of the same length as the parent stream (see {@link
 * #setBoundary(byte[])}).
 *
 * <p>Here is an example of usage of this class.<br>
 *
 * <pre>
 *    try {
 *        MultipartStream multipartStream = new MultipartStream(input,
 *                                                              boundary);
 *        boolean nextPart = multipartStream.skipPreamble();
 *        OutputStream output;
 *        while(nextPart) {
 *            String headers = multipartStream.readHeaders();
 *            // process headers
 *            // create some output stream
 *            multipartStream.readBodyData(output);
 *            nextPart = multipartStream.readBoundary();
 *        }
 *    } catch(MultipartStream.MalformedStreamException e) {
 *          // the stream failed to follow required syntax
 *    } catch(IOException e) {
 *          // a read or write error occurred
 *    }
 * </pre>
 *
 * <p>Instances keep mutable parsing state (buffer, head and tail indexes)
 * without any synchronization, so a single instance must not be shared
 * between threads.
 *
 * @author <a href="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a href="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartStream.java 467222 2006-10-24 03:17:11Z markt $
 */
public class MultipartStream {

    // ----------------------------------------------------- Manifest constants

    /**
     * The maximum length of <code>header-part</code> that will be
     * processed (10 kilobytes = 10240 bytes.).
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;

    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;

    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = { 0x0D, 0x0A,
            0x0D, 0x0A };

    /**
     * A byte sequence that follows a delimiter that will be
     * followed by an encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = { 0x0D, 0x0A };

    /**
     * A byte sequence that follows a delimiter of the last
     * encapsulation in the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = { 0x2D, 0x2D };

    /**
     * Number of bytes (<code>CR LF - -</code>) prepended to the boundary
     * token to form the delimiter searched for in the buffer.
     */
    private static final int BOUNDARY_PREFIX_LENGTH = 4;

    // ----------------------------------------------------------- Data members

    /**
     * The input stream from which data is read.
     */
    private InputStream input;

    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order
     * to detect delimiters reliably.
     */
    private int keepRegion;

    /**
     * The byte sequence that partitions the stream.
     */
    private byte[] boundary;

    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;

    /**
     * The index of first valid character in the buffer.
     * <br>
     * 0 &lt;= head &lt; bufSize
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1.
     * <br>
     * 0 &lt;= tail &lt;= bufSize
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor.
     *
     * <p>It assigns no input stream, boundary or buffer, so an instance
     * created this way cannot parse anything; use one of the other
     * constructors instead.
     *
     * @see #MultipartStream(InputStream, byte[], int)
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream() {
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a custom size buffer.
     *
     * <p> Note that the buffer must be at least big enough to contain the
     * boundary string, plus 4 characters for CR/LF and double dash, plus at
     * least one byte of data.  Too small a buffer size setting will degrade
     * performance.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     * @param bufSize  The size of the buffer to be used, in bytes.
     *
     * @exception IllegalArgumentException if <code>boundary</code> is
     *            <code>null</code>, or if <code>bufSize</code> is smaller
     *            than the minimum described above.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[])
     */
    public MultipartStream(InputStream input, byte[] boundary,
            int bufSize) {
        if (boundary == null) {
            throw new IllegalArgumentException("boundary may not be null");
        }
        int delimiterLength = boundary.length + BOUNDARY_PREFIX_LENGTH;
        // A buffer that cannot hold the delimiter can never match it, and the
        // body-reading loop would then issue zero-length reads forever.
        if (bufSize < delimiterLength + 1) {
            throw new IllegalArgumentException(
                    "The buffer size specified for the MultipartStream is "
                    + "too small: " + bufSize + " (minimum is "
                    + (delimiterLength + 1) + ")");
        }

        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundary = new byte[delimiterLength];
        this.boundaryLength = delimiterLength;
        this.keepRegion = delimiterLength - 1;
        this.boundary[0] = 0x0D;
        this.boundary[1] = 0x0A;
        this.boundary[2] = 0x2D;
        this.boundary[3] = 0x2D;
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX_LENGTH,
                boundary.length);

        head = 0;
        tail = 0;
    }

    /**
     * <p> Constructs a <code>MultipartStream</code> with a default size buffer.
     *
     * @param input    The <code>InputStream</code> to serve as a data source.
     * @param boundary The token used for dividing the stream into
     *                 <code>encapsulations</code>.
     *
     * @exception IOException declared for compatibility; this constructor
     *            performs no I/O itself.
     * @exception IllegalArgumentException if <code>boundary</code> is
     *            <code>null</code> or too long for the default buffer.
     *
     * @see #MultipartStream()
     * @see #MultipartStream(InputStream, byte[], int)
     */
    public MultipartStream(InputStream input, byte[] boundary)
            throws IOException {
        this(input, boundary, DEFAULT_BUFSIZE);
    }

    // --------------------------------------------------------- Public methods

    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding() {
        return headerEncoding;
    }

    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the platform
     * default encoding is used.
     *
     * @param encoding The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding) {
        headerEncoding = encoding;
    }

    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * @return The next byte from the input stream.
     *
     * @exception IOException if there is no more data available.
     */
    public byte readByte() throws IOException {
        // Buffer depleted ?
        if (head == tail) {
            head = 0;
            // Refill. The result goes into a local first so that an
            // end-of-stream marker (-1) never ends up in 'tail'; otherwise a
            // later call would see head != tail and hand out stale bytes.
            int bytesRead = input.read(buffer, head, bufSize);
            if (bytesRead == -1) {
                tail = 0;
                // No more data available.
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
        }
        return buffer[head++];
    }

    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * <p>The method assumes the read position is at the start of a
     * delimiter, as left by {@link #readBodyData(OutputStream)} or
     * {@link #discardBodyData()}.
     *
     * @return <code>true</code> if there are more encapsulations in
     *         this stream; <code>false</code> otherwise.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly or
     *                                     fails to follow required syntax.
     */
    public boolean readBoundary() throws MalformedStreamException {
        byte[] marker = new byte[2];

        head += boundaryLength;
        try {
            marker[0] = readByte();
            marker[1] = readByte();
        } catch (IOException e) {
            throw streamEnded(e);
        }

        // The syntax check is done outside the try block so that its own
        // exception is not caught and replaced by the end-of-stream message.
        if (arrayequals(marker, STREAM_TERMINATOR, 2)) {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2)) {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }

    /**
     * <p>Changes the boundary token used for partitioning the stream.
     *
     * <p>This method allows single pass processing of nested multipart
     * streams.
     *
     * <p>The boundary token of the nested stream is <code>required</code>
     * to be of the same length as the boundary token in parent stream.
     *
     * <p>Restoring the parent stream boundary token after processing of a
     * nested stream is left to the application.
     *
     * @param boundary The boundary to be used for parsing of the nested
     *                 stream.
     *
     * @exception IllegalBoundaryException if the <code>boundary</code>
     *                                     is <code>null</code> or has a
     *                                     different length than the one
     *                                     being currently parsed.
     */
    public void setBoundary(byte[] boundary)
            throws IllegalBoundaryException {
        if (boundary == null) {
            throw new IllegalBoundaryException(
                    "The boundary token must not be null");
        }
        if (boundary.length != boundaryLength - BOUNDARY_PREFIX_LENGTH) {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX_LENGTH,
                boundary.length);
    }

    /**
     * <p>Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     *
     * <p>Headers are returned verbatim to the input stream, including the
     * trailing <code>CRLF</code> marker. Parsing is left to the
     * application.
     *
     * <p>At most {@link #HEADER_PART_SIZE_MAX} bytes are accepted; a longer
     * header part is rejected rather than silently truncated.
     *
     * @return The <code>header-part</code> of the current encapsulation,
     *         decoded with the configured header encoding (or the platform
     *         default when none is set or the configured one is unsupported).
     *
     * @exception MalformedStreamException if the stream ends unexpectedly or
     *            the header part exceeds {@link #HEADER_PART_SIZE_MAX} bytes.
     */
    public String readHeaders() throws MalformedStreamException {
        int matched = 0;
        int size = 0;
        // Bytes are collected first and decoded once at the end so that
        // multi-byte characters are handled correctly.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        while (matched < HEADER_SEPARATOR.length) {
            byte b;
            try {
                b = readByte();
            } catch (IOException e) {
                throw streamEnded(e);
            }
            if (++size > HEADER_PART_SIZE_MAX) {
                throw new MalformedStreamException(
                        "Header section has more than "
                        + HEADER_PART_SIZE_MAX
                        + " bytes (maybe it is not properly terminated)");
            }
            if (b == HEADER_SEPARATOR[matched]) {
                matched++;
            } else {
                // A mismatching CR may itself be the first byte of the real
                // terminator (e.g. "\r\r\n\r\n"), so restart the match at 1
                // instead of dropping it.
                matched = (b == HEADER_SEPARATOR[0]) ? 1 : 0;
            }
            baos.write(b);
        }

        if (headerEncoding != null) {
            try {
                return baos.toString(headerEncoding);
            } catch (UnsupportedEncodingException e) {
                // Fall back to platform default if specified encoding is not
                // supported.
                return baos.toString();
            }
        }
        return baos.toString();
    }

    /**
     * <p>Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the
     * output <code>Stream</code>.
     *
     * <p>Arbitrary large amounts of data can be processed by this
     * method using a constant size buffer. (see {@link
     * #MultipartStream(InputStream,byte[],int) constructor}).
     *
     * @param output The <code>Stream</code> to write data into. May be
     *               <code>null</code>, in which case the data is discarded
     *               exactly as by {@link #discardBodyData()}.
     *
     * @return the amount of data written.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException              if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        return consumeBodyData(output);
    }

    /**
     * <p> Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded.
     *
     * @exception MalformedStreamException if the stream ends unexpectedly.
     * @exception IOException              if an i/o error occurs.
     */
    public int discardBodyData() throws MalformedStreamException,
            IOException {
        return consumeBodyData(null);
    }

    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found in
     *         the stream.
     *
     * @exception IOException if an i/o error occurs.
     */
    public boolean skipPreamble() throws IOException {
        // First delimiter may be not preceded with a CRLF, so temporarily
        // shift the delimiter left by two bytes and shorten it accordingly.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (MalformedStreamException e) {
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2,
                    boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = 0x0D;
            boundary[1] = 0x0A;
        }
    }

    /**
     * Compares <code>count</code> first bytes in the arrays
     * <code>a</code> and <code>b</code>.
     *
     * @param a     The first array to compare.
     * @param b     The second array to compare.
     * @param count How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     *         <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a, byte[] b, int count) {
        for (int i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value The value to find.
     * @param pos   The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     *         <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value, int pos) {
        for (int i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the
     *         beginning of the <code>buffer</code>, or <code>-1</code> if
     *         not found.
     */
    protected int findSeparator() {
        // Last index at which a complete delimiter still fits before tail.
        int maxpos = tail - boundaryLength;
        for (int first = head; first <= maxpos; first++) {
            // Jump to the next candidate start byte.
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos) {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                    && buffer[first + match] == boundary[match]) {
                match++;
            }
            if (match == boundaryLength) {
                return first;
            }
        }
        return -1;
    }

    /**
     * Returns a string representation of this object.
     *
     * @return The string representation of this object: the current
     *         delimiter bytes (each byte shown as one character) and the
     *         buffer size.
     */
    public String toString() {
        StringBuffer sbTemp = new StringBuffer();
        sbTemp.append("boundary='");
        if (boundary == null) {
            sbTemp.append("null");
        } else {
            // String.valueOf(byte[]) would only print the array identity, so
            // widen each byte to a char to show the actual delimiter.
            for (int i = 0; i < boundary.length; i++) {
                sbTemp.append((char) (boundary[i] & 0xFF));
            }
        }
        sbTemp.append("'\nbufSize=");
        sbTemp.append(bufSize);
        return sbTemp.toString();
    }

    // -------------------------------------------------------- Private methods

    /**
     * Consumes <code>body-data</code> up to (not including) the next
     * delimiter, optionally copying it to <code>output</code>.
     *
     * @param output Destination for the data, or <code>null</code> to
     *               discard it.
     *
     * @return The number of body bytes consumed.
     *
     * @exception MalformedStreamException if the stream ends before a
     *                                     delimiter is found.
     * @exception IOException              if an i/o error occurs.
     */
    private int consumeBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        int total = 0;
        while (true) {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1) {
                // Emit the rest of the data before the boundary and leave
                // head on the delimiter for readBoundary().
                int length = pos - head;
                if (output != null) {
                    output.write(buffer, head, length);
                }
                total += length;
                head = pos;
                break;
            }

            // No delimiter yet: everything except the trailing keepRegion
            // bytes (which may hold the start of a delimiter) is body data.
            int pad = Math.min(tail - head, keepRegion);
            int length = tail - head - pad;
            if (output != null) {
                output.write(buffer, head, length);
            }
            total += length;

            // Move the retained bytes to the beginning of the buffer.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
            head = 0;
            tail = pad;

            // Refill buffer with new data.  [pprrrrrrr]
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1) {
                // The last pad amount is left in the buffer. Boundary can't
                // be in there so emit the data we have and signal an error
                // condition.
                if (output != null) {
                    output.write(buffer, 0, pad);
                    output.flush();
                }
                tail = 0;
                throw new MalformedStreamException(
                        "Stream ended unexpectedly");
            }
            tail = pad + bytesRead;
        }
        if (output != null) {
            output.flush();
        }
        return total;
    }

    /**
     * Builds the exception reported when the underlying stream fails or
     * ends in the middle of a syntactic element, keeping the original
     * failure as its cause.
     *
     * @param cause The exception raised while reading.
     *
     * @return The exception to throw.
     */
    private static MalformedStreamException streamEnded(IOException cause) {
        MalformedStreamException e =
                new MalformedStreamException("Stream ended unexpectedly");
        e.initCause(cause);
        return e;
    }

    // ------------------------------------------------------ Nested exceptions

    /**
     * Thrown to indicate that the input stream fails to follow the
     * required syntax.
     */
    public static class MalformedStreamException extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * Constructs a <code>MalformedStreamException</code> with no
         * detail message.
         */
        public MalformedStreamException() {
            super();
        }

        /**
         * Constructs an <code>MalformedStreamException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(String message) {
            super(message);
        }
    }

    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public static class IllegalBoundaryException extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * Constructs an <code>IllegalBoundaryException</code> with no
         * detail message.
         */
        public IllegalBoundaryException() {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with
         * the specified detail message.
         *
         * @param message The detail message.
         */
        public IllegalBoundaryException(String message) {
            super(message);
        }
    }
}
|