001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one
003: * or more contributor license agreements. See the NOTICE file
004: * distributed with this work for additional information
005: * regarding copyright ownership. The ASF licenses this file
006: * to you under the Apache License, Version 2.0 (the
007: * "License"); you may not use this file except in compliance
008: * with the License. You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing,
013: * software distributed under the License is distributed on an
014: * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015: * KIND, either express or implied. See the License for the
016: * specific language governing permissions and limitations
017: * under the License.
018: */package org.apache.cxf.attachment;
019:
020: import java.io.IOException;
021: import java.io.InputStream;
022: import java.io.PushbackInputStream;
023: import java.net.URLDecoder;
024: import java.util.Enumeration;
025: import java.util.HashSet;
026: import java.util.Set;
027: import java.util.regex.Matcher;
028: import java.util.regex.Pattern;
029:
030: import javax.activation.DataHandler;
031: import javax.activation.DataSource;
032: import javax.mail.Header;
033: import javax.mail.MessagingException;
034: import javax.mail.internet.InternetHeaders;
035:
036: import org.apache.cxf.helpers.IOUtils;
037: import org.apache.cxf.io.CachedOutputStream;
038: import org.apache.cxf.message.Attachment;
039: import org.apache.cxf.message.Message;
040:
041: public class AttachmentDeserializer {
042:
043: public static final String ATTACHMENT_DIRECTORY = "attachment-directory";
044:
045: public static final String ATTACHMENT_MEMORY_THRESHOLD = "attachment-memory-threshold";
046:
047: public static final int THRESHHOLD = 1024 * 100;
048:
049: private static final Pattern CONTENT_TYPE_BOUNDARY_PATTERN = Pattern
050: .compile("boundary=\"?([^\";]*)");
051:
052: // TODO: Is there a better way to detect boundaries in the message content?
053: // It seems constricting to assume the boundary will start with ----=_Part_
054: private static final Pattern INPUT_STREAM_BOUNDARY_PATTERN = Pattern
055: .compile("^--(----=_Part_\\S*)", Pattern.MULTILINE);
056:
057: private boolean lazyLoading = true;
058:
059: private PushbackInputStream stream;
060:
061: private String boundary;
062:
063: private String contentType;
064:
065: private LazyAttachmentCollection attachments;
066:
067: private Message message;
068:
069: private InputStream body;
070:
071: private Set<DelegatingInputStream> loaded = new HashSet<DelegatingInputStream>();
072:
073: public AttachmentDeserializer(Message message) {
074: this .message = message;
075: }
076:
077: public void initializeAttachments() throws IOException {
078: initializeRootMessage();
079:
080: attachments = new LazyAttachmentCollection(this );
081: message.setAttachments(attachments);
082: }
083:
084: protected void initializeRootMessage() throws IOException {
085: contentType = (String) message.get(Message.CONTENT_TYPE);
086:
087: if (contentType == null) {
088: throw new IllegalStateException(
089: "Content-Type can not be empty!");
090: }
091:
092: if (message.getContent(InputStream.class) == null) {
093: throw new IllegalStateException(
094: "An InputStream must be provided!");
095: }
096:
097: if (contentType.toLowerCase().indexOf("multipart/related") != -1) {
098: boundary = findBoundaryFromContentType(contentType);
099: if (null == boundary) {
100: boundary = findBoundaryFromInputStream();
101: }
102: // If a boundary still wasn't found, throw an exception
103: if (null == boundary) {
104: throw new IOException(
105: "Couldn't determine the boundary from the message!");
106: }
107:
108: stream = new PushbackInputStream(message
109: .getContent(InputStream.class),
110: boundary.getBytes().length * 2);
111: if (!readTillFirstBoundary(stream, boundary.getBytes())) {
112: throw new IOException("Couldn't find MIME boundary: "
113: + boundary);
114: }
115:
116: try {
117: // TODO: Do we need to copy these headers somewhere?
118: new InternetHeaders(stream);
119: } catch (MessagingException e) {
120: throw new RuntimeException(e);
121: }
122:
123: body = new DelegatingInputStream(
124: new MimeBodyPartInputStream(stream, boundary
125: .getBytes()));
126: message.setContent(InputStream.class, body);
127: }
128: }
129:
130: private String findBoundaryFromContentType(String ct)
131: throws IOException {
132: // Use regex to get the boundary and return null if it's not found
133: Matcher m = CONTENT_TYPE_BOUNDARY_PATTERN.matcher(ct);
134: return m.find() ? "--" + m.group(1) : null;
135: }
136:
137: private String findBoundaryFromInputStream() throws IOException {
138: CachedOutputStream bos = new CachedOutputStream();
139:
140: InputStream is = message.getContent(InputStream.class);
141: IOUtils.copy(is, bos);
142:
143: is.close();
144: bos.close();
145: String msg = bos.toString();
146:
147: // Reset the input stream since we'll need it again later
148: message.setContent(InputStream.class, bos.getInputStream());
149:
150: // Use regex to get the boundary and return null if it's not found
151: Matcher m = INPUT_STREAM_BOUNDARY_PATTERN.matcher(msg);
152: return m.find() ? "--" + m.group(1) : null;
153: }
154:
155: public AttachmentImpl readNext() throws IOException {
156: // Cache any mime parts that are currently being streamed
157: cacheStreamedAttachments();
158:
159: int v = stream.read();
160: if (v == -1) {
161: return null;
162: }
163: stream.unread(v);
164:
165: InternetHeaders headers;
166: try {
167: headers = new InternetHeaders(stream);
168: } catch (MessagingException e) {
169: // TODO create custom IOException
170: throw new RuntimeException(e);
171: }
172:
173: String id = headers.getHeader("Content-ID", null);
174: if (id != null && id.startsWith("<")) {
175: id = id.substring(1, id.length() - 1);
176: } else {
177: //no Content-ID, set cxf default ID
178: id = "Content-ID: <root.message@cxf.apache.org";
179: }
180:
181: id = URLDecoder.decode(id.startsWith("cid:") ? id.substring(4)
182: : id, "UTF-8");
183:
184: AttachmentImpl att = new AttachmentImpl(id);
185: setupAttachment(att, headers);
186: return att;
187: }
188:
189: private void cacheStreamedAttachments() throws IOException {
190: if (body instanceof DelegatingInputStream
191: && !((DelegatingInputStream) body).isClosed()) {
192:
193: cache((DelegatingInputStream) body, true);
194: message.setContent(InputStream.class, body);
195: }
196:
197: for (Attachment a : attachments.getLoadedAttachments()) {
198: DataSource s = a.getDataHandler().getDataSource();
199: cache((DelegatingInputStream) s.getInputStream(), false);
200: }
201: }
202:
203: private void cache(DelegatingInputStream input,
204: boolean deleteOnClose) throws IOException {
205: if (loaded.contains(input)) {
206: return;
207: }
208: loaded.add(input);
209: CachedOutputStream out = null;
210: try {
211: out = new CachedOutputStream();
212: IOUtils.copy(input, out);
213: input.setInputStream(out.getInputStream());
214: } finally {
215: if (out != null) {
216: out.close();
217: }
218: }
219: }
220:
221: /**
222: * Move the read pointer to the begining of the first part read till the end
223: * of first boundary
224: *
225: * @param pushbackInStream
226: * @param boundary
227: * @throws MessagingException
228: */
229: private static boolean readTillFirstBoundary(
230: PushbackInputStream pbs, byte[] bp) throws IOException {
231:
232: // work around a bug in PushBackInputStream where the buffer isn't
233: // initialized
234: // and available always returns 0.
235: int value = pbs.read();
236: pbs.unread(value);
237: while (value != -1) {
238: value = pbs.read();
239: if ((byte) value == bp[0]) {
240: int boundaryIndex = 0;
241: while (value != -1 && (boundaryIndex < bp.length)
242: && ((byte) value == bp[boundaryIndex])) {
243:
244: value = pbs.read();
245: if (value == -1) {
246: throw new IOException(
247: "Unexpected End while searching for first Mime Boundary");
248: }
249: boundaryIndex++;
250: }
251: if (boundaryIndex == bp.length) {
252: // boundary found, read the newline
253: if (value == 13) {
254: pbs.read();
255: }
256: return true;
257: }
258: }
259: }
260: return false;
261: }
262:
263: /**
264: * Create an Attachment from the MIME stream. If there is a previous attachment
265: * that is not read, cache that attachment.
266: *
267: * @return
268: * @throws IOException
269: */
270: private void setupAttachment(AttachmentImpl att,
271: InternetHeaders headers) throws IOException {
272: MimeBodyPartInputStream partStream = new MimeBodyPartInputStream(
273: stream, boundary.getBytes());
274:
275: final String ct = headers.getHeader("Content-Type", null);
276: DataSource source = new AttachmentDataSource(ct,
277: new DelegatingInputStream(partStream));
278: att.setDataHandler(new DataHandler(source));
279:
280: for (Enumeration<?> e = headers.getAllHeaders(); e
281: .hasMoreElements();) {
282: Header header = (Header) e.nextElement();
283: if (header.getName().equalsIgnoreCase(
284: "Content-Transfer-Encoding")
285: && header.getValue().equalsIgnoreCase("binary")) {
286: att.setXOP(true);
287: }
288: att.setHeader(header.getName(), header.getValue());
289: }
290: }
291:
292: public boolean isLazyLoading() {
293: return lazyLoading;
294: }
295:
296: public void setLazyLoading(boolean lazyLoading) {
297: this.lazyLoading = lazyLoading;
298: }
299: }
|