Source Code Cross Referenced for MIMEParser.java in » Library » mime-pull » org » jvnet » mimepull » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Library » mime pull » org.jvnet.mimepull
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003:         *
004:         * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005:         *
006:         * The contents of this file are subject to the terms of either the GNU
007:         * General Public License Version 2 only ("GPL") or the Common Development
008:         * and Distribution License("CDDL") (collectively, the "License").  You
009:         * may not use this file except in compliance with the License. You can obtain
010:         * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
011:         * or glassfish/bootstrap/legal/LICENSE.txt.  See the License for the specific
012:         * language governing permissions and limitations under the License.
013:         *
014:         * When distributing the software, include this License Header Notice in each
015:         * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
016:         * Sun designates this particular file as subject to the "Classpath" exception
017:         * as provided by Sun in the GPL Version 2 section of the License file that
018:         * accompanied this code.  If applicable, add the following below the License
019:         * Header, with the fields enclosed by brackets [] replaced by your own
020:         * identifying information: "Portions Copyrighted [year]
021:         * [name of copyright owner]"
022:         *
023:         * Contributor(s):
024:         *
025:         * If you wish your version of this file to be governed by only the CDDL or
026:         * only the GPL Version 2, indicate your decision by adding "[Contributor]
027:         * elects to include this software in this distribution under the [CDDL or GPL
028:         * Version 2] license."  If you don't indicate a single choice of license, a
029:         * recipient has the option to distribute your version of this file under
030:         * either the CDDL, the GPL Version 2 or to extend the choice of license to
031:         * its licensees as provided above.  However, if you add GPL Version 2 code
032:         * and therefore, elected the GPL Version 2 license, then the option applies
033:         * only if the new code is made subject to such option by the copyright
034:         * holder.
035:         */
036:        package org.jvnet.mimepull;
037:
038:        import java.io.InputStream;
039:        import java.io.IOException;
040:        import java.io.PushbackInputStream;
041:        import java.util.*;
042:        import java.nio.ByteBuffer;
043:
044:        /**
045:         * Pull parser for MIME messages. Applications can use the pull API to
046:         * parse MIME messages lazily, one event at a time.
047:         *
048:         * <pre>
049:         * for e.g.:
050:         * <p>
051:         *
052:         * MIMEParser parser = ...
053:         * Iterator&lt;MIMEEvent&gt; it = parser.iterator();
054:         * while(it.hasNext()) {
055:         *   MIMEEvent event = it.next();
056:         *   ...
057:         * }
058:         * </pre>
059:         *
060:         * @author Jitendra Kotamraju
061:         */
062:        class MIMEParser implements  Iterable<MIMEEvent> {
063:            // Actually, the grammar doesn't support whitespace characters
064:            // after boundary. But the mail implementation checks for it.
065:            // We will only check for these many whitespace characters after boundary
066:            private static final int NO_LWSP = 1000;
067:
068:            private enum STATE {
069:                START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE
070:            }
071:
072:            private STATE state = STATE.START_MESSAGE;
073:
074:            private final InputStream in;
075:            private final byte[] bndbytes;
076:            private final int bl;
077:            private final MIMEConfig config;
078:            private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
079:            private final int[] gss; // BnM algo : Good Suffix Shift table
080:
081:            /**
082:             * Have we parsed the data from our InputStream yet?
083:             */
084:            private boolean parsed;
085:
086:            /*
087:             * Read and process body partsList until we see the
088:             * terminating boundary line (or EOF).
089:             */
090:            private boolean done = false;
091:
092:            private boolean eof;
093:            private final int capacity;
094:            private byte[] buf;
095:            private int len;
096:            private boolean bol; // beginning of the line
097:
098:            MIMEParser(InputStream in, String boundary, MIMEConfig config) {
099:                this .in = in;
100:                this .bndbytes = getBytes("--" + boundary);
101:                bl = bndbytes.length;
102:                this .config = config;
103:                gss = new int[bl];
104:                compileBoundaryPattern();
105:
106:                // \r\n + boundary + "--\r\n" + lots of LWSP
107:                capacity = config.chunkSize + 2 + bl + 4 + NO_LWSP;
108:                createBuf(capacity);
109:            }
110:
111:            /**
112:             * Returns iterator for the parsing events. Use the iterator to advance
113:             * the parsing.
114:             *
115:             * @return iterator for parsing events
116:             */
117:            public Iterator<MIMEEvent> iterator() {
118:                return new MIMEEventIterator();
119:            }
120:
121:            class MIMEEventIterator implements  Iterator<MIMEEvent> {
122:
123:                public boolean hasNext() {
124:                    return !parsed;
125:                }
126:
127:                public MIMEEvent next() {
128:                    switch (state) {
129:                    case START_MESSAGE:
130:                        state = STATE.SKIP_PREAMBLE;
131:                        return MIMEEvent.START_MESSAGE;
132:
133:                    case SKIP_PREAMBLE:
134:                        skipPreamble();
135:                        // fall through
136:                    case START_PART:
137:                        state = STATE.HEADERS;
138:                        return MIMEEvent.START_PART;
139:
140:                    case HEADERS:
141:                        InternetHeaders ih = readHeaders();
142:                        state = STATE.BODY;
143:                        bol = true;
144:                        return new MIMEEvent.Headers(ih);
145:
146:                    case BODY:
147:                        ByteBuffer buf = readBody();
148:                        bol = false;
149:                        return new MIMEEvent.Content(buf);
150:
151:                    case END_PART:
152:                        if (done) {
153:                            state = STATE.END_MESSAGE;
154:                        } else {
155:                            state = STATE.START_PART;
156:                        }
157:                        return MIMEEvent.END_PART;
158:
159:                    case END_MESSAGE:
160:                        parsed = true;
161:                        return MIMEEvent.END_MESSAGE;
162:
163:                    default:
164:                        throw new MIMEParsingException(
165:                                "Unknown Parser state = " + state);
166:                    }
167:                }
168:
169:                public void remove() {
170:                    throw new UnsupportedOperationException();
171:                }
172:            }
173:
174:            /**
175:             * Collects the headers for the current part by parsing mesage stream.
176:             *
177:             * @return headers for the current part
178:             */
179:            private InternetHeaders readHeaders() {
180:                if (!eof) {
181:                    fillBuf();
182:                }
183:                return new InternetHeaders(new LineInputStream());
184:            }
185:
186:            /**
187:             * Reads and saves the part of the current attachment part's content.
188:             * At the end of this method, buf should have the remaining data
189:             * at index 0.
190:             *
191:             * @return a chunk of the part's content
192:             *
193:             */
194:            private ByteBuffer readBody() {
195:                if (!eof) {
196:                    fillBuf();
197:                }
198:                int start = match(buf, 0, len); // matches boundary
199:                if (start == -1) {
200:                    // No boundary is found
201:                    assert eof || len >= config.chunkSize;
202:                    int chunkSize = eof ? len : config.chunkSize;
203:                    if (eof) {
204:                        done = true;
205:                        state = STATE.END_PART;
206:                    }
207:                    return adjustBuf(chunkSize, len - chunkSize);
208:                }
209:                // Found boundary.
210:                // Is it at the start of a line ?
211:                int chunkLen = start;
212:                if (bol && start == 0) {
213:                    // nothing to do
214:                } else if (start > 0
215:                        && (buf[start - 1] == '\n' || buf[start - 1] == '\r')) {
216:                    --chunkLen;
217:                    if (buf[start - 1] == '\n' && start > 1
218:                            && buf[start - 2] == '\r') {
219:                        --chunkLen;
220:                    }
221:                } else {
222:                    return adjustBuf(start + 1, len - start - 1); // boundary is not at beginning of a line
223:                }
224:
225:                if (start + bl + 1 < len && buf[start + bl] == '-'
226:                        && buf[start + bl + 1] == '-') {
227:                    state = STATE.END_PART;
228:                    done = true;
229:                    return adjustBuf(chunkLen, 0);
230:                }
231:
232:                // Consider all the whitespace in boundary+whitespace+"\r\n"
233:                int lwsp = 0;
234:                for (int i = start + bl; i < len
235:                        && (buf[i] == ' ' || buf[i] == '\t'); i++) {
236:                    ++lwsp;
237:                }
238:
239:                // Check for \n or \r\n
240:                if (start + bl + lwsp < len
241:                        && (buf[start + bl + lwsp] == '\n' || buf[start + bl
242:                                + lwsp] == '\r')) {
243:                    if (buf[start + bl + lwsp] == '\n') {
244:                        state = STATE.END_PART;
245:                        return adjustBuf(chunkLen, len - start - bl - lwsp - 1);
246:                    } else if (start + bl + lwsp + 1 < len
247:                            && buf[start + bl + lwsp + 1] == '\n') {
248:                        state = STATE.END_PART;
249:                        return adjustBuf(chunkLen, len - start - bl - lwsp - 2);
250:                    }
251:                }
252:
253:                // Let us give chance to consume atleast NO_LWSP whitespace characters
254:                if (lwsp > 0 && start > config.chunkSize) {
255:                    return adjustBuf(start, len - start);
256:                }
257:
258:                // Not a proper boundary
259:                return adjustBuf(start + 1, len - start - 1);
260:            }
261:
262:            /**
263:             * Returns a chunk from the original buffer. A new buffer is
264:             * created with the remaining bytes.
265:             *
266:             * @param chunkSize create a chunk with these many bytes
267:             * @param remaining bytes from the end of the buffer that need to be copied to
268:             *        the beginning of the new buffer
269:             * @return chunk
270:             */
271:            private ByteBuffer adjustBuf(int chunkSize, int remaining) {
272:                assert buf != null;
273:                assert chunkSize >= 0;
274:                assert remaining >= 0;
275:
276:                byte[] temp = buf;
277:                // create a new buf and adjust it without this chunk
278:                createBuf(remaining);
279:                System.arraycopy(temp, len - remaining, buf, 0, remaining);
280:                len = remaining;
281:
282:                return ByteBuffer.wrap(temp, 0, chunkSize);
283:            }
284:
285:            private void createBuf(int min) {
286:                buf = new byte[min < capacity ? capacity : min];
287:            }
288:
289:            /**
290:             * Skips the preamble to find the first attachment part
291:             */
292:            private void skipPreamble() {
293:
294:                while (true) {
295:                    if (!eof) {
296:                        fillBuf();
297:                    }
298:                    int start = match(buf, 0, len); // matches boundary
299:                    if (start == -1) {
300:                        // No boundary is found
301:                        if (eof) {
302:                            throw new MIMEParsingException(
303:                                    "Missing start boundary");
304:                        } else {
305:                            adjustBuf(len - bl + 1, bl - 1);
306:                            continue;
307:                        }
308:                    }
309:
310:                    if (start > config.chunkSize) {
311:                        adjustBuf(start, len - start);
312:                        continue;
313:                    }
314:                    // Consider all the whitespace boundary+whitespace+"\r\n"
315:                    int lwsp = 0;
316:                    for (int i = start + bl; i < len
317:                            && (buf[i] == ' ' || buf[i] == '\t'); i++) {
318:                        ++lwsp;
319:                    }
320:                    // Check for \n or \r\n
321:                    if (start + bl + lwsp < len
322:                            && (buf[start + bl + lwsp] == '\n' || buf[start
323:                                    + bl + lwsp] == '\r')) {
324:                        if (buf[start + bl + lwsp] == '\n') {
325:                            adjustBuf(start + bl + lwsp + 1, len - start - bl
326:                                    - lwsp - 1);
327:                            break;
328:                        } else if (start + bl + lwsp + 1 < len
329:                                && buf[start + bl + lwsp + 1] == '\n') {
330:                            adjustBuf(start + bl + lwsp + 2, len - start - bl
331:                                    - lwsp - 2);
332:                            break;
333:                        }
334:                    }
335:                    adjustBuf(start + 1, len - start - 1);
336:                }
337:            }
338:
339:            private static byte[] getBytes(String s) {
340:                char[] chars = s.toCharArray();
341:                int size = chars.length;
342:                byte[] bytes = new byte[size];
343:
344:                for (int i = 0; i < size;)
345:                    bytes[i] = (byte) chars[i++];
346:                return bytes;
347:            }
348:
349:            /**
350:             * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
351:             *
352:             * Pre calculates arrays needed to generate the bad character
353:             * shift and the good suffix shift. Only the last seven bits
354:             * are used to see if chars match; This keeps the tables small
355:             * and covers the heavily used ASCII range, but occasionally
356:             * results in an aliased match for the bad character shift.
357:             */
358:            private void compileBoundaryPattern() {
359:                int i, j;
360:
361:                // Precalculate part of the bad character shift
362:                // It is a table for where in the pattern each
363:                // lower 7-bit value occurs
364:                for (i = 0; i < bndbytes.length; i++) {
365:                    bcs[bndbytes[i] & 0x7F] = i + 1;
366:                }
367:
368:                // Precalculate the good suffix shift
369:                // i is the shift amount being considered
370:                NEXT: for (i = bndbytes.length; i > 0; i--) {
371:                    // j is the beginning index of suffix being considered
372:                    for (j = bndbytes.length - 1; j >= i; j--) {
373:                        // Testing for good suffix
374:                        if (bndbytes[j] == bndbytes[j - i]) {
375:                            // src[j..len] is a good suffix
376:                            gss[j - 1] = i;
377:                        } else {
378:                            // No match. The array has already been
379:                            // filled up with correct values before.
380:                            continue NEXT;
381:                        }
382:                    }
383:                    // This fills up the remaining of optoSft
384:                    // any suffix can not have larger shift amount
385:                    // then its sub-suffix. Why???
386:                    while (j > 0) {
387:                        gss[--j] = i;
388:                    }
389:                }
390:                // Set the guard value because of unicode compression
391:                gss[bndbytes.length - 1] = 1;
392:            }
393:
394:            /**
395:             * Finds the boundary in the given buffer using Boyer-Moore algo.
396:             * Copied from java.util.regex.Pattern.java
397:             *
398:             * @param mybuf boundary to be searched in this mybuf
399:             * @param off start index in mybuf
400:             * @param len number of bytes in mybuf
401:             *
402:             * @return -1 if there is no match or index where the match starts
403:             */
404:            private int match(byte[] mybuf, int off, int len) {
405:                int last = len - bndbytes.length;
406:
407:                // Loop over all possible match positions in text
408:                NEXT: while (off <= last) {
409:                    // Loop over pattern from right to left
410:                    for (int j = bndbytes.length - 1; j >= 0; j--) {
411:                        byte ch = mybuf[off + j];
412:                        if (ch != bndbytes[j]) {
413:                            // Shift search to the right by the maximum of the
414:                            // bad character shift and the good suffix shift
415:                            off += Math.max(j + 1 - bcs[ch & 0x7F], gss[j]);
416:                            continue NEXT;
417:                        }
418:                    }
419:                    // Entire pattern matched starting at off
420:                    return off;
421:                }
422:                return -1;
423:            }
424:
425:            /**
426:             * Fills the remaining buf to the full capacity
427:             */
428:            private void fillBuf() {
429:                assert !eof;
430:                while (len < buf.length) {
431:                    int read;
432:                    try {
433:                        read = in.read(buf, len, buf.length - len);
434:                    } catch (IOException ioe) {
435:                        throw new MIMEParsingException(ioe);
436:                    }
437:                    if (read == -1) {
438:                        eof = true;
439:                        break;
440:                    } else {
441:                        len += read;
442:                    }
443:                }
444:            }
445:
446:            private void doubleBuf() {
447:                byte[] temp = new byte[2 * len];
448:                System.arraycopy(buf, 0, temp, 0, len);
449:                buf = temp;
450:                fillBuf();
451:            }
452:
453:            class LineInputStream {
454:                private int offset;
455:
456:                /**
457:                 * Read a line containing only ASCII characters from the input
458:                 * stream. A line is terminated by a CR or NL or CR-NL sequence.
459:                 * A common error is a CR-CR-NL sequence, which will also terminate
460:                 * a line.
461:                 * The line terminator is not returned as part of the returned
462:                 * String. Returns null if no data is available. <p>
463:                 *
464:                 * This class is similar to the deprecated
465:                 * <code>DataInputStream.readLine()</code>
466:                 */
467:                public String readLine() throws IOException {
468:
469:                    int hdrLen = 0;
470:                    int lwsp = 0;
471:                    while (offset + hdrLen < len) {
472:                        if (buf[offset + hdrLen] == '\n') {
473:                            lwsp = 1;
474:                            break;
475:                        }
476:                        if (offset + hdrLen + 1 == len) {
477:                            doubleBuf();
478:                        }
479:                        if (offset + hdrLen + 1 >= len) { // No more data in the stream
480:                            assert eof;
481:                            return null;
482:                        }
483:                        if (buf[offset + hdrLen] == '\r'
484:                                && buf[offset + hdrLen + 1] == '\n') {
485:                            lwsp = 2;
486:                            break;
487:                        }
488:                        ++hdrLen;
489:                    }
490:                    if (hdrLen == 0) {
491:                        adjustBuf(offset + lwsp, len - offset - lwsp);
492:                        return null;
493:                    }
494:
495:                    String hdr = new String(buf, offset, hdrLen);
496:                    offset += hdrLen + lwsp;
497:                    return hdr;
498:                }
499:
500:            }
501:
502:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.