Source Code Cross Referenced for MimeUtility.java in » 6.0-JDK-Modules » saaj » com » sun » xml » messaging » saaj » packaging » mime » internet » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » 6.0 JDK Modules » saaj » com.sun.xml.messaging.saaj.packaging.mime.internet
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * The contents of this file are subject to the terms
0003:         * of the Common Development and Distribution License
0004:         * (the "License").  You may not use this file except
0005:         * in compliance with the License.
0006:         *
0007:         * You can obtain a copy of the license at
0008:         * https://jwsdp.dev.java.net/CDDLv1.0.html
0009:         * See the License for the specific language governing
0010:         * permissions and limitations under the License.
0011:         *
0012:         * When distributing Covered Code, include this CDDL
0013:         * HEADER in each file and include the License file at
0014:         * https://jwsdp.dev.java.net/CDDLv1.0.html  If applicable,
0015:         * add the following below this CDDL HEADER, with the
0016:         * fields enclosed by brackets "[]" replaced with your
0017:         * own identifying information: Portions Copyright [yyyy]
0018:         * [name of copyright owner]
0019:         */
0020:        /*
0021:         * @(#)MimeUtility.java       1.45 03/03/10
0022:         */
0023:
0024:        /*
0025:         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
0026:         * 
0027:         * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
0028:         * 
0029:         * The contents of this file are subject to the terms of either the GNU
0030:         * General Public License Version 2 only ("GPL") or the Common Development
0031:         * and Distribution License("CDDL") (collectively, the "License").  You
0032:         * may not use this file except in compliance with the License. You can obtain
0033:         * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
0034:         * or glassfish/bootstrap/legal/LICENSE.txt.  See the License for the specific
0035:         * language governing permissions and limitations under the License.
0036:         * 
0037:         * When distributing the software, include this License Header Notice in each
0038:         * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
0039:         * Sun designates this particular file as subject to the "Classpath" exception
0040:         * as provided by Sun in the GPL Version 2 section of the License file that
0041:         * accompanied this code.  If applicable, add the following below the License
0042:         * Header, with the fields enclosed by brackets [] replaced by your own
0043:         * identifying information: "Portions Copyrighted [year]
0044:         * [name of copyright owner]"
0045:         * 
0046:         * Contributor(s):
0047:         * 
0048:         * If you wish your version of this file to be governed by only the CDDL or
0049:         * only the GPL Version 2, indicate your decision by adding "[Contributor]
0050:         * elects to include this software in this distribution under the [CDDL or GPL
0051:         * Version 2] license."  If you don't indicate a single choice of license, a
0052:         * recipient has the option to distribute your version of this file under
0053:         * either the CDDL, the GPL Version 2 or to extend the choice of license to
0054:         * its licensees as provided above.  However, if you add GPL Version 2 code
0055:         * and therefore, elected the GPL Version 2 license, then the option applies
0056:         * only if the new code is made subject to such option by the copyright
0057:         * holder.
0058:         */
0059:
0060:        package com.sun.xml.messaging.saaj.packaging.mime.internet;
0061:
0062:        import java.io.*;
0063:        import java.util.*;
0064:
0065:        import javax.activation.DataHandler;
0066:        import javax.activation.DataSource;
0067:
0068:        import com.sun.xml.messaging.saaj.packaging.mime.MessagingException;
0069:        import com.sun.xml.messaging.saaj.packaging.mime.util.*;
0070:
0071:        /**
0072:         * This is a utility class that provides various MIME related
0073:         * functionality. <p>
0074:         *
0075:         * There are a set of methods to encode and decode MIME headers as 
0076:         * per RFC 2047. A brief description on handling such headers is
0077:         * given below: <p>
0078:         *
0079:         * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
0080:         * characters. Headers that contain non US-ASCII characters must be
0081:         * encoded so that they contain only US-ASCII characters. Basically,
0082:         * this process involves using either BASE64 or QP to encode certain
0083:         * characters. RFC 2047 describes this in detail. <p>
0084:         *
0085:         * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
0086:         * subset of Unicode (and occupies the range 0 - 127). A String
0087:         * that contains only ASCII characters is already mail-safe. If the
0088:         * String contains non US-ASCII characters, it must be encoded. An
0089:         * additional complexity in this step is that since Unicode is not
0090:         * yet a widely used charset, one might want to first charset-encode
0091:         * the String into another charset and then do the transfer-encoding.
0092:         * <p>
0093:         * Note that to get the actual bytes of a mail-safe String (say,
0094:         * for sending over SMTP), one must do 
0095:         * <p><blockquote><pre>
0096:         *
0097:         *	byte[] bytes = string.getBytes("iso-8859-1");	
0098:         *
0099:         * </pre></blockquote><p>
0100:         * 
0101:         * The <code>setHeader</code> and <code>addHeader</code> methods
0102:         * on MimeMessage and MimeBodyPart assume that the given header values
0103:         * are Unicode strings that contain only US-ASCII characters. Hence
0104:         * the callers of those methods must ensure that the values they pass
0105:         * do not contain non US-ASCII characters. The methods in this class 
0106:         * help do this. <p>
0107:         *
0108:         * The <code>getHeader</code> family of methods on MimeMessage and
0109:         * MimeBodyPart return the raw header value. These might be encoded
0110:         * as per RFC 2047, and if so, must be decoded into Unicode Strings.
0111:         * The methods in this class help to do this. <p>
0112:         *
0113:         * Several System properties control strict conformance to the MIME
0114:         * spec.  Note that these are not session properties but must be set
0115:         * globally as System properties. <p>
0116:         *
0117:         * The <code>mail.mime.decodetext.strict</code> property controls
0118:         * decoding of MIME encoded words.  The MIME spec requires that encoded
0119:         * words start at the beginning of a whitespace separated word.  Some
0120:         * mailers incorrectly include encoded words in the middle of a word.
0121:         * If the <code>mail.mime.decodetext.strict</code> System property is
0122:         * set to <code>"false"</code>, an attempt will be made to decode these
0123:         * illegal encoded words. The default is true. <p>
0124:         *
0125:         * The <code>mail.mime.encodeeol.strict</code> property controls the
0126:         * choice of Content-Transfer-Encoding for MIME parts that are not of
0127:         * type "text".  Often such parts will contain textual data for which
0128:         * an encoding that allows normal end of line conventions is appropriate.
0129:         * In rare cases, such a part will appear to contain entirely textual
0130:         * data, but will require an encoding that preserves CR and LF characters
0131:         * without change.  If the <code>mail.mime.encodeeol.strict</code>
0132:         * System property is set to <code>"true"</code>, such an encoding will
0133:         * be used when necessary.  The default is false. <p>
0134:         *
0135:         * In addition, the <code>mail.mime.charset</code> System property can
0136:         * be used to specify the default MIME charset to use for encoded words
0137:         * and text parts that don't otherwise specify a charset.  Normally, the
0138:         * default MIME charset is derived from the default Java charset, as
0139:         * specified in the <code>file.encoding</code> System property.  Most
0140:         * applications will have no need to explicitly set the default MIME
0141:         * charset.  In cases where the default MIME charset to be used for
0142:         * mail messages is different than the charset used for files stored on
0143:         * the system, this property should be set.
0144:         *
0145:         * @version 1.45, 03/03/10
0146:         * @author  John Mani
0147:         * @author  Bill Shannon
0148:         */
0149:
0150:        public class MimeUtility {
0151:
0152:            // This class cannot be instantiated
0153:            private MimeUtility() { // private, so code outside this class cannot create instances
0154:            }
0155:
0156:            public static final int ALL = -1; // passed to checkAscii by getEncoding(DataSource); presumably "examine every byte" - TODO confirm
0157:
0158:            private static final int BUFFER_SIZE = 1024;
0159:            private static boolean decodeStrict = true; // mail.mime.decodetext.strict; when false, decodeText also tries decodeInnerWords
0160:            private static boolean encodeEolStrict = false; // mail.mime.encodeeol.strict; handed to AsciiOutputStream for non-text data
0161:            private static boolean foldEncodedWords = false; // mail.mime.foldencodedwords; true only if the property equals "true"
0162:            private static boolean foldText = true; // mail.mime.foldtext; true unless the property equals "false"
0163:
0164:            static {
0165:                // Derive each flag from its System property.  Comparisons
0166:                // ignore case, and a missing property never matches.
0167:                try {
0168:                    // strict unless explicitly switched off with "false"
0169:                    decodeStrict = !"false".equalsIgnoreCase(
0170:                            System.getProperty("mail.mime.decodetext.strict"));
0171:                    // the next two are enabled only by an explicit "true"
0172:                    encodeEolStrict = "true".equalsIgnoreCase(
0173:                            System.getProperty("mail.mime.encodeeol.strict"));
0174:                    foldEncodedWords = "true".equalsIgnoreCase(
0175:                            System.getProperty("mail.mime.foldencodedwords"));
0176:                    // folding stays on unless explicitly switched off with "false"
0177:                    foldText = !"false".equalsIgnoreCase(
0178:                            System.getProperty("mail.mime.foldtext"));
0179:                } catch (SecurityException denied) {
0180:                    // ignore it: flags not yet assigned keep their declared defaults
0181:                }
0182:            }
0183:
0184:            /**
0185:             * Get the content-transfer-encoding that should be applied
0186:             * to the input stream of this datasource, to make it mailsafe. <p>
0187:             *
0188:             * The algorithm used here is: <br>
0189:             * <ul>
0190:             * <li>
0191:             * If the primary type of this datasource is "text" and if all
0192:             * the bytes in its input stream are US-ASCII, then the encoding
0193:             * is "7bit". If more than half of the bytes are non-US-ASCII, then
0194:             * the encoding is "base64". If less than half of the bytes are
0195:             * non-US-ASCII, then the encoding is "quoted-printable".
0196:             * <li>
0197:             * If the primary type of this datasource is not "text", then if
0198:             * all the bytes of its input stream are US-ASCII, the encoding
0199:             * is "7bit". If there is even one non-US-ASCII character, the
0200:             * encoding is "base64".
0201:             * </ul>
0202:             *
0203:             * @param	ds	DataSource
0204:             * @return		the encoding. This is either "7bit",
0205:             *			"quoted-printable" or "base64"
0206:             */
0207:            public static String getEncoding(DataSource ds) {
0208:                ContentType cType = null;
0209:                InputStream is = null;
0210:
0211:                try {
0212:                    cType = new ContentType(ds.getContentType());
0213:                    is = ds.getInputStream();
0214:                } catch (Exception ex) {
0215:                    // The content type could not be parsed or the data could not
0216:                    // be opened; base64 is safe for arbitrary bytes.
0217:                    return "base64";
0218:                }
0219:
0220:                try {
0221:                    // if not text, stop processing when we see non-ASCII
0222:                    boolean isText = cType.match("text/*");
0223:                    switch (checkAscii(is, ALL, !isText)) {
0224:                    case ALL_ASCII:
0225:                        return "7bit"; // all ascii
0226:                    case MOSTLY_ASCII:
0227:                        return "quoted-printable"; // mostly ascii
0228:                    default:
0229:                        return "base64"; // mostly binary
0230:                    }
0231:                } finally {
0232:                    // Close the input stream on every path, including an unchecked
0233:                    // exception raised while scanning, so it is never leaked.
0234:                    try {
0235:                        if (is != null)
0236:                            is.close();
0237:                    } catch (IOException ioex) {
0238:                        // best effort: a failed close does not change the result
0239:                    }
0240:                }
0241:            }
0242:
0243:            /**
0244:             * Same as <code>getEncoding(DataSource)</code> except that instead
0245:             * of reading the data from an <code>InputStream</code> it uses the
0246:             * <code>writeTo</code> method to examine the data.  This is more
0247:             * efficient in the common case of a <code>DataHandler</code>
0248:             * created with an object and a MIME type (for example, a
0249:             * "text/plain" String) because all the I/O is done in this
0250:             * thread.  In the case requiring an <code>InputStream</code> the
0251:             * <code>DataHandler</code> uses a thread, a pair of pipe streams,
0252:             * and the <code>writeTo</code> method to produce the data. <p>
0253:             *
0254:             * @since	JavaMail 1.2
0255:             */
0256:            public static String getEncoding(DataHandler dh) {
0257:                ContentType cType = null;
0258:                String encoding = null;
0259:
0260:                /*
0261:                 * Try to pick the most efficient means of determining the
0262:                 * encoding.  If this DataHandler was created using a DataSource,
0263:                 * the getEncoding(DataSource) method is typically faster.  If
0264:                 * the DataHandler was created with an object, this method is
0265:                 * much faster.  To distinguish the two cases, we use a heuristic.
0266:                 * A DataHandler created with an object will always have a null name.
0267:                 * A DataHandler created with a DataSource will usually have a
0268:                 * non-null name.
0269:                 *
0270:                 * XXX - This is actually quite a disgusting hack, but it makes
0271:                 *	 a common case run over twice as fast.
0272:                 */
0273:                if (dh.getName() != null)
0274:                    return getEncoding(dh.getDataSource());
0275:
0276:                try {
0277:                    cType = new ContentType(dh.getContentType());
0278:                } catch (Exception ex) {
0279:                    return "base64"; // what else ?!
0280:                }
0281:
0282:                if (cType.match("text/*")) {
0283:                    // Check all of the available bytes
0284:                    AsciiOutputStream aos = new AsciiOutputStream(false, false);
0285:                    try {
0286:                        dh.writeTo(aos);
0287:                    } catch (IOException ex) {
0288:                    } // ignore it
0289:                    switch (aos.getAscii()) {
0290:                    case ALL_ASCII:
0291:                        encoding = "7bit"; // all ascii
0292:                        break;
0293:                    case MOSTLY_ASCII:
0294:                        encoding = "quoted-printable"; // mostly ascii
0295:                        break;
0296:                    default:
0297:                        encoding = "base64"; // mostly binary
0298:                        break;
0299:                    }
0300:                } else { // not "text"
0301:                    // Check all of available bytes, break out if we find
0302:                    // at least one non-US-ASCII character
0303:                    AsciiOutputStream aos = new AsciiOutputStream(true,
0304:                            encodeEolStrict);
0305:                    try {
0306:                        dh.writeTo(aos);
0307:                    } catch (IOException ex) {
0308:                    } // ignore it
0309:                    if (aos.getAscii() == ALL_ASCII) // all ascii
0310:                        encoding = "7bit";
0311:                    else
0312:                        // found atleast one non-ascii character, use b64 
0313:                        encoding = "base64";
0314:                }
0315:
0316:                return encoding;
0317:            }
0318:
0319:            /**
0320:             * Decode the given input stream. The Input stream returned is
0321:             * the decoded input stream. All the encodings defined in RFC 2045
0322:             * are supported here. They include "base64", "quoted-printable",
0323:             * "7bit", "8bit", and "binary". In addition, "uuencode" is also
0324:             * supported.
0325:             *
0326:             * @param	is		input stream
0327:             * @param	encoding	the encoding of the stream.
0328:             * @return			decoded input stream.
0329:             */
0330:            public static InputStream decode(InputStream is, String encoding)
0331:                    throws MessagingException {
0332:                if (encoding.equalsIgnoreCase("base64"))
0333:                    return new BASE64DecoderStream(is);
0334:                else if (encoding.equalsIgnoreCase("quoted-printable"))
0335:                    return new QPDecoderStream(is);
0336:                else if (encoding.equalsIgnoreCase("uuencode")
0337:                        || encoding.equalsIgnoreCase("x-uuencode")
0338:                        || encoding.equalsIgnoreCase("x-uue"))
0339:                    return new UUDecoderStream(is);
0340:                else if (encoding.equalsIgnoreCase("binary")
0341:                        || encoding.equalsIgnoreCase("7bit")
0342:                        || encoding.equalsIgnoreCase("8bit"))
0343:                    return is;
0344:                else
0345:                    throw new MessagingException("Unknown encoding: "
0346:                            + encoding);
0347:            }
0348:
0349:            /**
0350:             * Wrap an encoder around the given output stream. 
0351:             * All the encodings defined in RFC 2045 are supported here. 
0352:             * They include "base64", "quoted-printable", "7bit", "8bit" and
0353:             * "binary". In addition, "uuencode" is also supported.
0354:             *
0355:             * @param	os		output stream
0356:             * @param	encoding	the encoding of the stream. 
0357:             * @return			output stream that applies the
0358:             *				specified encoding.
0359:             */
0360:            public static OutputStream encode(OutputStream os, String encoding)
0361:                    throws MessagingException {
0362:                if (encoding == null)
0363:                    return os;
0364:                else if (encoding.equalsIgnoreCase("base64"))
0365:                    return new BASE64EncoderStream(os);
0366:                else if (encoding.equalsIgnoreCase("quoted-printable"))
0367:                    return new QPEncoderStream(os);
0368:                else if (encoding.equalsIgnoreCase("uuencode")
0369:                        || encoding.equalsIgnoreCase("x-uuencode")
0370:                        || encoding.equalsIgnoreCase("x-uue"))
0371:                    return new UUEncoderStream(os);
0372:                else if (encoding.equalsIgnoreCase("binary")
0373:                        || encoding.equalsIgnoreCase("7bit")
0374:                        || encoding.equalsIgnoreCase("8bit"))
0375:                    return os;
0376:                else
0377:                    throw new MessagingException("Unknown encoding: "
0378:                            + encoding);
0379:            }
0380:
0381:            /**
0382:             * Wrap an encoder around the given output stream.
0383:             * All the encodings defined in RFC 2045 are supported here.
0384:             * They include "base64", "quoted-printable", "7bit", "8bit" and
0385:             * "binary". In addition, "uuencode" is also supported.
0386:             * The <code>filename</code> parameter is used with the "uuencode"
0387:             * encoding and is included in the encoded output.
0388:             *
0389:             * @param   os              output stream
0390:             * @param   encoding        the encoding of the stream.
0391:             * @param   filename        name for the file being encoded (only used
0392:             *                          with uuencode)
0393:             * @return                  output stream that applies the
0394:             *                          specified encoding.
0395:             * @since                   JavaMail 1.2
0396:             */
0397:            public static OutputStream encode(OutputStream os, String encoding,
0398:                    String filename) throws MessagingException {
0399:                if (encoding == null)
0400:                    return os;
0401:                else if (encoding.equalsIgnoreCase("base64"))
0402:                    return new BASE64EncoderStream(os);
0403:                else if (encoding.equalsIgnoreCase("quoted-printable"))
0404:                    return new QPEncoderStream(os);
0405:                else if (encoding.equalsIgnoreCase("uuencode")
0406:                        || encoding.equalsIgnoreCase("x-uuencode")
0407:                        || encoding.equalsIgnoreCase("x-uue"))
0408:                    return new UUEncoderStream(os, filename);
0409:                else if (encoding.equalsIgnoreCase("binary")
0410:                        || encoding.equalsIgnoreCase("7bit")
0411:                        || encoding.equalsIgnoreCase("8bit"))
0412:                    return os;
0413:                else
0414:                    throw new MessagingException("Unknown encoding: "
0415:                            + encoding);
0416:            }
0417:
0418:            /**
0419:             * Encode a RFC 822 "text" token into mail-safe form as per
0420:             * RFC 2047. <p>
0421:             *
0422:             * The given Unicode string is examined for non US-ASCII
0423:             * characters. If the string contains only US-ASCII characters,
0424:             * it is returned as-is.  If the string contains non US-ASCII
0425:             * characters, it is first character-encoded using the platform's
0426:             * default charset, then transfer-encoded using either the B or 
0427:             * Q encoding. The resulting bytes are then returned as a Unicode 
0428:             * string containing only ASCII  characters. <p>
0429:             *
0430:             * Note that this method should be used to encode only 
0431:             * "unstructured" RFC 822 headers. <p>
0432:             *
0433:             * Example of usage:
0434:             * <p><blockquote><pre>
0435:             *
0436:             *  MimeBodyPart part = ...
0437:             *  String rawvalue = "FooBar Mailer, Japanese version 1.1";
0438:             *  try {
0439:             *    // If we know for sure that rawvalue contains only US-ASCII 
0440:             *    // characters, we can skip the encoding part
0441:             *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
0442:             *  } catch (UnsupportedEncodingException e) {
0443:             *    // encoding failure
0444:             *  } catch (MessagingException me) {
0445:             *   // setHeader() failure
0446:             *  }
0447:             *
0448:             * </pre></blockquote><p>
0449:             * 
0450:             * @param	text	unicode string
0451:             * @return	Unicode string containing only US-ASCII characters
0452:             * @exception UnsupportedEncodingException if the encoding fails
0453:             */
0454:            public static String encodeText(String text)
0455:                    throws UnsupportedEncodingException {
0456:                return encodeText(text, null, null); // delegate with no explicit charset and no explicit encoding
0457:            }
0458:
0459:            /**
0460:             * Encode a RFC 822 "text" token into mail-safe form as per
0461:             * RFC 2047. <p>
0462:             *
0463:             * The given Unicode string is examined for non US-ASCII
0464:             * characters. If the string contains only US-ASCII characters,
0465:             * it is returned as-is.  If the string contains non US-ASCII
0466:             * characters, it is first character-encoded using the specified
0467:             * charset, then transfer-encoded using either the B or Q encoding.
0468:             * The resulting bytes are then returned as a Unicode string 
0469:             * containing only ASCII characters. <p>
0470:             *
0471:             * Note that this method should be used to encode only 
0472:             * "unstructured" RFC 822 headers. 
0473:             * 
0474:             * @param	text	the header value
0475:             * @param	charset	the charset. If this parameter is null, the
0476:             *		platform's default charset is used.
0477:             * @param	encoding the encoding to be used. Currently supported
0478:             *		values are "B" and "Q". If this parameter is null, then
0479:             *		the "Q" encoding is used if most of characters to be
0480:             *		encoded are in the ASCII charset, otherwise "B" encoding
0481:             *		is used.
0482:             * @return	Unicode string containing only US-ASCII characters
0483:             */
0484:            public static String encodeText(String text, String charset,
0485:                    String encoding) throws UnsupportedEncodingException {
0486:                return encodeWord(text, charset, encoding, false); // NOTE(review): the trailing 'false' presumably selects "text" rather than "word" rules - confirm in encodeWord
0487:            }
0488:
0489:            /**
0490:             * Decode "unstructured" headers, that is, headers that are defined
0491:             * as '*text' as per RFC 822. <p>
0492:             *
0493:             * The string is decoded using the algorithm specified in
0494:             * RFC 2047, Section 6.1.1. If the charset-conversion fails
0495:             * for any sequence, an UnsupportedEncodingException is thrown.
0496:             * If the String is not an RFC 2047 style encoded header, it is
0497:             * returned as-is. <p>
0498:             *
0499:             * Example of usage:
0500:             * <p><blockquote><pre>
0501:             *
0502:             *  MimeBodyPart part = ...
0503:             *  String rawvalue = null;
0504:             *  String  value = null;
0505:             *  try {
0506:             *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
0507:             *      value = MimeUtility.decodeText(rawvalue);
0508:             *  } catch (UnsupportedEncodingException e) {
0509:             *      // Don't care
0510:             *      value = rawvalue;
0511:             *  } catch (MessagingException me) { }
0512:             *
0513:             *  return value;
0514:             *
0515:             * </pre></blockquote><p>
0516:             *
0517:             * @param	etext	the possibly encoded value
0518:             * @exception       UnsupportedEncodingException if the charset
0519:             *			conversion failed.
0520:             */
0521:            public static String decodeText(String etext)
0522:                    throws UnsupportedEncodingException {
0523:                /*
0524:                 * We look for sequences separated by "linear-white-space".
0525:                 * (as per RFC 2047, Section 6.1.1)
0526:                 * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
0527:                 */
0528:                String lwsp = " \t\n\r";
0529:                StringTokenizer st;
0530:
0531:                /*
0532:                 * First, lets do a quick run thru the string and check
0533:                 * whether the sequence "=?"  exists at all. If none exists,
0534:                 * we know there are no encoded-words in here and we can just
0535:                 * return the string as-is, without suffering thru the later 
0536:                 * decoding logic. 
0537:                 * This handles the most common case of unencoded headers 
0538:                 * efficiently.
0539:                 */
0540:                if (etext.indexOf("=?") == -1)
0541:                    return etext;
0542:
0543:                // Encoded words found. Start decoding ...
0544:
0545:                st = new StringTokenizer(etext, lwsp, true);
0546:                StringBuffer sb = new StringBuffer(); // decode buffer
0547:                StringBuffer wsb = new StringBuffer(); // white space buffer
0548:                boolean prevWasEncoded = false;
0549:
0550:                while (st.hasMoreTokens()) {
0551:                    char c;
0552:                    String s = st.nextToken();
0553:                    // If whitespace, append it to the whitespace buffer
0554:                    if (((c = s.charAt(0)) == ' ') || (c == '\t')
0555:                            || (c == '\r') || (c == '\n'))
0556:                        wsb.append(c);
0557:                    else {
0558:                        // Check if token is an 'encoded-word' ..
0559:                        String word;
0560:                        try {
0561:                            word = decodeWord(s);
0562:                            // Yes, this IS an 'encoded-word'.
0563:                            if (!prevWasEncoded && wsb.length() > 0) {
0564:                                // if the previous word was also encoded, we
0565:                                // should ignore the collected whitespace. Else
0566:                                // we include the whitespace as well.
0567:                                sb.append(wsb);
0568:                            }
0569:                            prevWasEncoded = true;
0570:                        } catch (ParseException pex) {
0571:                            // This is NOT an 'encoded-word'.
0572:                            word = s;
0573:                            // possibly decode inner encoded words
0574:                            if (!decodeStrict)
0575:                                word = decodeInnerWords(word);
0576:                            // include colleced whitespace ..
0577:                            if (wsb.length() > 0)
0578:                                sb.append(wsb);
0579:                            prevWasEncoded = false;
0580:                        }
0581:                        sb.append(word); // append the actual word
0582:                        wsb.setLength(0); // reset wsb for reuse
0583:                    }
0584:                }
0585:                return sb.toString();
0586:            }
0587:
0588:            /**
0589:             * Encode a RFC 822 "word" token into mail-safe form as per
0590:             * RFC 2047. <p>
0591:             *
0592:             * The given Unicode string is examined for non US-ASCII
0593:             * characters. If the string contains only US-ASCII characters,
0594:             * it is returned as-is.  If the string contains non US-ASCII
0595:             * characters, it is first character-encoded using the platform's
0596:             * default charset, then transfer-encoded using either the B or 
0597:             * Q encoding. The resulting bytes are then returned as a Unicode 
0598:             * string containing only ASCII  characters. <p>
0599:             * 
0600:             * This method is meant to be used when creating RFC 822 "phrases".
0601:             * The InternetAddress class, for example, uses this to encode
0602:             * it's 'phrase' component.
0603:             *
0604:             * @param	text	unicode string
0605:             * @return	Array of Unicode strings containing only US-ASCII 
0606:             *		characters.
0607:             * @exception UnsupportedEncodingException if the encoding fails
0608:             */
0609:            public static String encodeWord(String word)
0610:                    throws UnsupportedEncodingException {
0611:                return encodeWord(word, null, null);
0612:            }
0613:
0614:            /**
0615:             * Encode a RFC 822 "word" token into mail-safe form as per
0616:             * RFC 2047. <p>
0617:             *
0618:             * The given Unicode string is examined for non US-ASCII
0619:             * characters. If the string contains only US-ASCII characters,
0620:             * it is returned as-is.  If the string contains non US-ASCII
0621:             * characters, it is first character-encoded using the specified
0622:             * charset, then transfer-encoded using either the B or Q encoding.
0623:             * The resulting bytes are then returned as a Unicode string 
0624:             * containing only ASCII characters. <p>
0625:             * 
0626:             * @param	text	unicode string
0627:             * @param	charset	the MIME charset
0628:             * @param	encoding the encoding to be used. Currently supported
0629:             *		values are "B" and "Q". If this parameter is null, then
0630:             *		the "Q" encoding is used if most of characters to be
0631:             *		encoded are in the ASCII charset, otherwise "B" encoding
0632:             *		is used.
0633:             * @return	Unicode string containing only US-ASCII characters
0634:             * @exception UnsupportedEncodingException if the encoding fails
0635:             */
0636:            public static String encodeWord(String word, String charset,
0637:                    String encoding) throws UnsupportedEncodingException {
0638:                return encodeWord(word, charset, encoding, true);
0639:            }
0640:
0641:            /*
0642:             * Encode the given string. The parameter 'encodingWord' should
0643:             * be true if a RFC 822 "word" token is being encoded and false if a
0644:             * RFC 822 "text" token is being encoded. This is because the 
0645:             * "Q" encoding defined in RFC 2047 has more restrictions when
0646:             * encoding "word" tokens. (Sigh)
0647:             */
0648:            private static String encodeWord(String string, String charset,
0649:                    String encoding, boolean encodingWord)
0650:                    throws UnsupportedEncodingException {
0651:
0652:                // If 'string' contains only US-ASCII characters, just
0653:                // return it.
0654:                int ascii = checkAscii(string);
0655:                if (ascii == ALL_ASCII)
0656:                    return string;
0657:
0658:                // Else, apply the specified charset conversion.
0659:                String jcharset;
0660:                if (charset == null) { // use default charset
0661:                    jcharset = getDefaultJavaCharset(); // the java charset
0662:                    charset = getDefaultMIMECharset(); // the MIME equivalent
0663:                } else
0664:                    // MIME charset -> java charset
0665:                    jcharset = javaCharset(charset);
0666:
0667:                // If no transfer-encoding is specified, figure one out.
0668:                if (encoding == null) {
0669:                    if (ascii != MOSTLY_NONASCII)
0670:                        encoding = "Q";
0671:                    else
0672:                        encoding = "B";
0673:                }
0674:
0675:                boolean b64;
0676:                if (encoding.equalsIgnoreCase("B"))
0677:                    b64 = true;
0678:                else if (encoding.equalsIgnoreCase("Q"))
0679:                    b64 = false;
0680:                else
0681:                    throw new UnsupportedEncodingException(
0682:                            "Unknown transfer encoding: " + encoding);
0683:
0684:                StringBuffer outb = new StringBuffer(); // the output buffer
0685:                doEncode(string, b64, jcharset,
0686:                // As per RFC 2047, size of an encoded string should not
0687:                        // exceed 75 bytes.
0688:                        // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
0689:                        75 - 7 - charset.length(), // the available space
0690:                        "=?" + charset + "?" + encoding + "?", // prefix
0691:                        true, encodingWord, outb);
0692:
0693:                return outb.toString();
0694:            }
0695:
0696:            private static void doEncode(String string, boolean b64,
0697:                    String jcharset, int avail, String prefix, boolean first,
0698:                    boolean encodingWord, StringBuffer buf)
0699:                    throws UnsupportedEncodingException {
0700:
0701:                // First find out what the length of the encoded version of
0702:                // 'string' would be.
0703:                byte[] bytes = string.getBytes(jcharset);
0704:                int len;
0705:                if (b64) // "B" encoding
0706:                    len = BEncoderStream.encodedLength(bytes);
0707:                else
0708:                    // "Q"
0709:                    len = QEncoderStream.encodedLength(bytes, encodingWord);
0710:
0711:                int size;
0712:                if ((len > avail) && ((size = string.length()) > 1)) {
0713:                    // If the length is greater than 'avail', split 'string'
0714:                    // into two and recurse.
0715:                    doEncode(string.substring(0, size / 2), b64, jcharset,
0716:                            avail, prefix, first, encodingWord, buf);
0717:                    doEncode(string.substring(size / 2, size), b64, jcharset,
0718:                            avail, prefix, false, encodingWord, buf);
0719:                } else {
0720:                    // length <= than 'avail'. Encode the given string
0721:                    ByteArrayOutputStream os = new ByteArrayOutputStream(
0722:                            BUFFER_SIZE);
0723:                    OutputStream eos; // the encoder
0724:                    if (b64) // "B" encoding
0725:                        eos = new BEncoderStream(os);
0726:                    else
0727:                        // "Q" encoding
0728:                        eos = new QEncoderStream(os, encodingWord);
0729:
0730:                    try { // do the encoding
0731:                        eos.write(bytes);
0732:                        eos.close();
0733:                    } catch (IOException ioex) {
0734:                    }
0735:
0736:                    byte[] encodedBytes = os.toByteArray(); // the encoded stuff
0737:                    // Now write out the encoded (all ASCII) bytes into our
0738:                    // StringBuffer
0739:                    if (!first) // not the first line of this sequence
0740:                        if (foldEncodedWords)
0741:                            buf.append("\r\n "); // start a continuation line
0742:                        else
0743:                            buf.append(" "); // line will be folded later
0744:
0745:                    buf.append(prefix);
0746:                    for (int i = 0; i < encodedBytes.length; i++)
0747:                        buf.append((char) encodedBytes[i]);
0748:                    buf.append("?="); // terminate the current sequence
0749:                }
0750:            }
0751:
0752:            /**
0753:             * The string is parsed using the rules in RFC 2047 for parsing
0754:             * an "encoded-word". If the parse fails, a ParseException is 
0755:             * thrown. Otherwise, it is transfer-decoded, and then 
0756:             * charset-converted into Unicode. If the charset-conversion
0757:             * fails, an UnsupportedEncodingException is thrown.<p>
0758:             *
0759:             * @param	eword	the possibly encoded value
0760:             * @exception       ParseException if the string is not an
0761:             *			encoded-word as per RFC 2047.
0762:             * @exception       UnsupportedEncodingException if the charset
0763:             *			conversion failed.
0764:             */
0765:            public static String decodeWord(String eword)
0766:                    throws ParseException, UnsupportedEncodingException {
0767:
0768:                if (!eword.startsWith("=?")) // not an encoded word
0769:                    throw new ParseException();
0770:
0771:                // get charset
0772:                int start = 2;
0773:                int pos;
0774:                if ((pos = eword.indexOf('?', start)) == -1)
0775:                    throw new ParseException();
0776:                String charset = javaCharset(eword.substring(start, pos));
0777:
0778:                // get encoding
0779:                start = pos + 1;
0780:                if ((pos = eword.indexOf('?', start)) == -1)
0781:                    throw new ParseException();
0782:                String encoding = eword.substring(start, pos);
0783:
0784:                // get encoded-sequence
0785:                start = pos + 1;
0786:                if ((pos = eword.indexOf("?=", start)) == -1)
0787:                    throw new ParseException();
0788:                String word = eword.substring(start, pos);
0789:
0790:                try {
0791:                    // Extract the bytes from word
0792:                    ByteArrayInputStream bis = new ByteArrayInputStream(
0793:                            ASCIIUtility.getBytes(word));
0794:
0795:                    // Get the appropriate decoder
0796:                    InputStream is;
0797:                    if (encoding.equalsIgnoreCase("B"))
0798:                        is = new BASE64DecoderStream(bis);
0799:                    else if (encoding.equalsIgnoreCase("Q"))
0800:                        is = new QDecoderStream(bis);
0801:                    else
0802:                        throw new UnsupportedEncodingException(
0803:                                "unknown encoding: " + encoding);
0804:
0805:                    // For b64 & q, size of decoded word <= size of word. So
0806:                    // the decoded bytes must fit into the 'bytes' array. This
0807:                    // is certainly more efficient than writing bytes into a
0808:                    // ByteArrayOutputStream and then pulling out the byte[]
0809:                    // from it.
0810:                    int count = bis.available();
0811:                    byte[] bytes = new byte[count];
0812:                    // count is set to the actual number of decoded bytes 
0813:                    count = is.read(bytes, 0, count);
0814:
0815:                    // Finally, convert the decoded bytes into a String using
0816:                    // the specified charset
0817:                    String s = new String(bytes, 0, count, charset);
0818:                    if (pos + 2 < eword.length()) {
0819:                        // there's still more text in the string
0820:                        String rest = eword.substring(pos + 2);
0821:                        if (!decodeStrict)
0822:                            rest = decodeInnerWords(rest);
0823:                        s += rest;
0824:                    }
0825:                    return s;
0826:                } catch (UnsupportedEncodingException uex) {
0827:                    // explicitly catch and rethrow this exception, otherwise
0828:                    // the below IOException catch will swallow this up!
0829:                    throw uex;
0830:                } catch (IOException ioex) {
0831:                    // Shouldn't happen.
0832:                    throw new ParseException();
0833:                } catch (IllegalArgumentException iex) {
0834:                    /* An unknown charset of the form ISO-XXX-XXX, will cause
0835:                     * the JDK to throw an IllegalArgumentException ... Since the
0836:                     * JDK will attempt to create a classname using this string,
0837:                     * but valid classnames must not contain the character '-',
0838:                     * and this results in an IllegalArgumentException, rather than
0839:                     * the expected UnsupportedEncodingException. Yikes
0840:                     */
0841:                    throw new UnsupportedEncodingException();
0842:                }
0843:            }
0844:
0845:            /**
0846:             * Look for encoded words within a word.  The MIME spec doesn't
0847:             * allow this, but many broken mailers, especially Japanese mailers,
0848:             * produce such incorrect encodings.
0849:             */
0850:            private static String decodeInnerWords(String word)
0851:                    throws UnsupportedEncodingException {
0852:                int start = 0, i;
0853:                StringBuffer buf = new StringBuffer();
0854:                while ((i = word.indexOf("=?", start)) >= 0) {
0855:                    buf.append(word.substring(start, i));
0856:                    int end = word.indexOf("?=", i);
0857:                    if (end < 0)
0858:                        break;
0859:                    String s = word.substring(i, end + 2);
0860:                    try {
0861:                        s = decodeWord(s);
0862:                    } catch (ParseException pex) {
0863:                        // ignore it, just use the original string
0864:                    }
0865:                    buf.append(s);
0866:                    start = end + 2;
0867:                }
0868:                if (start == 0)
0869:                    return word;
0870:                if (start < word.length())
0871:                    buf.append(word.substring(start));
0872:                return buf.toString();
0873:            }
0874:
0875:            /**
0876:             * A utility method to quote a word, if the word contains any
0877:             * characters from the specified 'specials' list.<p>
0878:             *
0879:             * The <code>HeaderTokenizer</code> class defines two special
0880:             * sets of delimiters - MIME and RFC 822. <p>
0881:             *
0882:             * This method is typically used during the generation of 
0883:             * RFC 822 and MIME header fields.
0884:             *
0885:             * @param	word	word to be quoted
0886:             * @param	specials the set of special characters
0887:             * @return		the possibly quoted word
0888:             * @see	javax.mail.internet.HeaderTokenizer#MIME
0889:             * @see	javax.mail.internet.HeaderTokenizer#RFC822
0890:             */
0891:            public static String quote(String word, String specials) {
0892:                int len = word.length();
0893:
0894:                /*
0895:                 * Look for any "bad" characters, Escape and
0896:                 *  quote the entire string if necessary.
0897:                 */
0898:                boolean needQuoting = false;
0899:                for (int i = 0; i < len; i++) {
0900:                    char c = word.charAt(i);
0901:                    if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
0902:                        // need to escape them and then quote the whole string
0903:                        StringBuffer sb = new StringBuffer(len + 3);
0904:                        sb.append('"');
0905:                        sb.append(word.substring(0, i));
0906:                        int lastc = 0;
0907:                        for (int j = i; j < len; j++) {
0908:                            char cc = word.charAt(j);
0909:                            if ((cc == '"') || (cc == '\\') || (cc == '\r')
0910:                                    || (cc == '\n'))
0911:                                if (cc == '\n' && lastc == '\r')
0912:                                    ; // do nothing, CR was already escaped
0913:                                else
0914:                                    sb.append('\\'); // Escape the character
0915:                            sb.append(cc);
0916:                            lastc = cc;
0917:                        }
0918:                        sb.append('"');
0919:                        return sb.toString();
0920:                    } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
0921:                        // These characters cause the string to be quoted
0922:                        needQuoting = true;
0923:                }
0924:
0925:                if (needQuoting) {
0926:                    StringBuffer sb = new StringBuffer(len + 2);
0927:                    sb.append('"').append(word).append('"');
0928:                    return sb.toString();
0929:                } else
0930:                    return word;
0931:            }
0932:
0933:            /**
0934:             * Fold a string at linear whitespace so that each line is no longer
0935:             * than 76 characters, if possible.  If there are more than 76
0936:             * non-whitespace characters consecutively, the string is folded at
0937:             * the first whitespace after that sequence.  The parameter
0938:             * <code>used</code> indicates how many characters have been used in
0939:             * the current line; it is usually the length of the header name. <p>
0940:             *
0941:             * Note that line breaks in the string aren't escaped; they probably
0942:             * should be.
0943:             *
0944:             * @param	used	characters used in line so far
0945:             * @param	s	the string to fold
0946:             * @return		the folded string
0947:             */
0948:            /*public*/static String fold(int used, String s) {
0949:                if (!foldText)
0950:                    return s;
0951:
0952:                int end;
0953:                char c;
0954:                // Strip trailing spaces
0955:                for (end = s.length() - 1; end >= 0; end--) {
0956:                    c = s.charAt(end);
0957:                    if (c != ' ' && c != '\t')
0958:                        break;
0959:                }
0960:                if (end != s.length() - 1)
0961:                    s = s.substring(0, end + 1);
0962:
0963:                // if the string fits now, just return it
0964:                if (used + s.length() <= 76)
0965:                    return s;
0966:
0967:                // have to actually fold the string
0968:                StringBuffer sb = new StringBuffer(s.length() + 4);
0969:                char lastc = 0;
0970:                while (used + s.length() > 76) {
0971:                    int lastspace = -1;
0972:                    for (int i = 0; i < s.length(); i++) {
0973:                        if (lastspace != -1 && used + i > 76)
0974:                            break;
0975:                        c = s.charAt(i);
0976:                        if (c == ' ' || c == '\t')
0977:                            if (!(lastc == ' ' || lastc == '\t'))
0978:                                lastspace = i;
0979:                        lastc = c;
0980:                    }
0981:                    if (lastspace == -1) {
0982:                        // no space, use the whole thing
0983:                        sb.append(s);
0984:                        s = "";
0985:                        used = 0;
0986:                        break;
0987:                    }
0988:                    sb.append(s.substring(0, lastspace));
0989:                    sb.append("\r\n");
0990:                    lastc = s.charAt(lastspace);
0991:                    sb.append(lastc);
0992:                    s = s.substring(lastspace + 1);
0993:                    used = 1;
0994:                }
0995:                sb.append(s);
0996:                return sb.toString();
0997:            }
0998:
0999:            /**
1000:             * Unfold a folded header.  Any line breaks that aren't escaped and
1001:             * are followed by whitespace are removed.
1002:             *
1003:             * @param	s	the string to unfold
1004:             * @return		the unfolded string
1005:             */
1006:            /*public*/static String unfold(String s) {
1007:                if (!foldText)
1008:                    return s;
1009:
1010:                StringBuffer sb = null;
1011:                int i;
1012:                while ((i = indexOfAny(s, "\r\n")) >= 0) {
1013:                    int start = i;
1014:                    int l = s.length();
1015:                    i++; // skip CR or NL
1016:                    if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
1017:                        i++; // skip LF
1018:                    if (start == 0 || s.charAt(start - 1) != '\\') {
1019:                        char c;
1020:                        // if next line starts with whitespace, skip all of it
1021:                        // XXX - always has to be true?
1022:                        if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {
1023:                            i++; // skip whitespace
1024:                            while (i < l
1025:                                    && ((c = s.charAt(i)) == ' ' || c == '\t'))
1026:                                i++;
1027:                            if (sb == null)
1028:                                sb = new StringBuffer(s.length());
1029:                            if (start != 0) {
1030:                                sb.append(s.substring(0, start));
1031:                                sb.append(' ');
1032:                            }
1033:                            s = s.substring(i);
1034:                            continue;
1035:                        }
1036:                        // it's not a continuation line, just leave it in
1037:                        if (sb == null)
1038:                            sb = new StringBuffer(s.length());
1039:                        sb.append(s.substring(0, i));
1040:                        s = s.substring(i);
1041:                    } else {
1042:                        // there's a backslash at "start - 1"
1043:                        // strip it out, but leave in the line break
1044:                        if (sb == null)
1045:                            sb = new StringBuffer(s.length());
1046:                        sb.append(s.substring(0, start - 1));
1047:                        sb.append(s.substring(start, i));
1048:                        s = s.substring(i);
1049:                    }
1050:                }
1051:                if (sb != null) {
1052:                    sb.append(s);
1053:                    return sb.toString();
1054:                } else
1055:                    return s;
1056:            }
1057:
1058:            /**
1059:             * Return the first index of any of the characters in "any" in "s",
1060:             * or -1 if none are found.
1061:             *
1062:             * This should be a method on String.
1063:             */
1064:            private static int indexOfAny(String s, String any) {
1065:                return indexOfAny(s, any, 0);
1066:            }
1067:
1068:            private static int indexOfAny(String s, String any, int start) {
1069:                try {
1070:                    int len = s.length();
1071:                    for (int i = start; i < len; i++) {
1072:                        if (any.indexOf(s.charAt(i)) >= 0)
1073:                            return i;
1074:                    }
1075:                    return -1;
1076:                } catch (StringIndexOutOfBoundsException e) {
1077:                    return -1;
1078:                }
1079:            }
1080:
1081:            /**
1082:             * Convert a MIME charset name into a valid Java charset name. <p>
1083:             *
1084:             * @param charset	the MIME charset name
1085:             * @return  the Java charset equivalent. If a suitable mapping is
1086:             *		not available, the passed in charset is itself returned.
1087:             */
1088:            public static String javaCharset(String charset) {
1089:                if (mime2java == null || charset == null)
1090:                    // no mapping table, or charset parameter is null
1091:                    return charset;
1092:
1093:                String alias = (String) mime2java.get(charset.toLowerCase());
1094:                return alias == null ? charset : alias;
1095:            }
1096:
1097:            /**
1098:             * Convert a java charset into its MIME charset name. <p>
1099:             *
1100:             * Note that a future version of JDK (post 1.2) might provide
1101:             * this functionality, in which case, we may deprecate this
1102:             * method then.
1103:             *
1104:             * @param   charset    the JDK charset
1105:             * @return      	the MIME/IANA equivalent. If a mapping
1106:             *			is not possible, the passed in charset itself
1107:             *			is returned.
1108:             * @since		JavaMail 1.1
1109:             */
1110:            public static String mimeCharset(String charset) {
1111:                if (java2mime == null || charset == null)
1112:                    // no mapping table or charset param is null
1113:                    return charset;
1114:
1115:                String alias = (String) java2mime.get(charset.toLowerCase());
1116:                return alias == null ? charset : alias;
1117:            }
1118:
1119:            private static String defaultJavaCharset;
1120:            private static String defaultMIMECharset;
1121:
1122:            /**
1123:             * Get the default charset corresponding to the system's current 
1124:             * default locale.  If the System property <code>mail.mime.charset</code>
1125:             * is set, a system charset corresponding to this MIME charset will be
1126:             * returned. <p>
1127:             * 
1128:             * @return	the default charset of the system's default locale, 
1129:             * 		as a Java charset. (NOT a MIME charset)
1130:             * @since	JavaMail 1.1
1131:             */
1132:            public static String getDefaultJavaCharset() {
1133:                if (defaultJavaCharset == null) {
1134:                    /*
1135:                     * If mail.mime.charset is set, it controls the default
1136:                     * Java charset as well.
1137:                     */
1138:                    String mimecs = null;
1139:                    try {
1140:                        mimecs = System.getProperty("mail.mime.charset");
1141:                    } catch (SecurityException ex) {
1142:                    } // ignore it
1143:                    if (mimecs != null && mimecs.length() > 0) {
1144:                        defaultJavaCharset = javaCharset(mimecs);
1145:                        return defaultJavaCharset;
1146:                    }
1147:
1148:                    try {
1149:                        defaultJavaCharset = System.getProperty(
1150:                                "file.encoding", "8859_1");
1151:                    } catch (SecurityException sex) {
1152:
1153:                        class NullInputStream extends InputStream {
1154:                            public int read() {
1155:                                return 0;
1156:                            }
1157:                        }
1158:                        InputStreamReader reader = new InputStreamReader(
1159:                                new NullInputStream());
1160:                        defaultJavaCharset = reader.getEncoding();
1161:                        if (defaultJavaCharset == null)
1162:                            defaultJavaCharset = "8859_1";
1163:                    }
1164:                }
1165:
1166:                return defaultJavaCharset;
1167:            }
1168:
1169:            /*
1170:             * Get the default MIME charset for this locale.
1171:             */
1172:            static String getDefaultMIMECharset() {
1173:                if (defaultMIMECharset == null) {
1174:                    try {
1175:                        defaultMIMECharset = System
1176:                                .getProperty("mail.mime.charset");
1177:                    } catch (SecurityException ex) {
1178:                    } // ignore it
1179:                }
1180:                if (defaultMIMECharset == null)
1181:                    defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
1182:                return defaultMIMECharset;
1183:            }
1184:
1185:            // Tables to map MIME charset names to Java names and vice versa.
1186:            // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
1187:            private static Hashtable mime2java;
1188:            private static Hashtable java2mime;
1189:
1190:            static {
1191:                java2mime = new Hashtable(40);
1192:                mime2java = new Hashtable(10);
1193:
1194:                try {
1195:                    // Use this class's classloader to load the mapping file
1196:                    // XXX - we should use SecuritySupport, but it's in another package
1197:                    InputStream is = com.sun.xml.messaging.saaj.packaging.mime.internet.MimeUtility.class
1198:                            .getResourceAsStream("/META-INF/javamail.charset.map");
1199:
1200:                    if (is != null) {
1201:                        is = new LineInputStream(is);
1202:
1203:                        // Load the JDK-to-MIME charset mapping table
1204:                        loadMappings((LineInputStream) is, java2mime);
1205:
1206:                        // Load the MIME-to-JDK charset mapping table
1207:                        loadMappings((LineInputStream) is, mime2java);
1208:                    }
1209:                } catch (Exception ex) {
1210:                }
1211:
1212:                // If we didn't load the tables, e.g., because we didn't have
1213:                // permission, load them manually.  The entries here should be
1214:                // the same as the default javamail.charset.map.
1215:                if (java2mime.isEmpty()) {
1216:                    java2mime.put("8859_1", "ISO-8859-1");
1217:                    java2mime.put("iso8859_1", "ISO-8859-1");
1218:                    java2mime.put("ISO8859-1", "ISO-8859-1");
1219:
1220:                    java2mime.put("8859_2", "ISO-8859-2");
1221:                    java2mime.put("iso8859_2", "ISO-8859-2");
1222:                    java2mime.put("ISO8859-2", "ISO-8859-2");
1223:
1224:                    java2mime.put("8859_3", "ISO-8859-3");
1225:                    java2mime.put("iso8859_3", "ISO-8859-3");
1226:                    java2mime.put("ISO8859-3", "ISO-8859-3");
1227:
1228:                    java2mime.put("8859_4", "ISO-8859-4");
1229:                    java2mime.put("iso8859_4", "ISO-8859-4");
1230:                    java2mime.put("ISO8859-4", "ISO-8859-4");
1231:
1232:                    java2mime.put("8859_5", "ISO-8859-5");
1233:                    java2mime.put("iso8859_5", "ISO-8859-5");
1234:                    java2mime.put("ISO8859-5", "ISO-8859-5");
1235:
1236:                    java2mime.put("8859_6", "ISO-8859-6");
1237:                    java2mime.put("iso8859_6", "ISO-8859-6");
1238:                    java2mime.put("ISO8859-6", "ISO-8859-6");
1239:
1240:                    java2mime.put("8859_7", "ISO-8859-7");
1241:                    java2mime.put("iso8859_7", "ISO-8859-7");
1242:                    java2mime.put("ISO8859-7", "ISO-8859-7");
1243:
1244:                    java2mime.put("8859_8", "ISO-8859-8");
1245:                    java2mime.put("iso8859_8", "ISO-8859-8");
1246:                    java2mime.put("ISO8859-8", "ISO-8859-8");
1247:
1248:                    java2mime.put("8859_9", "ISO-8859-9");
1249:                    java2mime.put("iso8859_9", "ISO-8859-9");
1250:                    java2mime.put("ISO8859-9", "ISO-8859-9");
1251:
1252:                    java2mime.put("SJIS", "Shift_JIS");
1253:                    java2mime.put("MS932", "Shift_JIS");
1254:                    java2mime.put("JIS", "ISO-2022-JP");
1255:                    java2mime.put("ISO2022JP", "ISO-2022-JP");
1256:                    java2mime.put("EUC_JP", "euc-jp");
1257:                    java2mime.put("KOI8_R", "koi8-r");
1258:                    java2mime.put("EUC_CN", "euc-cn");
1259:                    java2mime.put("EUC_TW", "euc-tw");
1260:                    java2mime.put("EUC_KR", "euc-kr");
1261:                }
1262:                if (mime2java.isEmpty()) {
1263:                    mime2java.put("iso-2022-cn", "ISO2022CN");
1264:                    mime2java.put("iso-2022-kr", "ISO2022KR");
1265:                    mime2java.put("utf-8", "UTF8");
1266:                    mime2java.put("utf8", "UTF8");
1267:                    mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
1268:                    mime2java.put("ja_jp.eucjp", "EUCJIS");
1269:                    mime2java.put("euc-kr", "KSC5601");
1270:                    mime2java.put("euckr", "KSC5601");
1271:                    mime2java.put("us-ascii", "ISO-8859-1");
1272:                    mime2java.put("x-us-ascii", "ISO-8859-1");
1273:                }
1274:            }
1275:
1276:            private static void loadMappings(LineInputStream is, Hashtable table) {
1277:                String currLine;
1278:
1279:                while (true) {
1280:                    try {
1281:                        currLine = is.readLine();
1282:                    } catch (IOException ioex) {
1283:                        break; // error in reading, stop
1284:                    }
1285:
1286:                    if (currLine == null) // end of file, stop
1287:                        break;
1288:                    if (currLine.startsWith("--") && currLine.endsWith("--"))
1289:                        // end of this table
1290:                        break;
1291:
1292:                    // ignore empty lines and comments
1293:                    if (currLine.trim().length() == 0
1294:                            || currLine.startsWith("#"))
1295:                        continue;
1296:
1297:                    // A valid entry is of the form <key><separator><value>
1298:                    // where, <separator> := SPACE | HT. Parse this
1299:                    StringTokenizer tk = new StringTokenizer(currLine, " \t");
1300:                    try {
1301:                        String key = tk.nextToken();
1302:                        String value = tk.nextToken();
1303:                        table.put(key.toLowerCase(), value);
1304:                    } catch (NoSuchElementException nex) {
1305:                    }
1306:                }
1307:            }
1308:
1309:            static final int ALL_ASCII = 1;
1310:            static final int MOSTLY_ASCII = 2;
1311:            static final int MOSTLY_NONASCII = 3;
1312:
1313:            /** 
1314:             * Check if the given string contains non US-ASCII characters.
1315:             * @param	s	string
1316:             * @return		ALL_ASCII if all characters in the string 
1317:             *			belong to the US-ASCII charset. MOSTLY_ASCII
1318:             *			if more than half of the available characters
1319:             *			are US-ASCII characters. Else MOSTLY_NONASCII.
1320:             */
1321:            static int checkAscii(String s) {
1322:                int ascii = 0, non_ascii = 0;
1323:                int l = s.length();
1324:
1325:                for (int i = 0; i < l; i++) {
1326:                    if (nonascii((int) s.charAt(i))) // non-ascii
1327:                        non_ascii++;
1328:                    else
1329:                        ascii++;
1330:                }
1331:
1332:                if (non_ascii == 0)
1333:                    return ALL_ASCII;
1334:                if (ascii > non_ascii)
1335:                    return MOSTLY_ASCII;
1336:
1337:                return MOSTLY_NONASCII;
1338:            }
1339:
1340:            /** 
1341:             * Check if the given byte array contains non US-ASCII characters.
1342:             * @param	b	byte array
1343:             * @return		ALL_ASCII if all characters in the string 
1344:             *			belong to the US-ASCII charset. MOSTLY_ASCII
1345:             *			if more than half of the available characters
1346:             *			are US-ASCII characters. Else MOSTLY_NONASCII.
1347:             *
1348:             * XXX - this method is no longer used
1349:             */
1350:            static int checkAscii(byte[] b) {
1351:                int ascii = 0, non_ascii = 0;
1352:
1353:                for (int i = 0; i < b.length; i++) {
1354:                    // The '&' operator automatically causes b[i] to be promoted
1355:                    // to an int, and we mask out the higher bytes in the int 
1356:                    // so that the resulting value is not a negative integer.
1357:                    if (nonascii(b[i] & 0xff)) // non-ascii
1358:                        non_ascii++;
1359:                    else
1360:                        ascii++;
1361:                }
1362:
1363:                if (non_ascii == 0)
1364:                    return ALL_ASCII;
1365:                if (ascii > non_ascii)
1366:                    return MOSTLY_ASCII;
1367:
1368:                return MOSTLY_NONASCII;
1369:            }
1370:
1371:            /** 
1372:             * Check if the given input stream contains non US-ASCII characters.
1373:             * Upto <code>max</code> bytes are checked. If <code>max</code> is
1374:             * set to <code>ALL</code>, then all the bytes available in this
1375:             * input stream are checked. If <code>breakOnNonAscii</code> is true
1376:             * the check terminates when the first non-US-ASCII character is
1377:             * found and MOSTLY_NONASCII is returned. Else, the check continues
1378:             * till <code>max</code> bytes or till the end of stream.
1379:             *
1380:             * @param	is	the input stream
1381:             * @param	max	maximum bytes to check for. The special value
1382:             *			ALL indicates that all the bytes in this input
1383:             *			stream must be checked.
1384:             * @param	breakOnNonAscii if <code>true</code>, then terminate the
1385:             *			the check when the first non-US-ASCII character
1386:             *			is found.
1387:             * @return		ALL_ASCII if all characters in the string 
1388:             *			belong to the US-ASCII charset. MOSTLY_ASCII
1389:             *			if more than half of the available characters
1390:             *			are US-ASCII characters. Else MOSTLY_NONASCII.
1391:             */
1392:            static int checkAscii(InputStream is, int max,
1393:                    boolean breakOnNonAscii) {
1394:                int ascii = 0, non_ascii = 0;
1395:                int len;
1396:                int block = 4096;
1397:                int linelen = 0;
1398:                boolean longLine = false, badEOL = false;
1399:                boolean checkEOL = encodeEolStrict && breakOnNonAscii;
1400:                byte buf[] = null;
1401:                if (max != 0) {
1402:                    block = (max == ALL) ? 4096 : Math.min(max, 4096);
1403:                    buf = new byte[block];
1404:                }
1405:                while (max != 0) {
1406:                    try {
1407:                        if ((len = is.read(buf, 0, block)) == -1)
1408:                            break;
1409:                        int lastb = 0;
1410:                        for (int i = 0; i < len; i++) {
1411:                            // The '&' operator automatically causes b[i] to 
1412:                            // be promoted to an int, and we mask out the higher
1413:                            // bytes in the int so that the resulting value is 
1414:                            // not a negative integer.
1415:                            int b = buf[i] & 0xff;
1416:                            if (checkEOL
1417:                                    && ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
1418:                                badEOL = true;
1419:                            if (b == '\r' || b == '\n')
1420:                                linelen = 0;
1421:                            else {
1422:                                linelen++;
1423:                                if (linelen > 998) // 1000 - CRLF
1424:                                    longLine = true;
1425:                            }
1426:                            if (nonascii(b)) { // non-ascii
1427:                                if (breakOnNonAscii) // we are done
1428:                                    return MOSTLY_NONASCII;
1429:                                else
1430:                                    non_ascii++;
1431:                            } else
1432:                                ascii++;
1433:                            lastb = b;
1434:                        }
1435:                    } catch (IOException ioex) {
1436:                        break;
1437:                    }
1438:                    if (max != ALL)
1439:                        max -= len;
1440:                }
1441:
1442:                if (max == 0 && breakOnNonAscii)
1443:                    // We have been told to break on the first non-ascii character.
1444:                    // We haven't got any non-ascii character yet, but then we
1445:                    // have not checked all of the available bytes either. So we
1446:                    // cannot say for sure that this input stream is ALL_ASCII,
1447:                    // and hence we must play safe and return MOSTLY_NONASCII
1448:
1449:                    return MOSTLY_NONASCII;
1450:
1451:                if (non_ascii == 0) { // no non-us-ascii characters so far
1452:                    // If we're looking at non-text data, and we saw CR without LF
1453:                    // or vice versa, consider this mostly non-ASCII so that it
1454:                    // will be base64 encoded (since the quoted-printable encoder
1455:                    // doesn't encode this case properly).
1456:                    if (badEOL)
1457:                        return MOSTLY_NONASCII;
1458:                    // if we've seen a long line, we degrade to mostly ascii
1459:                    else if (longLine)
1460:                        return MOSTLY_ASCII;
1461:                    else
1462:                        return ALL_ASCII;
1463:                }
1464:                if (ascii > non_ascii) // mostly ascii
1465:                    return MOSTLY_ASCII;
1466:                return MOSTLY_NONASCII;
1467:            }
1468:
1469:            static final boolean nonascii(int b) {
1470:                return b >= 0177
1471:                        || (b < 040 && b != '\r' && b != '\n' && b != '\t');
1472:            }
1473:        }
1474:
1475:        /**
1476:         * An OutputStream that determines whether the data written to
1477:         * it is all ASCII, mostly ASCII, or mostly non-ASCII.
1478:         */
1479:        class AsciiOutputStream extends OutputStream {
1480:            private boolean breakOnNonAscii;
1481:            private int ascii = 0, non_ascii = 0;
1482:            private int linelen = 0;
1483:            private boolean longLine = false;
1484:            private boolean badEOL = false;
1485:            private boolean checkEOL = false;
1486:            private int lastb = 0;
1487:            private int ret = 0;
1488:
1489:            public AsciiOutputStream(boolean breakOnNonAscii,
1490:                    boolean encodeEolStrict) {
1491:                this .breakOnNonAscii = breakOnNonAscii;
1492:                checkEOL = encodeEolStrict && breakOnNonAscii;
1493:            }
1494:
1495:            public void write(int b) throws IOException {
1496:                check(b);
1497:            }
1498:
1499:            public void write(byte b[]) throws IOException {
1500:                write(b, 0, b.length);
1501:            }
1502:
1503:            public void write(byte b[], int off, int len) throws IOException {
1504:                len += off;
1505:                for (int i = off; i < len; i++)
1506:                    check(b[i]);
1507:            }
1508:
1509:            private final void check(int b) throws IOException {
1510:                b &= 0xff;
1511:                if (checkEOL
1512:                        && ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
1513:                    badEOL = true;
1514:                if (b == '\r' || b == '\n')
1515:                    linelen = 0;
1516:                else {
1517:                    linelen++;
1518:                    if (linelen > 998) // 1000 - CRLF
1519:                        longLine = true;
1520:                }
1521:                if (MimeUtility.nonascii(b)) { // non-ascii
1522:                    non_ascii++;
1523:                    if (breakOnNonAscii) { // we are done
1524:                        ret = MimeUtility.MOSTLY_NONASCII;
1525:                        throw new EOFException();
1526:                    }
1527:                } else
1528:                    ascii++;
1529:                lastb = b;
1530:            }
1531:
1532:            /**
1533:             * Return ASCII-ness of data stream.
1534:             */
1535:            public int getAscii() {
1536:                if (ret != 0)
1537:                    return ret;
1538:                // If we're looking at non-text data, and we saw CR without LF
1539:                // or vice versa, consider this mostly non-ASCII so that it
1540:                // will be base64 encoded (since the quoted-printable encoder
1541:                // doesn't encode this case properly).
1542:                if (badEOL)
1543:                    return MimeUtility.MOSTLY_NONASCII;
1544:                else if (non_ascii == 0) { // no non-us-ascii characters so far
1545:                    // if we've seen a long line, we degrade to mostly ascii
1546:                    if (longLine)
1547:                        return MimeUtility.MOSTLY_ASCII;
1548:                    else
1549:                        return MimeUtility.ALL_ASCII;
1550:                }
1551:                if (ascii > non_ascii) // mostly ascii
1552:                    return MimeUtility.MOSTLY_ASCII;
1553:                return MimeUtility.MOSTLY_NONASCII;
1554:            }
1555:        }
w_w__w.j_a___v__a___2_s_.___c__om__ | Contact Us
All other trademarks are property of their respective owners.