001: /*
002: * Copyright 2001-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.commons.codec.net;
018:
019: import java.io.ByteArrayOutputStream;
020: import java.io.UnsupportedEncodingException;
021: import java.util.BitSet;
022: import org.apache.commons.codec.BinaryDecoder;
023: import org.apache.commons.codec.BinaryEncoder;
024: import org.apache.commons.codec.DecoderException;
025: import org.apache.commons.codec.EncoderException;
026: import org.apache.commons.codec.StringDecoder;
027: import org.apache.commons.codec.StringEncoder;
028:
029: /**
030: * <p>
031: * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
032: * </p>
033: * <p>
034: * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
035: * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
036: * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
037: * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
038: * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
039: * gateway.
040: * </p>
041: *
042: * <p>
043: * Note:
044: * </p>
045: * <p>
046: * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
047: * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec
048: * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
049: * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
050: * </p>
051: *
052: * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
053: * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
054: *
055: * @author Apache Software Foundation
056: * @since 1.3
057: * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $
058: */
059: public class QuotedPrintableCodec implements BinaryEncoder,
060: BinaryDecoder, StringEncoder, StringDecoder {
061: /**
062: * The default charset used for string decoding and encoding.
063: */
064: private String charset = StringEncodings.UTF8;
065:
066: /**
067: * BitSet of printable characters as defined in RFC 1521.
068: */
069: private static final BitSet PRINTABLE_CHARS = new BitSet(256);
070:
071: private static byte ESCAPE_CHAR = '=';
072:
073: private static byte TAB = 9;
074:
075: private static byte SPACE = 32;
076: // Static initializer for printable chars collection
077: static {
078: // alpha characters
079: for (int i = 33; i <= 60; i++) {
080: PRINTABLE_CHARS.set(i);
081: }
082: for (int i = 62; i <= 126; i++) {
083: PRINTABLE_CHARS.set(i);
084: }
085: PRINTABLE_CHARS.set(TAB);
086: PRINTABLE_CHARS.set(SPACE);
087: }
088:
089: /**
090: * Default constructor.
091: */
092: public QuotedPrintableCodec() {
093: super ();
094: }
095:
096: /**
097: * Constructor which allows for the selection of a default charset
098: *
099: * @param charset
100: * the default string charset to use.
101: */
102: public QuotedPrintableCodec(String charset) {
103: super ();
104: this .charset = charset;
105: }
106:
107: /**
108: * Encodes byte into its quoted-printable representation.
109: *
110: * @param b
111: * byte to encode
112: * @param buffer
113: * the buffer to write to
114: */
115: private static final void encodeQuotedPrintable(int b,
116: ByteArrayOutputStream buffer) {
117: buffer.write(ESCAPE_CHAR);
118: char hex1 = Character.toUpperCase(Character.forDigit(
119: (b >> 4) & 0xF, 16));
120: char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF,
121: 16));
122: buffer.write(hex1);
123: buffer.write(hex2);
124: }
125:
126: /**
127: * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
128: *
129: * <p>
130: * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
131: * RFC 1521 and is suitable for encoding binary data and unformatted text.
132: * </p>
133: *
134: * @param printable
135: * bitset of characters deemed quoted-printable
136: * @param bytes
137: * array of bytes to be encoded
138: * @return array of bytes containing quoted-printable data
139: */
140: public static final byte[] encodeQuotedPrintable(BitSet printable,
141: byte[] bytes) {
142: if (bytes == null) {
143: return null;
144: }
145: if (printable == null) {
146: printable = PRINTABLE_CHARS;
147: }
148: ByteArrayOutputStream buffer = new ByteArrayOutputStream();
149: for (int i = 0; i < bytes.length; i++) {
150: int b = bytes[i];
151: if (b < 0) {
152: b = 256 + b;
153: }
154: if (printable.get(b)) {
155: buffer.write(b);
156: } else {
157: encodeQuotedPrintable(b, buffer);
158: }
159: }
160: return buffer.toByteArray();
161: }
162:
163: /**
164: * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
165: * back to their original representation.
166: *
167: * <p>
168: * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
169: * RFC 1521.
170: * </p>
171: *
172: * @param bytes
173: * array of quoted-printable characters
174: * @return array of original bytes
175: * @throws DecoderException
176: * Thrown if quoted-printable decoding is unsuccessful
177: */
178: public static final byte[] decodeQuotedPrintable(byte[] bytes)
179: throws DecoderException {
180: if (bytes == null) {
181: return null;
182: }
183: ByteArrayOutputStream buffer = new ByteArrayOutputStream();
184: for (int i = 0; i < bytes.length; i++) {
185: int b = bytes[i];
186: if (b == ESCAPE_CHAR) {
187: try {
188: int u = Character.digit((char) bytes[++i], 16);
189: int l = Character.digit((char) bytes[++i], 16);
190: if (u == -1 || l == -1) {
191: throw new DecoderException(
192: "Invalid quoted-printable encoding");
193: }
194: buffer.write((char) ((u << 4) + l));
195: } catch (ArrayIndexOutOfBoundsException e) {
196: throw new DecoderException(
197: "Invalid quoted-printable encoding");
198: }
199: } else {
200: buffer.write(b);
201: }
202: }
203: return buffer.toByteArray();
204: }
205:
206: /**
207: * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
208: *
209: * <p>
210: * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
211: * RFC 1521 and is suitable for encoding binary data and unformatted text.
212: * </p>
213: *
214: * @param bytes
215: * array of bytes to be encoded
216: * @return array of bytes containing quoted-printable data
217: */
218: public byte[] encode(byte[] bytes) {
219: return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
220: }
221:
222: /**
223: * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
224: * back to their original representation.
225: *
226: * <p>
227: * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
228: * RFC 1521.
229: * </p>
230: *
231: * @param bytes
232: * array of quoted-printable characters
233: * @return array of original bytes
234: * @throws DecoderException
235: * Thrown if quoted-printable decoding is unsuccessful
236: */
237: public byte[] decode(byte[] bytes) throws DecoderException {
238: return decodeQuotedPrintable(bytes);
239: }
240:
241: /**
242: * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
243: *
244: * <p>
245: * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
246: * RFC 1521 and is suitable for encoding binary data.
247: * </p>
248: *
249: * @param pString
250: * string to convert to quoted-printable form
251: * @return quoted-printable string
252: *
253: * @throws EncoderException
254: * Thrown if quoted-printable encoding is unsuccessful
255: *
256: * @see #getDefaultCharset()
257: */
258: public String encode(String pString) throws EncoderException {
259: if (pString == null) {
260: return null;
261: }
262: try {
263: return encode(pString, getDefaultCharset());
264: } catch (UnsupportedEncodingException e) {
265: throw new EncoderException(e.getMessage());
266: }
267: }
268:
269: /**
270: * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
271: * are converted back to their original representation.
272: *
273: * @param pString
274: * quoted-printable string to convert into its original form
275: * @param charset
276: * the original string charset
277: * @return original string
278: * @throws DecoderException
279: * Thrown if quoted-printable decoding is unsuccessful
280: * @throws UnsupportedEncodingException
281: * Thrown if charset is not supported
282: */
283: public String decode(String pString, String charset)
284: throws DecoderException, UnsupportedEncodingException {
285: if (pString == null) {
286: return null;
287: }
288: return new String(decode(pString
289: .getBytes(StringEncodings.US_ASCII)), charset);
290: }
291:
292: /**
293: * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
294: * converted back to their original representation.
295: *
296: * @param pString
297: * quoted-printable string to convert into its original form
298: * @return original string
299: * @throws DecoderException
300: * Thrown if quoted-printable decoding is unsuccessful
301: * @throws UnsupportedEncodingException
302: * Thrown if charset is not supported
303: * @see #getDefaultCharset()
304: */
305: public String decode(String pString) throws DecoderException {
306: if (pString == null) {
307: return null;
308: }
309: try {
310: return decode(pString, getDefaultCharset());
311: } catch (UnsupportedEncodingException e) {
312: throw new DecoderException(e.getMessage());
313: }
314: }
315:
316: /**
317: * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
318: *
319: * @param pObject
320: * string to convert to a quoted-printable form
321: * @return quoted-printable object
322: * @throws EncoderException
323: * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
324: * unsuccessful
325: */
326: public Object encode(Object pObject) throws EncoderException {
327: if (pObject == null) {
328: return null;
329: } else if (pObject instanceof byte[]) {
330: return encode((byte[]) pObject);
331: } else if (pObject instanceof String) {
332: return encode((String) pObject);
333: } else {
334: throw new EncoderException("Objects of type "
335: + pObject.getClass().getName()
336: + " cannot be quoted-printable encoded");
337: }
338: }
339:
340: /**
341: * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
342: * representation.
343: *
344: * @param pObject
345: * quoted-printable object to convert into its original form
346: * @return original object
347: * @throws DecoderException
348: * Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is
349: * unsuccessful
350: */
351: public Object decode(Object pObject) throws DecoderException {
352: if (pObject == null) {
353: return null;
354: } else if (pObject instanceof byte[]) {
355: return decode((byte[]) pObject);
356: } else if (pObject instanceof String) {
357: return decode((String) pObject);
358: } else {
359: throw new DecoderException("Objects of type "
360: + pObject.getClass().getName()
361: + " cannot be quoted-printable decoded");
362: }
363: }
364:
365: /**
366: * Returns the default charset used for string decoding and encoding.
367: *
368: * @return the default string charset.
369: */
370: public String getDefaultCharset() {
371: return this .charset;
372: }
373:
374: /**
375: * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
376: *
377: * <p>
378: * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
379: * RFC 1521 and is suitable for encoding binary data and unformatted text.
380: * </p>
381: *
382: * @param pString
383: * string to convert to quoted-printable form
384: * @param charset
385: * the charset for pString
386: * @return quoted-printable string
387: *
388: * @throws UnsupportedEncodingException
389: * Thrown if the charset is not supported
390: */
391: public String encode(String pString, String charset)
392: throws UnsupportedEncodingException {
393: if (pString == null) {
394: return null;
395: }
396: return new String(encode(pString.getBytes(charset)),
397: StringEncodings.US_ASCII);
398: }
399: }
|