001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.nio.charset;
019:
020: import java.io.BufferedReader;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.InputStreamReader;
024: import java.net.URL;
025: import java.nio.ByteBuffer;
026: import java.nio.CharBuffer;
027: import java.nio.charset.spi.CharsetProvider;
028: import java.security.AccessController;
029: import java.security.PrivilegedAction;
030: import java.util.Collections;
031: import java.util.Comparator;
032: import java.util.Enumeration;
033: import java.util.HashMap;
034: import java.util.HashSet;
035: import java.util.Iterator;
036: import java.util.Locale;
037: import java.util.Set;
038: import java.util.SortedMap;
039: import java.util.TreeMap;
040:
041: import org.apache.harmony.niochar.CharsetProviderImpl;
042:
043: /**
 * A charset defines a mapping between a Unicode character sequence and a byte
 * sequence. It facilitates the encoding of a Unicode character sequence into a
 * byte sequence, and the decoding of a byte sequence into a Unicode character
 * sequence.
048: * <p>
 * A charset has a canonical name, which is usually in uppercase. Typically it
 * also has one or more aliases. The name string can only consist of the
 * following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':', '-' and '_'.
 * The first character of the name must be a digit or a letter.
053: * </p>
054: * <p>
055: * The following charsets should be supported by any java platforms: US-ASCII,
056: * ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.
057: * </p>
058: * <p>
 * Additional charsets can be made available by configuring one or more charset
 * providers through provider configuration files. Such files are always named
 * "java.nio.charset.spi.CharsetProvider" and are located in the
 * "META-INF/services" subfolder of one or more classpath entries. The files
 * should be encoded in "UTF-8". Each line of their content specifies the class
 * name of a charset provider which extends
 * <code>java.nio.charset.spi.CharsetProvider</code>. A line should end with
 * '\r', '\n' or '\r\n'. Leading and trailing whitespace is trimmed. Blank
 * lines, and lines which (after trimming) start with "#" and are therefore
 * regarded as comments, are both ignored. Duplicates of names that have
 * already appeared are also ignored. Both the configuration files and the
 * provider classes will be loaded using the thread context class loader.
070: * </p>
071: * <p>
072: * This class is thread-safe.
073: * </p>
074: *
075: * @see java.nio.charset.spi.CharsetProvider
076: */
077: public abstract class Charset implements Comparable<Charset> {
078:
079: /*
080: * The name of configuration files where charset provider class names can be
081: * specified.
082: */
083: private static final String PROVIDER_CONFIGURATION_FILE_NAME = "META-INF/services/java.nio.charset.spi.CharsetProvider"; //$NON-NLS-1$
084:
085: /*
086: * The encoding of configuration files
087: */
088: private static final String PROVIDER_CONFIGURATION_FILE_ENCODING = "UTF-8"; //$NON-NLS-1$
089:
090: /*
091: * The comment string used in configuration files
092: */
093: private static final String PROVIDER_CONFIGURATION_FILE_COMMENT = "#"; //$NON-NLS-1$
094:
095: private static ClassLoader systemClassLoader;
096:
097: // built in provider instance, assuming thread-safe
098: private static CharsetProviderImpl _builtInProvider = null;
099:
100: // cached built in charsets
101: private static TreeMap<String, Charset> _builtInCharsets = null;
102:
103: private final String canonicalName;
104:
105: // the aliases set
106: private final HashSet<String> aliasesSet;
107:
108: // cached Charset table
109: private static HashMap<String, Charset> cachedCharsetTable = new HashMap<String, Charset>();
110:
static {
    /*
     * The built-in provider is instantiated inside a privileged action so
     * that it is created even when the caller has no privilege to access
     * charset providers.
     */
    PrivilegedAction<CharsetProviderImpl> createBuiltInProvider = new PrivilegedAction<CharsetProviderImpl>() {
        public CharsetProviderImpl run() {
            return new CharsetProviderImpl();
        }
    };
    _builtInProvider = AccessController.doPrivileged(createBuiltInProvider);
}
123:
124: /**
125: * Constructs a <code>Charset</code> object. Duplicated aliases are
126: * ignored.
127: *
128: * @param canonicalName
129: * the canonical name of the charset
130: * @param aliases
131: * an array containing all aliases of the charset
132: * @throws IllegalCharsetNameException
133: * on an illegal value being supplied for either
134: * <code>canonicalName</code> or for any element of
135: * <code>aliases</code>.
136: */
137: protected Charset(String canonicalName, String[] aliases) {
138: if (null == canonicalName) {
139: throw new NullPointerException();
140: }
141: // check whether the given canonical name is legal
142: checkCharsetName(canonicalName);
143: this .canonicalName = canonicalName;
144: // check each alias and put into a set
145: this .aliasesSet = new HashSet<String>();
146: if (null != aliases) {
147: for (int i = 0; i < aliases.length; i++) {
148: checkCharsetName(aliases[i]);
149: this .aliasesSet.add(aliases[i]);
150: }
151: }
152: }
153:
154: /*
155: * Checks whether a character is a special character that can be used in
156: * charset names, other than letters and digits.
157: */
158: private static boolean isSpecial(char c) {
159: return ('-' == c || '.' == c || ':' == c || '_' == c);
160: }
161:
162: /*
163: * Checks whether a character is a letter (ascii) which are defined in Java
164: * Spec.
165: */
166: private static boolean isLetter(char c) {
167: return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
168: }
169:
170: /*
171: * Checks whether a character is a digit (ascii) which are defined in Java
172: * Spec.
173: */
174: private static boolean isDigit(char c) {
175: return ('0' <= c && c <= '9');
176: }
177:
178: /*
179: * Checks whether a given string is a legal charset name. The argument name
180: * should not be null.
181: */
182: private static void checkCharsetName(String name) {
183: // An empty string is illegal charset name
184: if (name.length() == 0) {
185: throw new IllegalCharsetNameException(name);
186: }
187: // The first character must be a letter or a digit
188: // This is related to HARMONY-68 (won't fix)
189: // char first = name.charAt(0);
190: // if (!isLetter(first) && !isDigit(first)) {
191: // throw new IllegalCharsetNameException(name);
192: // }
193: // Check the remaining characters
194: int length = name.length();
195: for (int i = 0; i < length; i++) {
196: char c = name.charAt(i);
197: if (!isLetter(c) && !isDigit(c) && !isSpecial(c)) {
198: throw new IllegalCharsetNameException(name);
199: }
200: }
201: }
202:
203: /*
204: * Use privileged code to get the context class loader.
205: */
206: private static ClassLoader getContextClassLoader() {
207: final Thread t = Thread.currentThread();
208: return AccessController
209: .doPrivileged(new PrivilegedAction<ClassLoader>() {
210: public ClassLoader run() {
211: return t.getContextClassLoader();
212: }
213: });
214: }
215:
216: /*
217: * Use privileged code to get the system class loader.
218: */
219: private static void getSystemClassLoader() {
220: if (null == systemClassLoader) {
221: systemClassLoader = AccessController
222: .doPrivileged(new PrivilegedAction<ClassLoader>() {
223: public ClassLoader run() {
224: return ClassLoader.getSystemClassLoader();
225: }
226: });
227: }
228: }
229:
230: /*
231: * Add the charsets supported by the given provider to the map.
232: */
233: private static void addCharsets(CharsetProvider cp,
234: TreeMap<String, Charset> charsets) {
235: Iterator<Charset> it = cp.charsets();
236: while (it.hasNext()) {
237: Charset cs = it.next();
238: // Only new charsets will be added
239: if (!charsets.containsKey(cs.name())) {
240: charsets.put(cs.name(), cs);
241: }
242: }
243: }
244:
245: /*
246: * Trim comment string, and then trim white spaces.
247: */
248: private static String trimClassName(String name) {
249: String trimedName = name;
250: int index = name.indexOf(PROVIDER_CONFIGURATION_FILE_COMMENT);
251: // Trim comments
252: if (index != -1) {
253: trimedName = name.substring(0, index);
254: }
255: return trimedName.trim();
256: }
257:
258: /*
259: * Read a configuration file and add the charsets supported by the providers
260: * specified by this configuration file to the map.
261: */
262: private static void loadConfiguredCharsets(URL configFile,
263: ClassLoader contextClassLoader,
264: TreeMap<String, Charset> charsets) {
265: BufferedReader reader = null;
266: try {
267: InputStream is = configFile.openStream();
268: // Read each line for charset provider class names
269: reader = new BufferedReader(new InputStreamReader(is,
270: PROVIDER_CONFIGURATION_FILE_ENCODING));
271: String providerClassName = reader.readLine();
272: while (null != providerClassName) {
273: providerClassName = trimClassName(providerClassName);
274: // Skip comments and blank lines
275: if (providerClassName.length() > 0) { // Non empty string
276: // Load the charset provider
277: Object cp = null;
278: try {
279: Class<?> c = Class.forName(providerClassName,
280: true, contextClassLoader);
281: cp = c.newInstance();
282: } catch (Exception ex) {
283: // try to use system classloader when context
284: // classloader failed to load config file.
285: try {
286: getSystemClassLoader();
287: Class<?> c = Class.forName(
288: providerClassName, true,
289: systemClassLoader);
290: cp = c.newInstance();
291: } catch (Exception e) {
292: throw new Error(e.getMessage(), e);
293: }
294: }
295: // Put the charsets supported by this provider into the map
296: addCharsets((CharsetProvider) cp, charsets);
297: }
298: // Read the next line of the config file
299: providerClassName = reader.readLine();
300: }
301: } catch (IOException ex) {
302: // Can't read this configuration file, ignore
303: } finally {
304: try {
305: if (null != reader) {
306: reader.close();
307: }
308: } catch (IOException ex) {
309: // Ignore closing exception
310: }
311: }
312: }
313:
314: /**
315: * Gets a map of all available charsets supported by the runtime.
316: * <p>
317: * The returned map contains mappings from canonical names to corresponding
318: * instances of <code>Charset</code>. The canonical names can be
319: * considered as case-insensitive.
320: * </p>
321: *
322: * @return an unmodifiable map of all available charsets supported by the
323: * runtime
324: */
325: @SuppressWarnings("unchecked")
326: public static SortedMap<String, Charset> availableCharsets() {
327: // Initialize the built-in charsets map cache if necessary
328: if (null == _builtInCharsets) {
329: synchronized (Charset.class) {
330: if (null == _builtInCharsets) {
331: _builtInCharsets = new TreeMap<String, Charset>(
332: IgnoreCaseComparator.getInstance());
333: _builtInProvider.putCharsets(_builtInCharsets);
334: }
335: }
336: }
337:
338: // Add built-in charsets
339: TreeMap<String, Charset> charsets = (TreeMap<String, Charset>) _builtInCharsets
340: .clone();
341:
342: // Collect all charsets provided by charset providers
343: ClassLoader contextClassLoader = getContextClassLoader();
344: Enumeration<URL> e = null;
345: try {
346: if (null != contextClassLoader) {
347: e = contextClassLoader
348: .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
349: } else {
350: getSystemClassLoader();
351: e = systemClassLoader
352: .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
353: }
354: // Examine each configuration file
355: while (e.hasMoreElements()) {
356: loadConfiguredCharsets(e.nextElement(),
357: contextClassLoader, charsets);
358: }
359: } catch (IOException ex) {
360: // Unexpected ClassLoader exception, ignore
361: }
362: return Collections.unmodifiableSortedMap(charsets);
363: }
364:
365: /*
366: * Read a configuration file and try to find the desired charset among those
367: * which are supported by the providers specified in this configuration
368: * file.
369: */
370: private static Charset searchConfiguredCharsets(String charsetName,
371: ClassLoader contextClassLoader, URL configFile) {
372: BufferedReader reader = null;
373: try {
374: InputStream is = configFile.openStream();
375: // Read each line for charset provider class names
376: reader = new BufferedReader(new InputStreamReader(is,
377: PROVIDER_CONFIGURATION_FILE_ENCODING));
378: String providerClassName = reader.readLine();
379: while (null != providerClassName) {
380: providerClassName = trimClassName(providerClassName);
381: if (providerClassName.length() > 0) { // Non empty string
382: // Load the charset provider
383: Object cp = null;
384: try {
385: Class<?> c = Class.forName(providerClassName,
386: true, contextClassLoader);
387: cp = c.newInstance();
388: } catch (Exception ex) {
389: // try to use system classloader when context
390: // classloader failed to load config file.
391: try {
392: getSystemClassLoader();
393: Class<?> c = Class.forName(
394: providerClassName, true,
395: systemClassLoader);
396: cp = c.newInstance();
397: } catch (SecurityException e) {
398: throw e;
399: } catch (Exception e) {
400: throw new Error(e.getMessage(), e);
401: }
402: }
403: // Try to get the desired charset from this provider
404: Charset cs = ((CharsetProvider) cp)
405: .charsetForName(charsetName);
406: if (null != cs) {
407: return cs;
408: }
409: }
410: // Read the next line of the config file
411: providerClassName = reader.readLine();
412: }
413: return null;
414: } catch (IOException ex) {
415: // Can't read this configuration file
416: return null;
417: } finally {
418: try {
419: if (null != reader) {
420: reader.close();
421: }
422: } catch (IOException ex) {
423: // Ignore closing exception
424: }
425: }
426: }
427:
428: /*
429: * Gets a <code> Charset </code> instance for the specified charset name. If
430: * the charset is not supported, returns null instead of throwing an
431: * exception.
432: */
433: private static Charset forNameInternal(String charsetName)
434: throws IllegalCharsetNameException {
435: if (null == charsetName) {
436: throw new IllegalArgumentException();
437: }
438: checkCharsetName(charsetName);
439: synchronized (Charset.class) {
440: // Try to get Charset from cachedCharsetTable
441: Charset cs = getCachedCharset(charsetName);
442: if (null != cs) {
443: return cs;
444: }
445: // Try built-in charsets
446: if (_builtInProvider == null) {
447: _builtInProvider = new CharsetProviderImpl();
448: }
449: cs = _builtInProvider.charsetForName(charsetName);
450: if (null != cs) {
451: cacheCharset(cs);
452: return cs;
453: }
454:
455: // Collect all charsets provided by charset providers
456: ClassLoader contextClassLoader = getContextClassLoader();
457: Enumeration<URL> e = null;
458: try {
459: if (null != contextClassLoader) {
460: e = contextClassLoader
461: .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
462: } else {
463: getSystemClassLoader();
464: e = systemClassLoader
465: .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
466: }
467: // Examine each configuration file
468: while (e.hasMoreElements()) {
469: cs = searchConfiguredCharsets(charsetName,
470: contextClassLoader, e.nextElement());
471: if (null != cs) {
472: cacheCharset(cs);
473: return cs;
474: }
475: }
476: } catch (IOException ex) {
477: // Unexpected ClassLoader exception, ignore
478: }
479: }
480: return null;
481: }
482:
483: /*
484: * save charset into cachedCharsetTable
485: */
486: private static void cacheCharset(Charset cs) {
487: cachedCharsetTable.put(cs.name(), cs);
488: Set<String> aliasesSet = cs.aliases();
489: if (null != aliasesSet) {
490: Iterator<String> iter = aliasesSet.iterator();
491: while (iter.hasNext()) {
492: String alias = iter.next();
493: cachedCharsetTable.put(alias, cs);
494: }
495: }
496: }
497:
498: /*
499: * get cached charset reference by name
500: */
501: private static Charset getCachedCharset(String name) {
502: return cachedCharsetTable.get(name);
503: }
504:
505: /**
506: * Gets a <code>Charset</code> instance for the specified charset name.
507: *
508: * @param charsetName
509: * the name of the charset
510: * @return a <code>Charset</code> instance for the specified charset name
511: * @throws IllegalCharsetNameException
512: * If the specified charset name is illegal.
513: * @throws UnsupportedCharsetException
514: * If the desired charset is not supported by this runtime.
515: */
516: public static Charset forName(String charsetName) {
517: Charset c = forNameInternal(charsetName);
518: if (null == c) {
519: throw new UnsupportedCharsetException(charsetName);
520: }
521: return c;
522: }
523:
524: /**
525: * Determines whether the specified charset is supported by this runtime.
526: *
527: * @param charsetName
528: * the name of the charset
529: * @return true if the specified charset is supported, otherwise false
530: * @throws IllegalCharsetNameException
531: * If the specified charset name is illegal.
532: */
533: public static boolean isSupported(String charsetName) {
534: Charset cs = forNameInternal(charsetName);
535: return (null != cs);
536: }
537:
/**
 * Determines whether this charset is a superset of the given charset.
 * <p>
 * No implementation is supplied by this class; every concrete charset has
 * to define the relation itself.
 * </p>
 *
 * @param charset
 *            the charset to compare this charset against
 * @return true if this charset is a superset of the given charset,
 *         otherwise false
 */
public abstract boolean contains(Charset charset);
547:
/**
 * Gets a new instance of an encoder for this charset.
 * <p>
 * No implementation is supplied by this class; every concrete charset has
 * to provide its own encoder.
 * </p>
 *
 * @return a new instance of an encoder for this charset
 */
public abstract CharsetEncoder newEncoder();
554:
/**
 * Gets a new instance of a decoder for this charset.
 * <p>
 * No implementation is supplied by this class; every concrete charset has
 * to provide its own decoder.
 * </p>
 *
 * @return a new instance of a decoder for this charset
 */
public abstract CharsetDecoder newDecoder();
561:
562: /**
563: * Gets the canonical name of this charset.
564: *
565: * @return this charset's name in canonical form.
566: */
567: public final String name() {
568: return this .canonicalName;
569: }
570:
571: /**
572: * Gets the set of this charset's aliases.
573: *
574: * @return an unmodifiable set of this charset's aliases
575: */
576: public final Set<String> aliases() {
577: return Collections.unmodifiableSet(this .aliasesSet);
578: }
579:
580: /**
581: * Gets the name of this charset for the default locale.
582: *
583: * @return the name of this charset for the default locale
584: */
585: public String displayName() {
586: return this .canonicalName;
587: }
588:
589: /**
590: * Gets the name of this charset for the specified locale.
591: *
592: * @param l
593: * a certain locale
594: * @return the name of this charset for the specified locale
595: */
596: public String displayName(Locale l) {
597: return this .canonicalName;
598: }
599:
600: /**
601: * Answers whether this charset is known to be registered in the IANA
602: * Charset Registry.
603: *
604: * @return true if the charset is known to be registered, otherwise returns
605: * false.
606: */
607: public final boolean isRegistered() {
608: return !canonicalName.startsWith("x-") //$NON-NLS-1$
609: && !canonicalName.startsWith("X-"); //$NON-NLS-1$
610: }
611:
/**
 * Answers true if this charset supports encoding, otherwise false.
 * <p>
 * This base implementation always answers true. The method is not final,
 * so a subclass may override it.
 * </p>
 *
 * @return true
 */
public boolean canEncode() {
    return true;
}
620:
621: /**
622: * Encodes the content of the give character buffer and outputs to a byte
623: * buffer that is to be returned.
624: * <p>
625: * The default action in case of encoding errors is
626: * <code>CodingErrorAction.REPLACE</code>.
627: * </p>
628: *
629: * @param buffer
630: * the character buffer containing the content to be encoded
631: * @return the result of the encoding
632: */
633: public final ByteBuffer encode(CharBuffer buffer) {
634: try {
635: return this .newEncoder().onMalformedInput(
636: CodingErrorAction.REPLACE).onUnmappableCharacter(
637: CodingErrorAction.REPLACE).encode(buffer);
638:
639: } catch (CharacterCodingException ex) {
640: throw new Error(ex.getMessage(), ex);
641: }
642: }
643:
644: /**
645: * Encodes a string and outputs to a byte buffer that is to be returned.
646: * <p>
647: * The default action in case of encoding errors is
648: * <code>CodingErrorAction.REPLACE</code>.
649: * </p>
650: *
651: * @param s
652: * the string to be encoded
653: * @return the result of the encoding
654: */
655: public final ByteBuffer encode(String s) {
656: return encode(CharBuffer.wrap(s));
657: }
658:
659: /**
660: * Decodes the content of the give byte buffer and outputs to a character
661: * buffer that is to be returned.
662: * <p>
663: * The default action in case of decoding errors is
664: * <code>CodingErrorAction.REPLACE</code>.
665: * </p>
666: *
667: * @param buffer
668: * the byte buffer containing the content to be decoded
669: * @return a character buffer containing the output of the decoding
670: */
671: public final CharBuffer decode(ByteBuffer buffer) {
672:
673: try {
674: return this .newDecoder().onMalformedInput(
675: CodingErrorAction.REPLACE).onUnmappableCharacter(
676: CodingErrorAction.REPLACE).decode(buffer);
677:
678: } catch (CharacterCodingException ex) {
679: throw new Error(ex.getMessage(), ex);
680: }
681: }
682:
683: /*
684: * -------------------------------------------------------------------
685: * Methods implementing parent interface Comparable
686: * -------------------------------------------------------------------
687: */
688:
689: /**
690: * Compares this charset with the given charset.
691: *
692: * @param charset
693: * the given object to be compared with
694: * @return a negative integer if less than the given object, a positive
695: * integer if larger than it, or 0 if equal to it
696: */
697: public final int compareTo(Charset charset) {
698: return this .canonicalName
699: .compareToIgnoreCase(charset.canonicalName);
700: }
701:
702: /*
703: * -------------------------------------------------------------------
704: * Methods overriding parent class Object
705: * -------------------------------------------------------------------
706: */
707:
708: /**
709: * Determines whether this charset equals to the given object. They are
710: * considered to be equal if they have the same canonical name.
711: *
712: * @param obj
713: * the given object to be compared with
714: * @return true if they have the same canonical name, otherwise false
715: */
716: @Override
717: public final boolean equals(Object obj) {
718: if (obj instanceof Charset) {
719: Charset that = (Charset) obj;
720: return this .canonicalName.equals(that.canonicalName);
721: }
722: return false;
723: }
724:
725: /**
726: * Gets the hash code of this charset.
727: *
728: * @return the hash code of this charset
729: */
730: @Override
731: public final int hashCode() {
732: return this .canonicalName.hashCode();
733: }
734:
735: /**
736: * Gets a string representation of this charset. Usually this contains the
737: * canonical name of the charset.
738: *
739: * @return a string representation of this charset
740: */
741: @Override
742: public final String toString() {
743: return "Charset[" + this .canonicalName + "]"; //$NON-NLS-1$//$NON-NLS-2$
744: }
745:
746: /**
747: * Gets the system default charset from jvm.
748: *
749: * @return the default charset
750: */
751: public static Charset defaultCharset() {
752: Charset defaultCharset = null;
753: String encoding = AccessController
754: .doPrivileged(new PrivilegedAction<String>() {
755: public String run() {
756: return System.getProperty("file.encoding"); //$NON-NLS-1$
757: }
758: });
759: try {
760: defaultCharset = Charset.forName(encoding);
761: } catch (UnsupportedCharsetException e) {
762: defaultCharset = Charset.forName("UTF-8"); //$NON-NLS-1$
763: }
764: return defaultCharset;
765: }
766:
767: /**
768: * A comparator that ignores case.
769: */
770: static class IgnoreCaseComparator implements Comparator<String> {
771:
772: // the singleton
773: private static Comparator<String> c = new IgnoreCaseComparator();
774:
775: /*
776: * Default constructor.
777: */
778: private IgnoreCaseComparator() {
779: // no action
780: }
781:
782: /*
783: * Gets a single instance.
784: */
785: public static Comparator<String> getInstance() {
786: return c;
787: }
788:
789: /*
790: * Compares two strings ignoring case.
791: */
792: public int compare(String s1, String s2) {
793: return s1.compareToIgnoreCase(s2);
794: }
795: }
796: }
|