Source Code Cross Referenced for CharsetICU.java in » Internationalization-Localization » icu4j » com » ibm » icu » charset » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.charset

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /**
002:         *******************************************************************************
003:         * Copyright (C) 2006, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         *
007:         *******************************************************************************
008:         */package com.ibm.icu.charset;
009:
010:        import java.io.ByteArrayInputStream;
011:        import java.io.InputStreamReader;
012:        import java.lang.reflect.Constructor;
013:
014:        import java.lang.reflect.InvocationTargetException;
015:        import java.nio.charset.Charset;
016:        import java.nio.charset.IllegalCharsetNameException;
017:        import java.nio.charset.UnsupportedCharsetException;
018:        import java.util.HashMap;
019:
020:        import com.ibm.icu.lang.UCharacter;
021:
022:        /**
023:         * <p>A subclass of java.nio.Charset for providing implementation of ICU's charset converters.
024:         * This API is used to convert codepage or character encoded data to and
025:         * from UTF-16. You can open a converter with {@link Charset#forName } and {@link #forNameICU }. With that
026:         * converter, you can get its properties, set options, convert your data.</p>
027:         *
028:         * <p>Since many software programs recognize different converter names for
029:         * different types of converters, there are other functions in this API to
030:         * iterate over the converter aliases.</p>
031:         * 
032:         * @draft ICU 3.6
033:         * @provisional This API might change or be removed in a future release.
034:         */
035:        public abstract class CharsetICU extends Charset {
036:
037:            String icuCanonicalName;
038:            String javaCanonicalName;
039:            int options;
040:
041:            float maxCharsPerByte;
042:
043:            boolean useFallback;
044:
045:            String name; /* +4: 60  internal name of the converter- invariant chars */
046:
047:            int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
048:
049:            byte platform; /* +68: 1 platform of the converter (only IBM now) */
050:            byte conversionType; /* +69: 1 conversion type */
051:
052:            int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
053:            int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
054:
055:            byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4  [note:  4 and 8 byte boundary] */
056:            byte subCharLen; /* +76: 1 */
057:
058:            byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
059:            byte hasFromUnicodeFallback; /* +78: 1 */
060:            short unicodeMask; /* +79: 1  bit 0: has supplementary  bit 1: has single surrogates */
061:            byte subChar1; /* +80: 1  single-byte substitution character for IBM MBCS (0 if none) */
062:            byte reserved[/*19*/]; /* +81: 19 to round out the structure */
063:
064:            /**
065:             * 
066:             * @param icuCanonicalName
067:             * @param canonicalName
068:             * @param aliases
069:             * @draft ICU 3.6
070:             * @provisional This API might change or be removed in a future release.
071:             */
072:            protected CharsetICU(String icuCanonicalName, String canonicalName,
073:                    String[] aliases) {
074:                super (canonicalName, aliases);
075:                if (canonicalName.length() == 0) {
076:                    throw new IllegalCharsetNameException(canonicalName);
077:                }
078:                this .javaCanonicalName = canonicalName;
079:                this .icuCanonicalName = icuCanonicalName;
080:            }
081:
082:            /**
083:             * Ascertains if a charset is a sub set of this charset
084:             * Implements the abstract method of super class.
085:             * @param cs charset to test
086:             * @return true if the given charset is a subset of this charset
087:             * @stable ICU 3.6
088:             */
089:            public boolean contains(Charset cs) {
090:                if (null == cs) {
091:                    return false;
092:                } else if (this .equals(cs)) {
093:                    return true;
094:                }
095:                return false;
096:            }
097:
098:            private static final HashMap algorithmicCharsets = new HashMap();
099:            static {
100:                /*algorithmicCharsets.put("BOCU-1",                "com.ibm.icu.charset.CharsetBOCU1" );
101:                algorithmicCharsets.put("CESU-8",                "com.ibm.icu.charset.CharsetCESU8" );
102:                algorithmicCharsets.put("HZ",                    "com.ibm.icu.charset.CharsetHZ" );
103:                algorithmicCharsets.put("imapmailboxname",       "com.ibm.icu.charset.CharsetIMAP" );
104:                algorithmicCharsets.put("ISCII",                 "com.ibm.icu.charset.CharsetISCII" );
105:                algorithmicCharsets.put("iso2022",               "com.ibm.icu.charset.CharsetISO2022" );*/
106:                /*algorithmicCharsets.put("lmbcs1",                "com.ibm.icu.charset.CharsetLMBCS1" );
107:                algorithmicCharsets.put("lmbcs11",               "com.ibm.icu.charset.CharsetLMBCS11" );
108:                algorithmicCharsets.put("lmbcs16",               "com.ibm.icu.charset.CharsetLMBCS16" );
109:                algorithmicCharsets.put("lmbcs17",               "com.ibm.icu.charset.CharsetLMBCS17" );
110:                algorithmicCharsets.put("lmbcs18",               "com.ibm.icu.charset.CharsetLMBCS18" );
111:                algorithmicCharsets.put("lmbcs19",               "com.ibm.icu.charset.CharsetLMBCS19" );
112:                algorithmicCharsets.put("lmbcs2",                "com.ibm.icu.charset.CharsetLMBCS2" );
113:                algorithmicCharsets.put("lmbcs3",                "com.ibm.icu.charset.CharsetLMBCS3" );
114:                algorithmicCharsets.put("lmbcs4",                "com.ibm.icu.charset.CharsetLMBCS4" );
115:                algorithmicCharsets.put("lmbcs5",                "com.ibm.icu.charset.CharsetLMBCS5" );
116:                algorithmicCharsets.put("lmbcs6",                "com.ibm.icu.charset.CharsetLMBCS6" );
117:                algorithmicCharsets.put("lmbcs8",                "com.ibm.icu.charset.CharsetLMBCS8" )
118:                algorithmicCharsets.put("scsu",                  "com.ibm.icu.charset.CharsetSCSU" ); */
119:                algorithmicCharsets.put("US-ASCII",
120:                        "com.ibm.icu.charset.CharsetASCII");
121:                algorithmicCharsets.put("ISO-8859-1",
122:                        "com.ibm.icu.charset.Charset88591");
123:                algorithmicCharsets.put("UTF-16",
124:                        "com.ibm.icu.charset.CharsetUTF16");
125:                algorithmicCharsets.put("UTF-16BE",
126:                        "com.ibm.icu.charset.CharsetUTF16");
127:                algorithmicCharsets.put("UTF-16LE",
128:                        "com.ibm.icu.charset.CharsetUTF16LE");
129:                algorithmicCharsets.put("UTF16_OppositeEndian",
130:                        "com.ibm.icu.charset.CharsetUTF16LE");
131:                algorithmicCharsets.put("UTF16_PlatformEndian",
132:                        "com.ibm.icu.charset.CharsetUTF16");
133:                algorithmicCharsets.put("UTF-32",
134:                        "com.ibm.icu.charset.CharsetUTF32");
135:                algorithmicCharsets.put("UTF-32BE",
136:                        "com.ibm.icu.charset.CharsetUTF32");
137:                algorithmicCharsets.put("UTF-32LE",
138:                        "com.ibm.icu.charset.CharsetUTF32LE");
139:                algorithmicCharsets.put("UTF32_PlatformEndian",
140:                        "com.ibm.icu.charset.CharsetUTF32LE");
141:                algorithmicCharsets.put("UTF32_OppositeEndian",
142:                        "com.ibm.icu.charset.CharsetUTF32");
143:                algorithmicCharsets.put("UTF-7",
144:                        "com.ibm.icu.charset.CharsetUTF7");
145:                algorithmicCharsets.put("UTF-8",
146:                        "com.ibm.icu.charset.CharsetUTF8");
147:            }
148:
149:            /*public*/static final Charset getCharset(String icuCanonicalName,
150:                    String javaCanonicalName, String[] aliases) {
151:                String className = (String) algorithmicCharsets
152:                        .get(icuCanonicalName);
153:                if (className == null) {
154:                    //all the cnv files are loaded as MBCS
155:                    className = "com.ibm.icu.charset.CharsetMBCS";
156:                }
157:                try {
158:                    CharsetICU conv = null;
159:                    Class cs = Class.forName(className);
160:                    Class[] paramTypes = new Class[] { String.class,
161:                            String.class, String[].class };
162:                    final Constructor c = cs.getConstructor(paramTypes);
163:                    Object[] params = new Object[] { icuCanonicalName,
164:                            javaCanonicalName, aliases };
165:
166:                    // Run constructor
167:                    try {
168:                        Object obj = c.newInstance(params);
169:                        if (obj != null && obj instanceof  CharsetICU) {
170:                            conv = (CharsetICU) obj;
171:                            return conv;
172:                        }
173:                    } catch (InvocationTargetException e) {
174:                        throw new UnsupportedCharsetException(icuCanonicalName
175:                                + ": " + "Could not load " + className
176:                                + ". Exception:" + e.getTargetException());
177:                    }
178:                } catch (ClassNotFoundException ex) {
179:                } catch (NoSuchMethodException ex) {
180:                } catch (IllegalAccessException ex) {
181:                } catch (InstantiationException ex) {
182:                }
183:                throw new UnsupportedCharsetException(icuCanonicalName + ": "
184:                        + "Could not load " + className);
185:            }
186:
187:            static final boolean isSurrogate(int c) {
188:                return (((c) & 0xfffff800) == 0xd800);
189:            }
190:
191:            /**
192:             * Always use fallbacks from codepage to Unicode?
193:             * @draft ICU 3.6
194:             * @provisional This API might change or be removed in a future release.
195:             */
196:            final boolean isToUUseFallback() {
197:                return true;
198:            }
199:
200:            /**
201:             * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
202:             * @param c A codepoint
203:             * @draft ICU 3.6
204:             * @provisional This API might change or be removed in a future release.
205:             */
206:            final boolean isFromUUseFallback(int c) {
207:                return (useFallback) || isPrivateUse(c);
208:            }
209:
210:            /**
211:             * Returns the default charset name 
212:             * @draft ICU 3.6
213:             * @provisional This API might change or be removed in a future release.
214:             */
215:            static final String getDefaultCharsetName() {
216:                String defaultEncoding = new InputStreamReader(
217:                        new ByteArrayInputStream(new byte[0])).getEncoding();
218:                return defaultEncoding;
219:            }
220:
221:            static final boolean isPrivateUse(int c) {
222:                return (UCharacter.getType(c) == UCharacter.PRIVATE_USE);
223:            }
224:
225:            /**
226:             * Returns a charset object for the named charset.
227:             * This method gurantee that ICU charset is returned when
228:             * available.  If the ICU charset provider does not support
229:             * the specified charset, then try other charset providers
230:             * including the standard Java charset provider.
231:             * 
232:             * @param charsetName The name of the requested charset,
233:             * may be either a canonical name or an alias
234:             * @return A charset object for the named charset
235:             * @throws IllegalCharsetNameException If the given charset name
236:             * is illegal
237:             * @throws UnsupportedCharsetException If no support for the
238:             * named charset is available in this instance of th Java
239:             * virtual machine
240:             * @draft ICU 3.6
241:             * @provisional This API might change or be removed in a future release.
242:             */
243:            public static Charset forNameICU(String charsetName)
244:                    throws IllegalCharsetNameException,
245:                    UnsupportedCharsetException {
246:                CharsetProviderICU icuProvider = new CharsetProviderICU();
247:                Charset cs = icuProvider.charsetForName(charsetName);
248:                if (cs != null) {
249:                    return cs;
250:                }
251:                return Charset.forName(charsetName);
252:            }
253:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.