Source Code Cross Referenced for PosixCharMap.java in » Internationalization-Localization » icu4j » com » ibm » icu » dev » tool » localeconverter » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.tool.localeconverter 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 2002-2005, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:
008:        package com.ibm.icu.dev.tool.localeconverter;
009:
010:        import java.io.*;
011:        import java.util.*;
012:        import com.ibm.icu.lang.*;
013:        import com.ibm.icu.text.*;
014:
015:        public class PosixCharMap {
                /** Forward table: String key to String value, filled by defineMapping() and read by mapKey(). */
016:            private Hashtable table = new Hashtable();
                /** Value-to-key inverse of {@code table}; built on demand by backmapValue() and reset to null by defineMapping()/undefineMapping(). */
017:            private Hashtable backTable = null;
                /** Optional fallback map that mapKey() and backmapValue() consult when this map has no entry; may be null. */
018:            private PosixCharMap parentMap;
                /** Charset name stored by load(File, String) and passed to the String decoding calls in load(Reader); stays null if never set. */
019:            private String encoding;
020:
021:            public PosixCharMap() {
022:            }
023:
024:            public PosixCharMap(PosixCharMap parent) {
025:                parentMap = parent;
026:            }
027:
028:            public PosixCharMap(String fileName) throws IOException {
029:                this (new FileReader(fileName));
030:            }
031:
032:            public PosixCharMap(String pathName, String fileName)
033:                    throws IOException {
034:                this (new FileReader(new File(pathName, fileName)));
035:            }
036:
037:            public PosixCharMap(Reader inputReader) throws IOException {
038:                load(new BufferedReader(inputReader));
039:            }
040:
                /**
                 * Returns the fallback map consulted when a lookup misses in this map.
                 *
                 * @return the parent map, or null if none has been set
                 */
041:            public PosixCharMap getParent() {
042:                return parentMap;
043:            }
044:
                /**
                 * Replaces the fallback map consulted when a lookup misses in this map.
                 *
                 * @param parent the new parent map; null removes the fallback
                 */
045:            public void setParent(PosixCharMap parent) {
046:                parentMap = parent;
047:            }
048:
049:            public void load(String pathName, String fileName)
050:                    throws IOException {
051:                load(new File(pathName, fileName), "");
052:            }
053:
054:            public void load(String pathName, String fileName, String enc)
055:                    throws IOException {
056:                load(new File(pathName, fileName), enc);
057:            }
058:
059:            public void load(File file, String enc) throws IOException {
060:                encoding = enc;
061:                load(new BufferedReader(new FileReader(file)));
062:            }
063:
                /*
                 * C-style escape table stored as flat pairs: the character that
                 * follows the backslash, then the character it stands for.
                 * unescapeAt() walks the pairs in order and stops as soon as the
                 * candidate is smaller than the current escape character, which is
                 * why the ordering rule below matters. The quote, question-mark and
                 * backslash rows are commented out, not present in the array.
                 */
064:            /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
065:            static private final char[] UNESCAPE_MAP = {
066:            /*"   0x22, 0x22 */
067:            /*'   0x27, 0x27 */
068:            /*?   0x3F, 0x3F */
069:            /*\   0x5C, 0x5C */
070:            /*a*/0x61, 0x07,
071:            /*b*/0x62, 0x08,
072:            /*f*/0x66, 0x0c,
073:            /*n*/0x6E, 0x0a,
074:            /*r*/0x72, 0x0d,
075:            /*t*/0x74, 0x09,
076:            /*v*/0x76, 0x0b };
077:
078:            /**
079:             * Convert an escape to a 32-bit code point value.  We attempt
080:             * to parallel the icu4c unesacpeAt() function.
081:             * @param offset16 an array containing offset to the character
082:             * <em>after</em> the backslash.  Upon return offset16[0] will
083:             * be updated to point after the escape sequence.
084:             * @return character value from 0 to 10FFFF, or -1 on error.
085:             */
086:            public static int unescapeAt(String s, int[] offset16) {
087:                int c;
088:                int result = 0;
089:                int n = 0;
090:                int minDig = 0;
091:                int maxDig = 0;
092:                int bitsPerDigit = 4;
093:                int dig;
094:                int i;
095:
096:                /* Check that offset is in range */
097:                int offset = offset16[0];
098:                int length = s.length();
099:                if (offset < 0 || offset >= length) {
100:                    return -1;
101:                }
102:
103:                /* Fetch first UChar after '\\' */
104:                c = UTF16.charAt(s, offset);
105:                offset += UTF16.getCharCount(c);
106:
107:                /* Convert hexadecimal and octal escapes */
108:                switch (c) {
109:                case 'u':
110:                    minDig = maxDig = 4;
111:                    break;
112:                case 'U':
113:                    minDig = maxDig = 8;
114:                    break;
115:                case 'x':
116:                    minDig = 1;
117:                    maxDig = 2;
118:                    break;
119:                default:
120:                    dig = UCharacter.digit(c, 8);
121:                    if (dig >= 0) {
122:                        minDig = 1;
123:                        maxDig = 3;
124:                        n = 1; /* Already have first octal digit */
125:                        bitsPerDigit = 3;
126:                        result = dig;
127:                    }
128:                    break;
129:                }
130:                if (minDig != 0) {
131:                    while (offset < length && n < maxDig) {
132:                        // TEMPORARY
133:                        // TODO: Restore the char32-based code when UCharacter.digit
134:                        // is working (Bug 66).
135:
136:                        //c = UTF16.charAt(s, offset);
137:                        //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
138:                        c = s.charAt(offset);
139:                        dig = Character.digit((char) c, (bitsPerDigit == 3) ? 8
140:                                : 16);
141:                        if (dig < 0) {
142:                            break;
143:                        }
144:                        result = (result << bitsPerDigit) | dig;
145:                        //offset += UTF16.getCharCount(c);
146:                        ++offset;
147:                        ++n;
148:                    }
149:                    if (n < minDig) {
150:                        return -1;
151:                    }
152:                    offset16[0] = offset;
153:                    return result;
154:                }
155:
156:                /* Convert C-style escapes in table */
157:                for (i = 0; i < UNESCAPE_MAP.length; i += 2) {
158:                    if (c == UNESCAPE_MAP[i]) {
159:                        offset16[0] = offset;
160:                        return UNESCAPE_MAP[i + 1];
161:                    } else if (c < UNESCAPE_MAP[i]) {
162:                        break;
163:                    }
164:                }
165:
166:                /* If no special forms are recognized, then consider
167:                 * the backslash to generically escape the next character. */
168:                offset16[0] = offset;
169:                return c;
170:            }
171:
172:            /**
173:             * Convert all escapes in a given string using unescapeAt().
174:             * @exception IllegalArgumentException if an invalid escape is
175:             * seen.
176:             */
177:            public static String unescape(String s) {
178:                StringBuffer buf = new StringBuffer();
179:                int[] pos = new int[1];
180:                for (int i = 0; i < s.length();) {
181:                    char c = s.charAt(i++);
182:                    if (c == '\\') {
183:                        pos[0] = i;
184:                        int e = unescapeAt(s, pos);
185:                        if (e < 0) {
186:                            throw new IllegalArgumentException(
187:                                    "Invalid escape sequence "
188:                                            + s.substring(i - 1, Math.min(
189:                                                    i + 8, s.length())));
190:                        }
191:                        UTF16.append(buf, e);
192:                        i = pos[0];
193:                    } else {
194:                        buf.append(c);
195:                    }
196:                }
197:                return buf.toString();
198:            }
199:
                /**
                 * Reads character-map definitions from {@code inputReader} and
                 * stores them in this map through defineMapping().
                 *
                 * The input is first scanned with one lexer (states1) until a token
                 * whose data equals "CHARMAP" is found or end of input is reached.
                 * The rest is scanned with a second lexer (states2) until
                 * "ENDCHARMAP" or end of input.  While loading, the char map held by
                 * SymbolTransition is set to null; the previous one is restored in a
                 * finally block.  An EOFException raised during scanning is caught
                 * and ignored, so mappings defined before it are kept.
                 *
                 * @param inputReader source of the charmap text; it is wrapped in a
                 * PushbackReader and is not closed by this method
                 * @throws IOException declared by this method; the String decoding
                 * calls below can raise UnsupportedEncodingException, a subclass
                 */
200:            public void load(Reader inputReader) throws IOException {
                    // Remember the shared char map so it can be restored in the finally block.
201:                PosixCharMap oldMap = SymbolTransition.getCharMap();
202:                SymbolTransition.setCharMap(null);
203:                try {
                        // Token codes handed to the transitions below and compared
                        // against the values returned by nextToken().
204:                    final int TOKEN = 1;
205:                    final int EOF = 2;
206:                    final int EOL = 3;
207:                    final int RANGE = 4;
208:                    final Lex.Transition[][] states1 = { { //state 0: start
209:                                    new SpaceTransition(0),
210:                                    new EOLTransition(EOL),
211:                                    new Lex.EOFTransition(EOF),
212:                                    new Lex.DefaultTransition(
213:                                            Lex.ACCUMULATE_CONSUME, -1) }, { //grab first word
214:                                    new Lex.StringTransition(
215:                                            SpaceTransition.SPACE_CHARS,
216:                                            Lex.IGNORE_CONSUME, TOKEN),
217:                                    new Lex.StringTransition(
218:                                            EOLTransition.EOL_CHARS,
219:                                            Lex.IGNORE_CONSUME, TOKEN),
220:                                    new Lex.EOFTransition(TOKEN),
221:                                    new Lex.DefaultTransition(
222:                                            Lex.ACCUMULATE_CONSUME, -1) } };
223:
224:                    final Lex.Transition[][] states2 = { { //These states only return <symbols>.  All
225:                            //other text is ignored.
226:                            new Lex.EOFTransition(EOF),
227:                            new EOLTransition(EOL),
228:                            new SymbolTransition(TOKEN),
229:                            new SpaceTransition(0),
230:                            new RangeTransition(RANGE),
231:                            new Lex.DefaultTransition(Lex.ACCUMULATE_CONSUME, 0) }, };
232:
233:                    PushbackReader input = new PushbackReader(inputReader);
234:                    Lex p = new Lex(states1, input);
235:                    int state;
                        // Phase 1: skip tokens until the "CHARMAP" keyword or end of input.
236:                    do {
237:                        state = p.nextToken();
238:                    } while ((state != EOF) && !p.dataEquals("CHARMAP"));
239:                    p.accept(EOL);
240:                    if (state != EOF) {
                            // Phase 2: a second lexer over the same PushbackReader reads the entries.
241:                        p = new Lex(states2, input);
242:                        state = p.nextToken();
243:                        while (state != EOF) {
244:
245:                            String key = p.getData();
246:                            if (p.dataEquals("ENDCHARMAP")) {
247:                                break;
248:                            }
249:                            state = p.nextToken();
                                // Single-entry form: the token after the key ended with EOL
                                // and its data is the value for that key.
250:                            while (state == EOL) {
251:                                if (p.dataEquals("ENDCHARMAP")) {
252:                                    break;
253:                                }
254:                                String data = unescape(p.getData());
                                    // NOTE(review): the result of trim() is discarded, so this
                                    // statement leaves data unchanged.
255:                                data.trim();
                                    // "<U...>" / "#U...": everything between the two-character
                                    // prefix and the final character is parsed as a hex number.
256:                                if (data.startsWith("<U")
257:                                        || data.startsWith("#U")) {
258:                                    String numData = data.substring(2, data
259:                                            .length() - 1);
260:                                    int digit = Integer.parseInt(numData, 16);
                                        // The (char) cast keeps only the low 16 bits of the value.
261:                                    defineMapping(key, "" + (char) digit);
                                    // NOTE(review): data has already been through unescape(), which
                                    // itself consumes \x escapes; confirm when this branch is reachable.
262:                                } else if (data.startsWith("\\x")) {
263:                                    byte[] encData = new byte[100];
264:                                    int num = hexToByte(data, encData);
265:                                    String tData = new String(encData, 0, num,
266:                                            encoding);
267:                                    defineMapping(key, tData);
268:                                } else {
                                        // Otherwise each char of data is treated as one byte
                                        // and decoded with the stored encoding.
269:                                    defineMapping(key, byteToChar(data,
270:                                            encoding));
271:                                }
272:                                state = p.nextToken();
273:                                key = p.getData();
274:                            }
275:                            // we come here only if there is a range transition
276:                            if (state == RANGE) {
277:
278:                                String begin = key;
279:
280:                                state = p.nextToken();
281:                                String end = p.getData();
282:
283:                                state = p.nextToken();
284:                                String data = p.getData();
                                    // NOTE(review): as above, the trimmed string is discarded.
285:                                data.trim();
286:                                byte[] encData = new byte[6];
287:                                int num = hexToByte(data, encData);
288:                                String tData = new String(encData, 0, num,
289:                                        encoding);
290:                                String stringVal;
                                    // NOTE(review): beginRange and endRange are assigned only when
                                    // getInt() on the symbol itself returns null and the value stored
                                    // in table for that symbol parses instead; when getInt(begin) or
                                    // getInt(end) succeeds directly they stay 0.  Confirm this is intended.
291:                                int[] val = getInt(begin);
292:                                int beginRange = 0;
293:                                int endRange = 0;
294:                                if (val == null) {
295:                                    val = getInt((String) table.get(begin));
296:                                    if (val != null) {
297:                                        beginRange = val[1];
298:                                    }
299:                                }
300:                                val = getInt(end);
301:                                if (val == null) {
302:                                    val = getInt((String) table.get(end));
303:                                    if (val != null) {
304:                                        endRange = val[1];
305:                                    }
306:                                }
                                    // val is the result for 'end' at this point; if both lookups
                                    // above returned null this line throws NullPointerException.
307:                                stringVal = key.substring(0, val[0]);
308:                                int digit = (int) (char) tData.charAt(0);
                                    // One mapping per number in the range; the mapped character
                                    // advances by one for each step.
309:                                while (beginRange <= endRange) {
310:                                    defineMapping(
311:                                            (stringVal + beginRange + ">"), ""
312:                                                    + (char) digit++);
313:                                    beginRange++;
314:                                }
315:
316:                                state = p.nextToken();
317:                                key = p.getData();
318:                            }
319:
320:                            //state = p.nextToken();
321:                        }
322:                    }
                    // EOFException is swallowed here; presumably the lexer uses it to
                    // signal end of input -- TODO confirm.
323:                } catch (EOFException e) {
324:                } finally {
325:                    SymbolTransition.setCharMap(oldMap);
326:                }
327:            }
328:
329:            public int[] getInt(String data) {
330:                if (data == null) {
331:                    return null;
332:                }
333:                int i = 0;
334:                int[] retVal = new int[2];
335:                int len = data.length();
336:                while (i < len) {
337:                    if ((data.charAt(i)) - 0x30 < (0x39 - 0x30)) {
338:                        break;
339:                    }
340:                    i++;
341:                }
342:                if (i < len) {
343:                    String sub = data.substring(i, len - 1);
344:                    retVal[0] = i;
345:                    retVal[1] = Integer.parseInt(sub, 10);
346:                    return retVal;
347:                }
348:                return null;
349:            }
350:
351:            public int hexToByte(String data, byte[] retval) {
352:                String tData = data;
353:                int i = 0;
354:                for (i = 0; i < data.length() / 4; i++) {
355:                    if (tData.charAt(0) == '\\' && tData.charAt(1) == 'x') {
356:                        String numData = tData.substring(2, 4);
357:                        retval[i] = (byte) Integer.parseInt(numData, 16);
358:                        tData = tData.substring(4, tData.length());
359:                    }
360:                }
361:                return i;
362:            }
363:
364:            public String byteToChar(String data, String encoding)
365:                    throws UnsupportedEncodingException {
366:
367:                byte[] bytes = new byte[data.length()];
368:                for (int i = 0; i < data.length(); i++) {
369:                    char ch = data.charAt(i);
370:                    if (ch > 0xFF) {
371:                        throw new RuntimeException(
372:                                "Bytes in the string are greater than 0xFF");
373:                    }
374:                    bytes[i] = (byte) ch;
375:                }
376:                return new String(bytes, encoding);
377:            }
378:
                /**
                 * Adds or replaces the mapping for {@code from} and discards the
                 * cached reverse table so that backmapValue() rebuilds it.
                 *
                 * @param from the key
                 * @param to the value stored for the key
                 */
379:            public void defineMapping(String from, String to) {
380:                table.put(from, to);
381:                backTable = null;
382:            }
383:
                /**
                 * Removes the mapping for {@code from}, if any, and discards the
                 * cached reverse table so that backmapValue() rebuilds it.
                 *
                 * @param from the key to remove
                 */
384:            public void undefineMapping(String from) {
385:                table.remove(from);
386:                backTable = null;
387:            }
388:
389:            public void swap() {
390:                Hashtable newTable = new Hashtable();
391:                Enumeration enumer = table.keys();
392:                while (enumer.hasMoreElements()) {
393:                    String key = (String) enumer.nextElement();
394:                    String code = (String) table.get(key);
395:
396:                    String newKey = toSymbol(code);
397:                    String newCode = toLiteral(key);
398:                    String prevCode = (String) newTable.get(newKey);
399:                    if (prevCode == null || prevCode.compareTo(newCode) > 0) {
400:                        newTable.put(newKey, newCode);
401:                    }
402:                }
403:                table = newTable;
404:            }
405:
406:            private String toLiteral(String code) {
407:                String data = code.substring(2, code.length() - 1);
408:                int digit = Integer.parseInt(data, 16);
409:                return "" + (char) digit;
410:            }
411:
412:            private String toSymbol(String code) {
413:                StringBuffer escapeBuffer = new StringBuffer();
414:                escapeBuffer.append(">");
415:                for (int i = 0; i < code.length(); i++) {
416:                    int value = ((int) code.charAt(i)) & 0xFFFF;
417:                    while ((value > 0) || (escapeBuffer.length() < 5)) {
418:                        char digit = Character.forDigit(value % 16, 16);
419:                        escapeBuffer.append(digit);
420:                        value >>= 4;
421:                    }
422:                }
423:                escapeBuffer.append("U<");
424:                escapeBuffer.reverse();
425:                return escapeBuffer.toString();
426:            }
427:
428:            public void dump(PrintStream out) {
429:                StringBuffer escapeBuffer = new StringBuffer();
430:                Enumeration enumer = table.keys();
431:                while (enumer.hasMoreElements()) {
432:                    String key = (String) enumer.nextElement();
433:                    String code = (String) table.get(key);
434:                    out.print(key);
435:                    out.print("       <U");
436:                    for (int i = 0; i < code.length(); i++) {
437:                        int value = ((int) code.charAt(i)) & 0xFFFF;
438:                        escapeBuffer.setLength(0);
439:                        while ((value > 0) || (escapeBuffer.length() < 4)) {
440:                            char digit = Character.forDigit(value % 16, 16);
441:                            escapeBuffer.append(digit);
442:                            value >>= 4;
443:                        }
444:                        escapeBuffer.reverse();
445:                        out.print(escapeBuffer.toString());
446:                    }
447:                    out.println(">");
448:                }
449:            }
450:
451:            public String mapKey(final String key) {
452:                String result = (String) table.get(key);
453:                if (result == null) {
454:                    if (parentMap != null) {
455:                        result = parentMap.mapKey(key);
456:                    } else {
457:                        result = key;
458:                    }
459:                }
460:                return result;
461:            }
462:
463:            public String backmapValue(final String value) {
464:                if (backTable == null) {
465:                    backTable = new Hashtable();
466:                    Enumeration enumer = table.keys();
467:                    while (enumer.hasMoreElements()) {
468:                        String key = (String) enumer.nextElement();
469:                        String val = (String) table.get(key);
470:                        backTable.put(val, key);
471:                    }
472:                }
473:                String result = (String) backTable.get(value);
474:                if (result == null) {
475:                    if (parentMap != null) {
476:                        result = parentMap.backmapValue(value);
477:                    } else {
478:                        result = value;
479:                    }
480:                }
481:                return result;
482:            }
483:
                /**
                 * Returns an enumeration of the keys of this map's own table;
                 * entries of the parent map are not included.
                 *
                 * @return the keys of the forward table
                 */
484:            public Enumeration keys() {
485:                return table.keys();
486:            }
487:
                /**
                 * Returns an enumeration of the values of this map's own table;
                 * entries of the parent map are not included.
                 *
                 * @return the values of the forward table
                 */
488:            public Enumeration elements() {
489:                return table.elements();
490:            }
491:
492:            public static void main(String args[]) {
493:                try {
494:                    PosixCharMap map1 = new PosixCharMap(
495:                            "C:\\projects\\com\\taligent\\localeconverter\\CharMaps",
496:                            "IBM-1129.UPMAP100.txt");
497:                    map1.swap();
498:                    map1.dump(System.out);
499:
500:                    SymbolTransition.setCharMap(map1);
501:                    System.out.println();
502:                    System.out.println();
503:
504:                    //PosixCharMap map = new PosixCharMap("C:\\projects\\data\\ISO-8859-1.html");
505:                    PosixCharMap map = new PosixCharMap(
506:                            "C:\\projects\\com\\taligent\\localeconverter\\CharMaps",
507:                            "ibm1129.txt");
508:                    map.dump(System.out);
509:                    System.out.println();
510:                } catch (Exception e) {
511:                    System.out.println(e);
512:                }
513:            }
514:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.