Source Code Cross Referenced for ImplicitCEGenerator.java in  » Internationalization-Localization » icu4j » com » ibm » icu » impl » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.impl 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /**
002:         *******************************************************************************
003:         * Copyright (C) 2004, International Business Machines Corporation and         *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */package com.ibm.icu.impl;
007:
008:        /**
009:         * For generation of Implicit CEs
010:         * @author Mark Davis
011:         *
012:         * Cleaned up so that changes can be made more easily.
013:         * Old values:
014:         # First Implicit: E26A792D
015:         # Last Implicit: E3DC70C0
016:         # First CJK: E0030300
017:         # Last CJK: E0A9DD00
018:         # First CJK_A: E0A9DF00
019:         # Last CJK_A: E0DE3100
020:         @internal
021:         */
022:        public class ImplicitCEGenerator {
023:
024:            /**
025:             * constants
026:             */
027:            static final boolean DEBUG = false;
028:
029:            static final long topByte = 0xFF000000L;
030:            static final long bottomByte = 0xFFL;
031:            static final long fourBytes = 0xFFFFFFFFL;
032:
033:            static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2
034:
035:            public static final int CJK_BASE = 0x4E00, CJK_LIMIT = 0x9FFF + 1,
036:                    CJK_COMPAT_USED_BASE = 0xFA0E,
037:                    CJK_COMPAT_USED_LIMIT = 0xFA2F + 1, CJK_A_BASE = 0x3400,
038:                    CJK_A_LIMIT = 0x4DBF + 1, CJK_B_BASE = 0x20000,
039:                    CJK_B_LIMIT = 0x2A6DF + 1;
040:
041:            private void throwError(String title, int cp) {
042:                throw new IllegalArgumentException(title + "\t"
043:                        + Utility.hex(cp, 6) + "\t"
044:                        + Utility.hex(getImplicitFromRaw(cp) & fourBytes));
045:            }
046:
047:            private void throwError(String title, long ce) {
048:                throw new IllegalArgumentException(title + "\t"
049:                        + Utility.hex(ce & fourBytes));
050:            }
051:
052:            private void show(int i) {
053:                if (i >= 0 && i <= MAX_INPUT) {
054:                    System.out.println(Utility.hex(i) + "\t"
055:                            + Utility.hex(getImplicitFromRaw(i) & fourBytes));
056:                }
057:            }
058:
059:            /**
060:             * Precomputed by constructor
061:             */
062:            int final3Multiplier;
063:            int final4Multiplier;
064:            int final3Count;
065:            int final4Count;
066:            int medialCount;
067:            int min3Primary;
068:            int min4Primary;
069:            int max4Primary;
070:            int minTrail;
071:            int maxTrail;
072:            int max3Trail;
073:            int max4Trail;
074:            int min4Boundary;
075:
076:            public int getGap4() {
077:                return final4Multiplier - 1;
078:            }
079:
080:            public int getGap3() {
081:                return final3Multiplier - 1;
082:            }
083:
084:            // old comment
085:            // we must skip all 00, 01, 02, FF bytes, so most bytes have 252 values
086:            // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
087:            // we shift so that HAN all has the same first primary, for compression.
088:            // for the 4 byte case, we make the gap as large as we can fit.
089:
090:            /**
091:             * Supply parameters for generating implicit CEs
092:             */
093:            public ImplicitCEGenerator(int minPrimary, int maxPrimary) {
094:                // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
095:                this (minPrimary, maxPrimary, 0x04, 0xFE, 1, 1);
096:            }
097:
098:            /**
099:             * Set up to generate implicits.
100:             * @param minPrimary
101:             * @param maxPrimary
102:             * @param minTrail final byte
103:             * @param maxTrail final byte
104:             * @param gap3 the gap we leave for tailoring for 3-byte forms
105:             * @param primaries3count number of 3-byte primarys we can use (normally 1)
106:             */
107:            public ImplicitCEGenerator(int minPrimary, int maxPrimary,
108:                    int minTrail, int maxTrail, int gap3, int primaries3count) {
109:                // some simple parameter checks
110:                if (minPrimary < 0 || minPrimary >= maxPrimary
111:                        || maxPrimary > 0xFF) {
112:                    throw new IllegalArgumentException("bad lead bytes");
113:                }
114:                if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) {
115:                    throw new IllegalArgumentException("bad trail bytes");
116:                }
117:                if (primaries3count < 1) {
118:                    throw new IllegalArgumentException(
119:                            "bad three-byte primaries");
120:                }
121:
122:                this .minTrail = minTrail;
123:                this .maxTrail = maxTrail;
124:
125:                min3Primary = minPrimary;
126:                max4Primary = maxPrimary;
127:                // compute constants for use later.
128:                // number of values we can use in trailing bytes
129:                // leave room for empty values between AND above, e.g. if gap = 2
130:                // range 3..7 => +3 -4 -5 -6 -7: so 1 value
131:                // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
132:                // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
133:                final3Multiplier = gap3 + 1;
134:                final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
135:                max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
136:
137:                // medials can use full range
138:                medialCount = (maxTrail - minTrail + 1);
139:                // find out how many values fit in each form
140:                int threeByteCount = medialCount * final3Count;
141:                // now determine where the 3/4 boundary is.
142:                // we use 3 bytes below the boundary, and 4 above
143:                int primariesAvailable = maxPrimary - minPrimary + 1;
144:                int primaries4count = primariesAvailable - primaries3count;
145:
146:                int min3ByteCoverage = primaries3count * threeByteCount;
147:                min4Primary = minPrimary + primaries3count;
148:                min4Boundary = min3ByteCoverage;
149:                // Now expand out the multiplier for the 4 bytes, and redo.
150:
151:                int totalNeeded = MAX_INPUT - min4Boundary;
152:                int neededPerPrimaryByte = divideAndRoundUp(totalNeeded,
153:                        primaries4count);
154:                if (DEBUG)
155:                    System.out.println("neededPerPrimaryByte: "
156:                            + neededPerPrimaryByte);
157:
158:                int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte,
159:                        medialCount * medialCount);
160:                if (DEBUG)
161:                    System.out.println("neededPerFinalByte: "
162:                            + neededPerFinalByte);
163:
164:                int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
165:                if (DEBUG)
166:                    System.out.println("expandedGap: " + gap4);
167:                if (gap4 < 1)
168:                    throw new IllegalArgumentException("must have larger gap4s");
169:
170:                final4Multiplier = gap4 + 1;
171:                final4Count = neededPerFinalByte;
172:                max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
173:
174:                if (primaries4count * medialCount * medialCount * final4Count < MAX_INPUT) {
175:                    throw new IllegalArgumentException("internal error");
176:                }
177:                if (DEBUG) {
178:                    System.out.println("final4Count: " + final4Count);
179:                    for (int counter = 0; counter < final4Count; ++counter) {
180:                        int value = minTrail + (1 + counter) * final4Multiplier;
181:                        System.out.println(counter + "\t" + value + "\t"
182:                                + Utility.hex(value));
183:                    }
184:                }
185:            }
186:
187:            static public int divideAndRoundUp(int a, int b) {
188:                return 1 + (a - 1) / b;
189:            }
190:
191:            /**
192:             * Converts implicit CE into raw integer
193:             * @param implicit
194:             * @return -1 if illegal format
195:             */
196:            public int getRawFromImplicit(int implicit) {
197:                int result;
198:                int b3 = implicit & 0xFF;
199:                implicit >>= 8;
200:                int b2 = implicit & 0xFF;
201:                implicit >>= 8;
202:                int b1 = implicit & 0xFF;
203:                implicit >>= 8;
204:                int b0 = implicit & 0xFF;
205:
206:                // simple parameter checks
207:                if (b0 < min3Primary || b0 > max4Primary || b1 < minTrail
208:                        || b1 > maxTrail)
209:                    return -1;
210:                // normal offsets
211:                b1 -= minTrail;
212:
213:                // take care of the final values, and compose
214:                if (b0 < min4Primary) {
215:                    if (b2 < minTrail || b2 > max3Trail || b3 != 0)
216:                        return -1;
217:                    b2 -= minTrail;
218:                    int remainder = b2 % final3Multiplier;
219:                    if (remainder != 0)
220:                        return -1;
221:                    b0 -= min3Primary;
222:                    b2 /= final3Multiplier;
223:                    result = ((b0 * medialCount) + b1) * final3Count + b2;
224:                } else {
225:                    if (b2 < minTrail || b2 > maxTrail || b3 < minTrail
226:                            || b3 > max4Trail)
227:                        return -1;
228:                    b2 -= minTrail;
229:                    b3 -= minTrail;
230:                    int remainder = b3 % final4Multiplier;
231:                    if (remainder != 0)
232:                        return -1;
233:                    b3 /= final4Multiplier;
234:                    b0 -= min4Primary;
235:                    result = (((b0 * medialCount) + b1) * medialCount + b2)
236:                            * final4Count + b3 + min4Boundary;
237:                }
238:                // final check
239:                if (result < 0 || result > MAX_INPUT)
240:                    return -1;
241:                return result;
242:            }
243:
244:            /**
245:             * Generate the implicit CE, from raw integer.
246:             * Left shifted to put the first byte at the top of an int.
247:             * @param cp code point
248:             * @return Primary implicit weight
249:             */
250:            public int getImplicitFromRaw(int cp) {
251:                if (cp < 0 || cp > MAX_INPUT) {
252:                    throw new IllegalArgumentException(
253:                            "Code point out of range " + Utility.hex(cp));
254:                }
255:                int last0 = cp - min4Boundary;
256:                if (last0 < 0) {
257:                    int last1 = cp / final3Count;
258:                    last0 = cp % final3Count;
259:
260:                    int last2 = last1 / medialCount;
261:                    last1 %= medialCount;
262:
263:                    last0 = minTrail + last0 * final3Multiplier; // spread out, leaving gap at start
264:                    last1 = minTrail + last1; // offset
265:                    last2 = min3Primary + last2; // offset
266:
267:                    if (last2 >= min4Primary) {
268:                        throw new IllegalArgumentException(
269:                                "4-byte out of range: " + Utility.hex(cp)
270:                                        + ", " + Utility.hex(last2));
271:                    }
272:
273:                    return (last2 << 24) + (last1 << 16) + (last0 << 8);
274:                } else {
275:                    int last1 = last0 / final4Count;
276:                    last0 %= final4Count;
277:
278:                    int last2 = last1 / medialCount;
279:                    last1 %= medialCount;
280:
281:                    int last3 = last2 / medialCount;
282:                    last2 %= medialCount;
283:
284:                    last0 = minTrail + last0 * final4Multiplier; // spread out, leaving gap at start           
285:                    last1 = minTrail + last1; // offset
286:                    last2 = minTrail + last2; // offset
287:                    last3 = min4Primary + last3; // offset
288:
289:                    if (last3 > max4Primary) {
290:                        throw new IllegalArgumentException(
291:                                "4-byte out of range: " + Utility.hex(cp)
292:                                        + ", " + Utility.hex(last3));
293:                    }
294:
295:                    return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
296:                }
297:            }
298:
299:            /**
300:             * Gets an Implicit from a code point. Internally, 
301:             * swaps (which produces a raw value 0..220000, 
302:             * then converts raw to implicit.
303:             * @param cp
304:             * @return Primary implicit weight
305:             */
306:            public int getImplicitFromCodePoint(int cp) {
307:                if (DEBUG)
308:                    System.out.println("Incoming: " + Utility.hex(cp));
309:
310:                // Produce Raw value
311:                // note, we add 1 so that the first value is always empty!!
312:                cp = ImplicitCEGenerator.swapCJK(cp) + 1;
313:                // we now have a range of numbers from 0 to 220000.
314:
315:                if (DEBUG)
316:                    System.out.println("CJK swapped: " + Utility.hex(cp));
317:
318:                return getImplicitFromRaw(cp);
319:            }
320:
321:            /**
322:             * Function used to: 
323:             * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
324:             * b) bump any non-CJK characters by 10FFFF.
325:             * The relevant blocks are:
326:             * A:    4E00..9FFF; CJK Unified Ideographs
327:             *       F900..FAFF; CJK Compatibility Ideographs
328:             * B:    3400..4DBF; CJK Unified Ideographs Extension A
329:             *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
330:             * As long as
331:             *   no new B characters are allocated between 4E00 and FAFF, and
332:             *   no new A characters are outside of this range,
333:             * (very high probability) this simple code will work.
334:             * The reordered blocks are:
335:             * Block1 is CJK
336:             * Block2 is CJK_COMPAT_USED
337:             * Block3 is CJK_A
338:             * (all contiguous)
339:             * Any other CJK gets its normal code point
340:             * Any non-CJK gets +10FFFF
341:             * When we reorder Block1, we make sure that it is at the very start,
342:             * so that it will use a 3-byte form.
343:             * Warning: the we only pick up the compatibility characters that are
344:             * NOT decomposed, so that block is smaller!
345:             */
346:
347:            static int NON_CJK_OFFSET = 0x110000;
348:
349:            static int swapCJK(int i) {
350:
351:                if (i >= CJK_BASE) {
352:                    if (i < CJK_LIMIT)
353:                        return i - CJK_BASE;
354:
355:                    if (i < CJK_COMPAT_USED_BASE)
356:                        return i + NON_CJK_OFFSET;
357:
358:                    if (i < CJK_COMPAT_USED_LIMIT)
359:                        return i - CJK_COMPAT_USED_BASE
360:                                + (CJK_LIMIT - CJK_BASE);
361:                    if (i < CJK_B_BASE)
362:                        return i + NON_CJK_OFFSET;
363:
364:                    if (i < CJK_B_LIMIT)
365:                        return i; // non-BMP-CJK
366:
367:                    return i + NON_CJK_OFFSET; // non-CJK
368:                }
369:                if (i < CJK_A_BASE)
370:                    return i + NON_CJK_OFFSET;
371:
372:                if (i < CJK_A_LIMIT)
373:                    return i - CJK_A_BASE + (CJK_LIMIT - CJK_BASE)
374:                            + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
375:                return i + NON_CJK_OFFSET; // non-CJK
376:            }
377:
378:            /**
379:             * @return Minimal trail value
380:             */
381:            public int getMinTrail() {
382:                return minTrail;
383:            }
384:
385:            /**
386:             * @return Maximal trail value
387:             */
388:            public int getMaxTrail() {
389:                return maxTrail;
390:            }
391:
392:            public int getCodePointFromRaw(int i) {
393:                i--;
394:                int result = 0;
395:                if (i >= NON_CJK_OFFSET) {
396:                    result = i - NON_CJK_OFFSET;
397:                } else if (i >= CJK_B_BASE) {
398:                    result = i;
399:                } else if (i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE)
400:                        + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
401:                    // rest of CJKs, compacted
402:                    if (i < CJK_LIMIT - CJK_BASE) {
403:                        result = i + CJK_BASE;
404:                    } else if (i < (CJK_LIMIT - CJK_BASE)
405:                            + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
406:                        result = i + CJK_COMPAT_USED_BASE
407:                                - (CJK_LIMIT - CJK_BASE);
408:                    } else {
409:                        result = i
410:                                + CJK_A_BASE
411:                                - (CJK_LIMIT - CJK_BASE)
412:                                - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
413:                    }
414:                } else {
415:                    result = -1;
416:                }
417:                return result;
418:            }
419:
420:            public int getRawFromCodePoint(int i) {
421:                return swapCJK(i) + 1;
422:            }
423:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.