Source Code Cross Referenced for CollatorReader.java (icu4j, com.ibm.icu.text, Internationalization / Localization)

001:        /**
002:         *******************************************************************************
003:         * Copyright (C) 1996-2004, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */package com.ibm.icu.text;
007:
008:        import java.io.BufferedInputStream;
009:        import java.io.ByteArrayInputStream;
010:        import java.io.InputStream;
011:        import java.io.DataInputStream;
012:        import java.io.IOException;
013:
014:        import com.ibm.icu.impl.ICUBinary;
015:        import com.ibm.icu.impl.ICUData;
016:        import com.ibm.icu.impl.ICUResourceBundle;
017:        import com.ibm.icu.impl.IntTrie;
018:        import com.ibm.icu.lang.UCharacter;
019:        import com.ibm.icu.util.VersionInfo;
020:        import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
021:        import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
022:
023:        /**
024:         * <p>Internal reader class for ICU data file uca.icu containing 
025:         * Unicode Collation Algorithm data.</p> 
026:         * <p>This class simply reads uca.icu, authenticates that it is a valid
027:         * ICU data file and splits its contents up into blocks of data for use in
028:         * <a href="Collator.html">com.ibm.icu.text.Collator</a>.
029:         * </p>
030:         * <p>uca.icu, which is in big-endian format, is jarred together with this
031:         * package.</p>
032:         * @author Syn Wee Quek
033:         * @since release 2.2, April 18 2002
034:         * @draft 2.2
035:         */
036:
037:        final class CollatorReader {
038:            static char[] read(RuleBasedCollator rbc, UCAConstants ucac)
039:                    throws IOException {
040:                InputStream i = ICUData
041:                        .getRequiredStream(ICUResourceBundle.ICU_BUNDLE
042:                                + "/ucadata.icu");
043:                BufferedInputStream b = new BufferedInputStream(i, 90000);
044:                CollatorReader reader = new CollatorReader(b);
045:                char[] result = reader.readImp(rbc, ucac);
046:                b.close();
047:                return result;
048:            }
049:
050:            static void initRBC(RuleBasedCollator rbc, byte[] data)
051:                    throws IOException {
052:                final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
053:
054:                InputStream i = new ByteArrayInputStream(data);
055:                BufferedInputStream b = new BufferedInputStream(i);
056:                CollatorReader reader = new CollatorReader(b, false);
057:                if (data.length > MIN_BINARY_DATA_SIZE_) {
058:                    reader.readImp(rbc, null);
059:                } else {
060:                    reader.readHeader(rbc);
061:                    reader.readOptions(rbc);
062:                    // duplicating UCA_'s data
063:                    rbc.setWithUCATables();
064:                }
065:            }
066:
067:            static InverseUCA getInverseUCA() throws IOException {
068:                InverseUCA result = null;
069:                InputStream i = ICUData
070:                        .getRequiredStream(ICUResourceBundle.ICU_BUNDLE
071:                                + "/invuca.icu");
072:                //        try    {
073:                //            String invdat = "/com/ibm/icu/impl/data/invuca.icu";
074:                //            InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
075:                BufferedInputStream b = new BufferedInputStream(i, 110000);
076:                result = CollatorReader.readInverseUCA(b);
077:                b.close();
078:                i.close();
079:                return result;
080:                //        } catch (Exception e) {
081:                //            throw new RuntimeException(e.getMessage());
082:                //        }
083:            }
084:
085:            // private constructors ------------------------------------------------
086:
087:            /**
088:             * <p>Private constructor.</p>
089:             * @param inputStream ICU collator data file input stream
090:             * @exception IOException thrown if the data file fails authentication
091:             * @draft 2.1
092:             */
093:            private CollatorReader(InputStream inputStream) throws IOException {
094:                this (inputStream, true);
095:                /*
096:                byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
097:                // weiv: check that we have the correct Unicode version in 
098:                // binary files
099:                VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
100:                if(UnicodeVersion[0] != UCDVersion.getMajor() 
101:                || UnicodeVersion[1] != UCDVersion.getMinor()) {
102:                    throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
103:                }
104:                m_dataInputStream_ = new DataInputStream(inputStream);
105:                 */
106:            }
107:
108:            /**
109:             * <p>Private constructor.</p>
110:             * @param inputStream ICU collator data file input stream
111:             * @param readICUHeader flag to indicate whether the ICU header has to be read
112:             * @exception IOException thrown if the data file fails authentication
113:             * @draft 2.1
114:             */
115:            private CollatorReader(InputStream inputStream,
116:                    boolean readICUHeader) throws IOException {
117:                if (readICUHeader) {
118:                    byte[] UnicodeVersion = ICUBinary.readHeader(inputStream,
119:                            DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
120:                    // weiv: check that we have the correct Unicode version in 
121:                    // binary files
122:                    VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
123:                    if (UnicodeVersion[0] != UCDVersion.getMajor()
124:                            || UnicodeVersion[1] != UCDVersion.getMinor()) {
125:                        throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
126:                    }
127:                }
128:                m_dataInputStream_ = new DataInputStream(inputStream);
129:            }
130:
131:            // protected methods -------------------------------------------------
132:
133:            /**
134:             * Reads the header from the data input stream and breaks it up into
135:             * meaningful Collator data.
136:             * @param rbc RuleBasedCollator to populate with header information
137:             * @exception IOException thrown when there's a data error.
138:             */
139:            private void readHeader(RuleBasedCollator rbc) throws IOException {
140:                m_size_ = m_dataInputStream_.readInt();
141:                // all the offsets are in bytes
142:                // to get the address add to the header address and cast properly 
143:                // Default options int options
144:                m_headerSize_ = m_dataInputStream_.readInt(); // start of options
145:                int readcount = 8; // for size and headersize
146:                // structure which holds values for indirect positioning and implicit 
147:                // ranges
148:                int UCAConst = m_dataInputStream_.readInt();
149:                readcount += 4;
150:                // this one is needed only for UCA, to copy the appropriate 
151:                // contractions
152:                m_dataInputStream_.skip(4);
153:                readcount += 4;
154:                // reserved for future use
155:                m_dataInputStream_.skipBytes(4);
156:                readcount += 4;
157:                // const uint8_t *mappingPosition; 
158:                int mapping = m_dataInputStream_.readInt();
159:                readcount += 4;
160:                // uint32_t *expansion; 
161:                rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
162:                readcount += 4;
163:                // UChar *contractionIndex;     
164:                rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
165:                readcount += 4;
166:                // uint32_t *contractionCEs;
167:                int contractionCE = m_dataInputStream_.readInt();
168:                readcount += 4;
169:                // needed for various closures int contractionSize 
170:                int contractionSize = m_dataInputStream_.readInt();
171:                readcount += 4;
172:                // array of last collation element in expansion
173:                int expansionEndCE = m_dataInputStream_.readInt();
174:                readcount += 4;
175:                // array of maximum expansion size corresponding to the expansion
176:                // collation elements with last element in expansionEndCE
177:                int expansionEndCEMaxSize = m_dataInputStream_.readInt();
178:                readcount += 4;
179:                // size of endExpansionCE int expansionEndCESize
180:                m_dataInputStream_.skipBytes(4);
181:                readcount += 4;
182:                // hash table of unsafe code points 
183:                int unsafe = m_dataInputStream_.readInt();
184:                readcount += 4;
185:                // hash table of final code points in contractions.
186:                int contractionEnd = m_dataInputStream_.readInt();
187:                readcount += 4;
188:                // int CEcount = m_dataInputStream_.readInt();
189:                m_dataInputStream_.skipBytes(4);
190:                readcount += 4;
191:                // is jamoSpecial
192:                rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
193:                readcount++;
194:                // padding
195:                m_dataInputStream_.skipBytes(3);
196:                readcount += 3;
197:                rbc.m_version_ = readVersion(m_dataInputStream_);
198:                readcount += 4;
199:                rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
200:                readcount += 4;
201:                rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
202:                readcount += 4;
203:                // byte charsetName[] = new byte[32]; // for charset CEs
204:                m_dataInputStream_.skipBytes(32);
205:                readcount += 32;
206:                m_dataInputStream_.skipBytes(56); // for future use
207:                readcount += 56;
208:                if (m_headerSize_ < readcount) {
209:                    throw new IOException("Internal Error: Header size error");
210:                }
211:                m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
212:
213:                if (rbc.m_contractionOffset_ == 0) { // contraction can be null
214:                    rbc.m_contractionOffset_ = mapping;
215:                    contractionCE = mapping;
216:                }
217:                m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
218:                m_expansionSize_ = rbc.m_contractionOffset_
219:                        - rbc.m_expansionOffset_;
220:                m_contractionIndexSize_ = contractionCE
221:                        - rbc.m_contractionOffset_;
222:                m_contractionCESize_ = mapping - contractionCE;
223:                m_trieSize_ = expansionEndCE - mapping;
224:                m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
225:                m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
226:                m_unsafeSize_ = contractionEnd - unsafe;
227:                m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
228:                // treat it as normal collator first
229:                // for normal collator there is no UCA contraction
230:                m_contractionEndSize_ = m_size_ - contractionEnd;
231:
232:                rbc.m_contractionOffset_ >>= 1; // casting to ints
233:                rbc.m_expansionOffset_ >>= 2; // casting to chars
234:            }
235:
236:            /**
237:             * Reads the collation options from the data input stream and updates
238:             * the argument Collator with the results.
239:             * @param rbc RuleBasedCollator to populate
240:             * @exception IOException thrown when there's a data error.
241:             * @draft 2.2
242:             */
243:            private void readOptions(RuleBasedCollator rbc) throws IOException {
244:                int readcount = 0;
245:                rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
246:                readcount += 4;
247:                rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
248:                readcount += 4;
249:                rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_
250:                        .readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
251:                readcount += 4;
252:                rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
253:                readcount += 4;
254:                rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
255:                readcount += 4;
256:                int value = m_dataInputStream_.readInt();
257:                readcount += 4;
258:                if (value == RuleBasedCollator.AttributeValue.ON_) {
259:                    value = Collator.CANONICAL_DECOMPOSITION;
260:                } else {
261:                    value = Collator.NO_DECOMPOSITION;
262:                }
263:                rbc.m_defaultDecomposition_ = value;
264:                rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
265:                readcount += 4;
266:                rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
267:                readcount += 4;
268:                rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_
269:                        .readInt() == RuleBasedCollator.AttributeValue.ON_);
270:                readcount += 4;
271:                m_dataInputStream_.skip(60); // reserved for future use
272:                readcount += 60;
273:                m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
274:                if (m_optionSize_ < readcount) {
275:                    throw new IOException("Internal Error: Option size error");
276:                }
277:            }
278:
279:            /**
280:             * Reads the remaining stream of data and breaks it up into
281:             * meaningful Collator data.
282:             * @param rbc RuleBasedCollator to populate
283:             * @param UCAConst object to fill with UCA constants if we are reading
284:             *                 the UCA collator; otherwise pass null
285:             * @return char array of UCA contractions if we are reading the UCA
286:             *                        collator, otherwise null
287:             * @exception IOException thrown when there's a data error.
288:             * @draft 2.2
289:             */
290:            private char[] readImp(RuleBasedCollator rbc,
291:                    RuleBasedCollator.UCAConstants UCAConst) throws IOException {
292:                readHeader(rbc);
293:                // header size has been checked by readHeader
294:                int readcount = m_headerSize_;
295:                // option size has been checked by readOptions
296:                readOptions(rbc);
297:                readcount += m_optionSize_;
298:                m_expansionSize_ >>= 2;
299:                rbc.m_expansion_ = new int[m_expansionSize_];
300:                for (int i = 0; i < m_expansionSize_; i++) {
301:                    rbc.m_expansion_[i] = m_dataInputStream_.readInt();
302:                }
303:                readcount += (m_expansionSize_ << 2);
304:                if (m_contractionIndexSize_ > 0) {
305:                    m_contractionIndexSize_ >>= 1;
306:                    rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
307:                    for (int i = 0; i < m_contractionIndexSize_; i++) {
308:                        rbc.m_contractionIndex_[i] = m_dataInputStream_
309:                                .readChar();
310:                    }
311:                    readcount += (m_contractionIndexSize_ << 1);
312:                    m_contractionCESize_ >>= 2;
313:                    rbc.m_contractionCE_ = new int[m_contractionCESize_];
314:                    for (int i = 0; i < m_contractionCESize_; i++) {
315:                        rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
316:                    }
317:                    readcount += (m_contractionCESize_ << 2);
318:                }
319:                rbc.m_trie_ = new IntTrie(m_dataInputStream_,
320:                        RuleBasedCollator.DataManipulate.getInstance());
321:                if (!rbc.m_trie_.isLatin1Linear()) {
322:                    throw new IOException("Data corrupted, "
323:                            + "Collator Tries expected to have linear "
324:                            + "latin one data arrays");
325:                }
326:                readcount += rbc.m_trie_.getSerializedDataSize();
327:                m_expansionEndCESize_ >>= 2;
328:                rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
329:                for (int i = 0; i < m_expansionEndCESize_; i++) {
330:                    rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
331:                }
332:                readcount += (m_expansionEndCESize_ << 2);
333:                rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
334:                for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
335:                    rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_
336:                            .readByte();
337:                }
338:                readcount += m_expansionEndCEMaxSizeSize_;
339:                rbc.m_unsafe_ = new byte[m_unsafeSize_];
340:                for (int i = 0; i < m_unsafeSize_; i++) {
341:                    rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
342:                }
343:                readcount += m_unsafeSize_;
344:                if (UCAConst != null) {
345:                    // we are reading the UCA
346:                    // unfortunately the UCA offset in any collator data is not 0 and
347:                    // only refers to the UCA data
348:                    m_contractionEndSize_ -= m_UCAValuesSize_;
349:                }
350:                rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
351:                for (int i = 0; i < m_contractionEndSize_; i++) {
352:                    rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
353:                }
354:                readcount += m_contractionEndSize_;
355:                if (UCAConst != null) {
356:                    UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_
357:                            .readInt();
358:                    int readUCAConstcount = 4;
359:                    UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_
360:                            .readInt();
361:                    readUCAConstcount += 4;
362:                    UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_
363:                            .readInt();
364:                    readUCAConstcount += 4;
365:                    UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_
366:                            .readInt();
367:                    readUCAConstcount += 4;
368:                    UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_
369:                            .readInt();
370:                    readUCAConstcount += 4;
371:                    UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_
372:                            .readInt();
373:                    readUCAConstcount += 4;
374:                    UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_
375:                            .readInt();
376:                    readUCAConstcount += 4;
377:                    UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_
378:                            .readInt();
379:                    readUCAConstcount += 4;
380:                    UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_
381:                            .readInt();
382:                    readUCAConstcount += 4;
383:                    UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_
384:                            .readInt();
385:                    readUCAConstcount += 4;
386:                    UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_
387:                            .readInt();
388:                    readUCAConstcount += 4;
389:                    UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_
390:                            .readInt();
391:                    readUCAConstcount += 4;
392:                    UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
393:                    readUCAConstcount += 4;
394:                    UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
395:                    readUCAConstcount += 4;
396:                    UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
397:                    readUCAConstcount += 4;
398:                    UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
399:                    readUCAConstcount += 4;
400:                    UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_
401:                            .readInt();
402:                    readUCAConstcount += 4;
403:                    UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_
404:                            .readInt();
405:                    readUCAConstcount += 4;
406:                    UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_
407:                            .readInt();
408:                    readUCAConstcount += 4;
409:                    UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_
410:                            .readInt();
411:                    readUCAConstcount += 4;
412:                    UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
413:                    readUCAConstcount += 4;
414:                    UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
415:                    readUCAConstcount += 4;
416:                    UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
417:                    readUCAConstcount += 4;
418:                    UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
419:                    readUCAConstcount += 4;
420:                    UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
421:                    readUCAConstcount += 4;
422:                    UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
423:                    readUCAConstcount += 4;
424:                    UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
425:                    readUCAConstcount += 4;
426:                    UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
427:                    readUCAConstcount += 4;
428:                    UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
429:                    readUCAConstcount += 4;
430:                    UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
431:                    readUCAConstcount += 4;
432:                    UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
433:                    readUCAConstcount += 4;
434:                    UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_
435:                            .readInt();
436:                    readUCAConstcount += 4;
437:                    UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_
438:                            .readInt();
439:                    readUCAConstcount += 4;
440:                    UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_
441:                            .readInt();
442:                    readUCAConstcount += 4;
443:                    UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_
444:                            .readInt();
445:                    readUCAConstcount += 4;
446:                    UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_
447:                            .readInt();
448:                    readUCAConstcount += 4;
449:                    UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_
450:                            .readInt();
451:                    readUCAConstcount += 4;
452:                    int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;
453:                    char result[] = new char[resultsize];
454:                    for (int i = 0; i < resultsize; i++) {
455:                        result[i] = m_dataInputStream_.readChar();
456:                    }
457:                    readcount += m_UCAValuesSize_;
458:                    if (readcount != m_size_) {
459:                        throw new IOException(
460:                                "Internal Error: Data file size error");
461:                    }
462:                    return result;
463:                }
464:                if (readcount != m_size_) {
465:                    throw new IOException(
466:                            "Internal Error: Data file size error");
467:                }
468:                return null;
469:            }
470:
471:            /**
472:             * Reads in the inverse UCA data.
473:             * @param inputStream input stream with the inverse UCA data
474:             * @return an object containing the inverse UCA data
475:             * @exception IOException thrown when an error occurs while reading the
476:             *            inverse UCA data
477:             */
478:            private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
479:                    InputStream inputStream) throws IOException {
480:                byte[] UnicodeVersion = ICUBinary.readHeader(inputStream,
481:                        INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);
482:
483:                // weiv: check that we have the correct Unicode version in 
484:                // binary files
485:                VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
486:                if (UnicodeVersion[0] != UCDVersion.getMajor()
487:                        || UnicodeVersion[1] != UCDVersion.getMinor()) {
488:                    throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
489:                }
490:
491:                CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
492:                DataInputStream input = new DataInputStream(inputStream);
493:                input.readInt(); // bytesize
494:                int tablesize = input.readInt(); // in int size
495:                int contsize = input.readInt(); // in char size
496:                input.readInt(); // table in bytes
497:                input.readInt(); // conts in bytes
498:                result.m_UCA_version_ = readVersion(input);
499:                input.skipBytes(8); // skip padding
500:
501:                int size = tablesize * 3; // one column for each strength
502:                result.m_table_ = new int[size];
503:                result.m_continuations_ = new char[contsize];
504:
505:                for (int i = 0; i < size; i++) {
506:                    result.m_table_[i] = input.readInt();
507:                }
508:                for (int i = 0; i < contsize; i++) {
509:                    result.m_continuations_[i] = input.readChar();
510:                }
511:                input.close();
512:                return result;
513:            }
514:
515:            /**
516:             * Reads four bytes from the input and returns a VersionInfo
517:             * object. Use it to read different collator versions.
518:             * @param input already instantiated DataInputStream, positioned 
519:             *              at the start of four version bytes
520:             * @return a ready VersionInfo object
521:             * @throws IOException thrown when an error occurs while reading
522:             *            version bytes
523:             */
524:
525:            protected static VersionInfo readVersion(DataInputStream input)
526:                    throws IOException {
527:                byte[] version = new byte[4];
528:                version[0] = input.readByte();
529:                version[1] = input.readByte();
530:                version[2] = input.readByte();
531:                version[3] = input.readByte();
532:
533:                VersionInfo result = VersionInfo.getInstance((int) version[0],
534:                        (int) version[1], (int) version[2], (int) version[3]);
535:
536:                return result;
537:            }
538:
539:            // private inner class -----------------------------------------------
540:
541:            // private variables -------------------------------------------------
542:
543:            /**
544:             * Authenticate uca data format version
545:             */
546:            private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
547:                public boolean isDataVersionAcceptable(byte version[]) {
548:                    return version[0] == DATA_FORMAT_VERSION_[0]
549:                            && version[1] >= DATA_FORMAT_VERSION_[1];
550:                    // Too harsh 
551:                    //&& version[1] == DATA_FORMAT_VERSION_[1]
552:                    //&& version[2] == DATA_FORMAT_VERSION_[2] 
553:                    //&& version[3] == DATA_FORMAT_VERSION_[3];
554:                }
555:            };
556:
557:            /**
558:             * Authenticate inverse UCA data format version
559:             */
560:            private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
561:                public boolean isDataVersionAcceptable(byte version[]) {
562:                    return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
563:                            && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
564:                }
565:            };
566:
567:            /**
568:             * Data input stream for uca.icu 
569:             */
570:            private DataInputStream m_dataInputStream_;
571:
572:            /**
573:             * File format version and id that this class understands.
574:             * No guarantees are made if an older version is used
575:             */
576:            private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x2,
577:                    (byte) 0x2, (byte) 0x0, (byte) 0x0 };
578:            private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55,
579:                    (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
580:            /**
581:             * Inverse UCA file format version and id that this class understands.
582:             * No guarantees are made if an older version is used
583:             */
584:            private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = {
585:                    (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
586:            private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {
587:                    (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };
588:            /**
589:             * Corrupted error string
590:             */
591:            private static final String CORRUPTED_DATA_ERROR_ = "Data corrupted in Collation data file";
592:
593:            /**
594:             * Wrong unicode version error string
595:             */
596:            private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
597:
598:            /**
599:             * Size of expansion table in bytes
600:             */
601:            private int m_expansionSize_;
602:            /**
603:             * Size of contraction index table in bytes
604:             */
605:            private int m_contractionIndexSize_;
606:            /**
607:             * Size of contraction table in bytes
608:             */
609:            private int m_contractionCESize_;
610:            /**
611:             * Size of the Trie in bytes
612:             */
613:            private int m_trieSize_;
614:            /**
615:             * Size of the table that contains information about collation elements
616:             * that end with an expansion 
617:             */
618:            private int m_expansionEndCESize_;
619:            /**
620:             * Size of the table that contains information about the maximum size of 
621:             * collation elements that end with a particular expansion CE corresponding
622:             * to the ones in expansionEndCE
623:             */
624:            private int m_expansionEndCEMaxSizeSize_;
625:            /**
626:             * Size of the option table that contains information about the collation
627:             * options
628:             */
629:            private int m_optionSize_;
630:            /**
631:             * Size of the whole data file minus the ICU header
632:             */
633:            private int m_size_;
634:            /**
635:             * Size of the collation data header
636:             */
637:            private int m_headerSize_;
638:            /**
639:             * Size of the table that contains information about the "Unsafe" 
640:             * codepoints
641:             */
642:            private int m_unsafeSize_;
643:            /**
644:             * Size of the table that contains information about code points that end
645:             * with a contraction
646:             */
647:            private int m_contractionEndSize_;
648:            /**
649:             * Size of the table that contains UCA contraction information
650:             */
651:            private int m_UCAValuesSize_;
652:
653:            // private methods ---------------------------------------------------
654:
655:        }
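
For context, the data blocks that CollatorReader parses from ucadata.icu ultimately back the public collation classes in com.ibm.icu.text. The sketch below is illustrative only: it does not touch this internal, package-private reader directly, and it assumes a standard icu4j installation, using only the public Collator API (the class name CollatorUsageSketch is made up for the example).

import java.util.Locale;

import com.ibm.icu.text.Collator;

public class CollatorUsageSketch {
    public static void main(String[] args) {
        // Obtaining a collator causes the collation data (e.g. ucadata.icu) to be
        // loaded through internal readers such as CollatorReader.
        Collator collator = Collator.getInstance(Locale.FRENCH);
        // SECONDARY strength: accents are significant, case differences are not.
        collator.setStrength(Collator.SECONDARY);
        // Locale-aware comparison; the sign of the result follows French collation rules.
        int order = collator.compare("côte", "coté");
        System.out.println(order < 0 ? "côte sorts before coté" : "côte does not sort before coté");
    }
}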