Source Code Cross Referenced for RBBIDataWrapper.java in  » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /**
002:         *******************************************************************************
003:         * Copyright (C) 1996-2006, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */package com.ibm.icu.text;
007:
008:        import java.io.BufferedInputStream;
009:        import java.io.InputStream;
010:        import java.io.DataInputStream;
011:        import java.io.IOException;
012:
013:        import com.ibm.icu.impl.ICUData;
014:        import com.ibm.icu.impl.ICUResourceBundle;
015:        import com.ibm.icu.impl.Trie;
016:        import com.ibm.icu.impl.CharTrie;
017:
018:        /**
019:         * <p>Internal class used for Rule Based Break Iterators</p>
020:         * <p>This class provides access to the compiled break rule data, as
021:         * it is stored in a .brk file.  
022:         * @internal
023:         * 
024:         */
025:        final class RBBIDataWrapper {
026:            //
027:            // These fields are the ready-to-use compiled rule data, as
028:            //   read from the file.  They are filled in by get(InputStream).
029:            //
030:            RBBIDataHeader fHeader; // Parsed RBBI data header (offsets and lengths of every section).
031:            short fFTable[]; // Forward state transition table.
032:            short fRTable[]; // Reverse state transition table.
033:            short fSFTable[]; // Safe point forward table; left null when its header length is 0.
034:            short fSRTable[]; // Safe point reverse table; left null when its header length is 0.
035:            CharTrie fTrie; // Trie giving the character category of each code point.
036:            String fRuleSource; // Source text of the break rules.
037:            int fStatusTable[]; // Table of rule status values.
038:
039:            //
040:            // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
041:            //   Used by the rule compiler when flattening the data.
042:            //   Each section is described by an (offset, length) pair of adjacent indexes.
043:            final static int DH_SIZE = 24; // presumably the header size counted in int fields (the reader treats the header as 24 ints) -- confirm
044:            final static int DH_MAGIC = 0;
045:            final static int DH_FORMATVERSION = 1;
046:            final static int DH_LENGTH = 2;
047:            final static int DH_CATCOUNT = 3;
048:            final static int DH_FTABLE = 4; // forward state table
049:            final static int DH_FTABLELEN = 5;
050:            final static int DH_RTABLE = 6; // reverse state table
051:            final static int DH_RTABLELEN = 7;
052:            final static int DH_SFTABLE = 8; // safe point forward table
053:            final static int DH_SFTABLELEN = 9;
054:            final static int DH_SRTABLE = 10; // safe point reverse table
055:            final static int DH_SRTABLELEN = 11;
056:            final static int DH_TRIE = 12; // character category trie
057:            final static int DH_TRIELEN = 13;
058:            final static int DH_RULESOURCE = 14; // break rule source text
059:            final static int DH_RULESOURCELEN = 15;
060:            final static int DH_STATUSTABLE = 16; // rule status values
061:            final static int DH_STATUSTABLELEN = 17;
062:
063:            // Index offsets to the fields in a state table row.
064:            //    Corresponds to struct RBBIStateTableRow in the C version.
065:            //    Offsets are relative to the row start returned by getRowIndex().
066:            final static int ACCEPTING = 0;
067:            final static int LOOKAHEAD = 1;
068:            final static int TAGIDX = 2;
069:            final static int RESERVED = 3;
070:            final static int NEXTSTATES = 4; // first of the per-category next-state entries
071:
072:            // Index offsets to header fields of a state table
073:            //     struct RBBIStateTable {...   in the C version.
074:            //     Offsets count 16-bit array entries; getNumStates() reads NUMSTATES as two entries.
075:            final static int NUMSTATES = 0;
076:            final static int ROWLEN = 2;
077:            final static int FLAGS = 4;
078:            final static int RESERVED_2 = 6;
079:            final static int ROW_DATA = 8; // index of the first row; see getRowIndex()
080:
081:            //  Bit selectors for the "FLAGS" field of the state table header
082:            //     enum RBBIStateTableFlags in the C version.
083:            //
084:            final static int RBBI_LOOKAHEAD_HARD_BREAK = 1;
085:            final static int RBBI_BOF_REQUIRED = 2;
086:
087:            //  Getters for fields from the state table header
088:            //
089:            final static int getNumStates(short table[]) {
090:                int hi = table[NUMSTATES];
091:                int lo = table[NUMSTATES + 1];
092:                int val = (hi << 16) + (lo & 0x0000ffff);
093:                return val;
094:            }
095:
096:            /**
097:             * Data Header.  A struct-like class with the fields from the RBBI data file header.
098:             * The fields are filled in, in declaration order of the file format, by get(InputStream).
099:            final static class RBBIDataHeader {
100:                int fMagic; //  == 0xb1a0
101:                int fVersion; //  == 1 (for ICU 3.2 and earlier).
102:                byte[] fFormatVersion; //  For ICU 3.4 and later: the four bytes of fVersion, most significant first.
103:                int fLength; //  Total length in bytes of this RBBI Data, 
104:                //      including all sections, not just the header. 
105:                int fCatCount; //  Number of character categories. 
106:
107:                //  
108:                //  Offsets and sizes of each of the subsections within the RBBI data. 
109:                //  All offsets are bytes from the start of the RBBIDataHeader. 
110:                //  All sizes are in bytes. 
111:                //  
112:                int fFTable; //  Offset to the forward state transition table. 
113:                int fFTableLen;
114:                int fRTable; //  Offset to the reverse state transition table. 
115:                int fRTableLen;
116:                int fSFTable; //  Offset to the safe point forward transition table 
117:                int fSFTableLen;
118:                int fSRTable; //  Offset to the safe point reverse transition table 
119:                int fSRTableLen;
120:                int fTrie; //  Offset to Trie data for character categories 
121:                int fTrieLen;
122:                int fRuleSource; //  Offset to the source for the break 
123:                int fRuleSourceLen; //    rules.  Stored UChar *. 
124:                int fStatusTable; // Offset to the table of rule status values 
125:                int fStatusTableLen;
126:
127:                public RBBIDataHeader() {
128:                    fMagic = 0;
129:                    fFormatVersion = new byte[4]; // one slot per byte of the 32-bit version field
130:                }
131:            }
132:
133:            /**
134:             * RBBI State Table Indexing Function.  Given a state number, return the
135:             * array index of the start of the state table row for that state.
136:             * 
137:             */
138:            int getRowIndex(int state) {
139:                return ROW_DATA + state * (fHeader.fCatCount + 4);
140:            }
141:
142:            static class TrieFoldingFunc implements  Trie.DataManipulate {
143:                public int getFoldingOffset(int data) {
144:                    if ((data & 0x8000) != 0) {
145:                        return data & 0x7fff;
146:                    } else {
147:                        return 0;
148:                    }
149:                }
150:            }
151:
152:            static TrieFoldingFunc fTrieFoldingFunc = new TrieFoldingFunc();
153:
154:            RBBIDataWrapper() { // Intentionally empty: the static get(InputStream) factory assigns the fields.
155:            }
156:
157:            static RBBIDataWrapper get(String name) throws IOException {
158:                String fullName = "data/" + name;
159:                InputStream is = ICUData.getRequiredStream(fullName);
160:                return get(is);
161:            }
162:
163:            /*
164:             *  Get an RBBIDataWrapper from an InputStream onto a pre-compiled set
165:             *  of RBBI rules.
166:             */
167:            static RBBIDataWrapper get(InputStream is) throws IOException {
168:                int i;
169:
170:                DataInputStream dis = new DataInputStream(
171:                        new BufferedInputStream(is));
172:                RBBIDataWrapper This = new RBBIDataWrapper();
173:
174:                // Seek past the ICU data header.
175:                //   TODO:  verify that the header looks good.
176:                dis.skip(0x80);
177:
178:                // Read in the RBBI data header...
179:                This.fHeader = new RBBIDataHeader();
180:                This.fHeader.fMagic = dis.readInt();
181:                This.fHeader.fVersion = dis.readInt();
182:                This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24);
183:                This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16);
184:                This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8);
185:                This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion);
186:                This.fHeader.fLength = dis.readInt();
187:                This.fHeader.fCatCount = dis.readInt();
188:                This.fHeader.fFTable = dis.readInt();
189:                This.fHeader.fFTableLen = dis.readInt();
190:                This.fHeader.fRTable = dis.readInt();
191:                This.fHeader.fRTableLen = dis.readInt();
192:                This.fHeader.fSFTable = dis.readInt();
193:                This.fHeader.fSFTableLen = dis.readInt();
194:                This.fHeader.fSRTable = dis.readInt();
195:                This.fHeader.fSRTableLen = dis.readInt();
196:                This.fHeader.fTrie = dis.readInt();
197:                This.fHeader.fTrieLen = dis.readInt();
198:                This.fHeader.fRuleSource = dis.readInt();
199:                This.fHeader.fRuleSourceLen = dis.readInt();
200:                This.fHeader.fStatusTable = dis.readInt();
201:                This.fHeader.fStatusTableLen = dis.readInt();
202:                dis.skip(6 * 4); // uint32_t  fReserved[6];
203:
204:                if (This.fHeader.fMagic != 0xb1a0
205:                        || !(This.fHeader.fVersion == 1 || // ICU 3.2 and earlier
206:                        This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
207:                ) {
208:                    throw new IOException(
209:                            "Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
210:                }
211:
212:                // Current position in input stream.  
213:                int pos = 24 * 4; // offset of end of header, which has 24 fields, all int32_t (4 bytes)
214:
215:                //
216:                // Read in the Forward state transition table as an array of shorts.
217:                //
218:
219:                //   Quick Sanity Check
220:                if (This.fHeader.fFTable < pos
221:                        || This.fHeader.fFTable > This.fHeader.fLength) {
222:                    throw new IOException("Break iterator Rule data corrupt");
223:                }
224:
225:                //    Skip over any padding preceding this table
226:                dis.skip(This.fHeader.fFTable - pos);
227:                pos = This.fHeader.fFTable;
228:
229:                This.fFTable = new short[This.fHeader.fFTableLen / 2];
230:                for (i = 0; i < This.fFTable.length; i++) {
231:                    This.fFTable[i] = dis.readShort();
232:                    pos += 2;
233:                }
234:
235:                //
236:                // Read in the Reverse state table
237:                //
238:
239:                // Skip over any padding in the file
240:                dis.skip(This.fHeader.fRTable - pos);
241:                pos = This.fHeader.fRTable;
242:
243:                // Create & fill the table itself.
244:                This.fRTable = new short[This.fHeader.fRTableLen / 2];
245:                for (i = 0; i < This.fRTable.length; i++) {
246:                    This.fRTable[i] = dis.readShort();
247:                    pos += 2;
248:                }
249:
250:                //
251:                // Read in the Safe Forward state table
252:                // 
253:                if (This.fHeader.fSFTableLen > 0) {
254:                    // Skip over any padding in the file
255:                    dis.skip(This.fHeader.fSFTable - pos);
256:                    pos = This.fHeader.fSFTable;
257:
258:                    // Create & fill the table itself.
259:                    This.fSFTable = new short[This.fHeader.fSFTableLen / 2];
260:                    for (i = 0; i < This.fSFTable.length; i++) {
261:                        This.fSFTable[i] = dis.readShort();
262:                        pos += 2;
263:                    }
264:                }
265:
266:                //
267:                // Read in the Safe Reverse state table
268:                // 
269:                if (This.fHeader.fSRTableLen > 0) {
270:                    // Skip over any padding in the file
271:                    dis.skip(This.fHeader.fSRTable - pos);
272:                    pos = This.fHeader.fSRTable;
273:
274:                    // Create & fill the table itself.
275:                    This.fSRTable = new short[This.fHeader.fSRTableLen / 2];
276:                    for (i = 0; i < This.fSRTable.length; i++) {
277:                        This.fSRTable[i] = dis.readShort();
278:                        pos += 2;
279:                    }
280:                }
281:
282:                //
283:                // Unserialize the Character categories TRIE
284:                //     Because we can't be absolutely certain where the Trie deserialize will
285:                //     leave the input stream, leave position unchanged.
286:                //     The seek to the start of the next item following the TRIE will get us
287:                //     back in sync.
288:                //
289:                dis.skip(This.fHeader.fTrie - pos); // seek input stream from end of previous section to
290:                pos = This.fHeader.fTrie; //   to the start of the trie
291:
292:                dis.mark(This.fHeader.fTrieLen + 100); // Mark position of start of TRIE in the input
293:                //  and tell Java to keep the mark valid so long
294:                //  as we don't go more than 100 bytes past the
295:                //  past the end of the TRIE.
296:
297:                This.fTrie = new CharTrie(dis, fTrieFoldingFunc); // Deserialize the TRIE, leaving input
298:                //  stream at an unknown position, preceding the
299:                //  padding between TRIE and following section.
300:
301:                dis.reset(); // Move input stream back to marked position at
302:                //   the start of the serialized TRIE.  Now our
303:                //   "pos" variable and the input stream are in
304:                //   agreement.
305:
306:                //
307:                // Read the Rule Status Table
308:                //
309:                if (pos > This.fHeader.fStatusTable) {
310:                    throw new IOException("Break iterator Rule data corrupt");
311:                }
312:                dis.skip(This.fHeader.fStatusTable - pos);
313:                pos = This.fHeader.fStatusTable;
314:                This.fStatusTable = new int[This.fHeader.fStatusTableLen / 4];
315:                for (i = 0; i < This.fStatusTable.length; i++) {
316:                    This.fStatusTable[i] = dis.readInt();
317:                    pos += 4;
318:                }
319:
320:                //
321:                // Put the break rule source into a String
322:                //
323:                if (pos > This.fHeader.fRuleSource) {
324:                    throw new IOException("Break iterator Rule data corrupt");
325:                }
326:                dis.skip(This.fHeader.fRuleSource - pos);
327:                pos = This.fHeader.fRuleSource;
328:                StringBuffer sb = new StringBuffer(
329:                        This.fHeader.fRuleSourceLen / 2);
330:                for (i = 0; i < This.fHeader.fRuleSourceLen; i += 2) {
331:                    sb.append(dis.readChar());
332:                    pos += 2;
333:                }
334:                This.fRuleSource = sb.toString();
335:
336:                if (RuleBasedBreakIterator.fDebugEnv != null
337:                        && RuleBasedBreakIterator.fDebugEnv.indexOf("data") >= 0) {
338:                    This.dump();
339:                }
340:                return This;
341:            }
342:
343:            /** Debug function to display the break iterator data.  
344:             *  @internal
345:             */
346:            void dump() {
347:                System.out.println("RBBI Data Wrapper dump ...");
348:                System.out.println();
349:                System.out.println("Forward State Table");
350:                dumpTable(fFTable);
351:                System.out.println("Reverse State Table");
352:                dumpTable(fRTable);
353:                System.out.println("Forward Safe Points Table");
354:                dumpTable(fSFTable);
355:                System.out.println("Reverse Safe Points Table");
356:                dumpTable(fSRTable);
357:
358:                dumpCharCategories();
359:                System.out.println("Source Rules: " + fRuleSource);
360:
361:            }
362:
363:            /** Fixed width int-to-string conversion.   
364:             *  @internal
365:             * 
366:             */
367:            static public String intToString(int n, int width) {
368:                StringBuffer dest = new StringBuffer(width);
369:                dest.append(n);
370:                while (dest.length() < width) {
371:                    dest.insert(0, ' ');
372:                }
373:                return dest.toString();
374:            }
375:
376:            /** Fixed width int-to-string conversion.   
377:             *  @internal
378:             * 
379:             */
380:            static public String intToHexString(int n, int width) {
381:                StringBuffer dest = new StringBuffer(width);
382:                dest.append(Integer.toHexString(n));
383:                while (dest.length() < width) {
384:                    dest.insert(0, ' ');
385:                }
386:                return dest.toString();
387:            }
388:
389:            /** Dump a state table.  (A full set of RBBI rules has 4 state tables.)  */
390:            private void dumpTable(short table[]) {
391:                if (table == null) {
392:                    System.out.println("  -- null -- ");
393:                } else {
394:                    int n;
395:                    int state;
396:                    String header = " Row  Acc Look  Tag";
397:                    for (n = 0; n < fHeader.fCatCount; n++) {
398:                        header += intToString(n, 5);
399:                    }
400:                    System.out.println(header);
401:                    for (n = 0; n < header.length(); n++) {
402:                        System.out.print("-");
403:                    }
404:                    System.out.println();
405:                    for (state = 0; state < getNumStates(table); state++) {
406:                        dumpRow(table, state);
407:                    }
408:                    System.out.println();
409:                }
410:            }
411:
412:            /**
413:             * Dump (for debug) a single row of an RBBI state table
414:             * @param table
415:             * @param state
416:             * @internal
417:             */
418:            private void dumpRow(short table[], int state) {
419:                StringBuffer dest = new StringBuffer(fHeader.fCatCount * 5 + 20);
420:                dest.append(intToString(state, 4));
421:                int row = getRowIndex(state);
422:                if (table[row + ACCEPTING] != 0) {
423:                    dest.append(intToString(table[row + ACCEPTING], 5));
424:                } else {
425:                    dest.append("     ");
426:                }
427:                if (table[row + LOOKAHEAD] != 0) {
428:                    dest.append(intToString(table[row + LOOKAHEAD], 5));
429:                } else {
430:                    dest.append("     ");
431:                }
432:                dest.append(intToString(table[row + TAGIDX], 5));
433:
434:                for (int col = 0; col < fHeader.fCatCount; col++) {
435:                    dest.append(intToString(table[row + NEXTSTATES + col], 5));
436:                }
437:
438:                System.out.println(dest);
439:            }
440:
441:            private void dumpCharCategories() {
442:                int n = fHeader.fCatCount;
443:                String catStrings[] = new String[n + 1];
444:                int rangeStart = 0;
445:                int rangeEnd = 0;
446:                int lastCat = -1;
447:                int char32;
448:                int category;
449:                int lastNewline[] = new int[n + 1];
450:
451:                for (category = 0; category <= fHeader.fCatCount; category++) {
452:                    catStrings[category] = "";
453:                }
454:                System.out.println("\nCharacter Categories");
455:                System.out.println("--------------------");
456:                for (char32 = 0; char32 <= 0x10ffff; char32++) {
457:                    category = fTrie.getCodePointValue(char32);
458:                    category &= ~0x4000; // Mask off dictionary bit.
459:                    if (category < 0 || category > fHeader.fCatCount) {
460:                        System.out.println("Error, bad category "
461:                                + Integer.toHexString(category) + " for char "
462:                                + Integer.toHexString(char32));
463:                        break;
464:                    }
465:                    if (category == lastCat) {
466:                        rangeEnd = char32;
467:                    } else {
468:                        if (lastCat >= 0) {
469:                            if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
470:                                lastNewline[lastCat] = catStrings[lastCat]
471:                                        .length() + 10;
472:                                catStrings[lastCat] += "\n       ";
473:                            }
474:
475:                            catStrings[lastCat] += " "
476:                                    + Integer.toHexString(rangeStart);
477:                            if (rangeEnd != rangeStart) {
478:                                catStrings[lastCat] += "-"
479:                                        + Integer.toHexString(rangeEnd);
480:                            }
481:                        }
482:                        lastCat = category;
483:                        rangeStart = rangeEnd = char32;
484:                    }
485:                }
486:                catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
487:                if (rangeEnd != rangeStart) {
488:                    catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
489:                }
490:
491:                for (category = 0; category <= fHeader.fCatCount; category++) {
492:                    System.out.println(intToString(category, 5) + "  "
493:                            + catStrings[category]);
494:                }
495:                System.out.println();
496:            }
497:
498:            public static void main(String[] args) {
499:                String s;
500:                if (args.length == 0) {
501:                    s = "char";
502:                } else {
503:                    s = args[0];
504:                }
505:                System.out.println("RBBIDataWrapper.main(" + s + ") ");
506:
507:                String versionedName = ICUResourceBundle.ICU_BUNDLE + "/" + s
508:                        + ".brk";
509:
510:                try {
511:                    RBBIDataWrapper This = RBBIDataWrapper.get(versionedName);
512:                    This.dump();
513:                } catch (Exception e) {
514:                    System.out.println("Exception: " + e.toString());
515:                }
516:
517:            }
518:
519:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.