Source Code Cross Referenced for RBBIRuleBuilder.java in » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        //
002:        //    Copyright (C) 2002-2006, International Business Machines Corporation and others.
003:        //    All Rights Reserved.
004:        //
005:        //
006:
007:        package com.ibm.icu.text;
008:
009:        import java.util.HashMap;
010:        import java.util.List;
011:        import java.util.ArrayList;
012:        import java.util.Map;
013:        import java.io.OutputStream;
014:        import java.io.DataOutputStream;
015:        import java.io.IOException;
016:        import com.ibm.icu.impl.Assert;
017:
018:        class RBBIRuleBuilder {
019:            //   This is the main class for building (compiling) break rules into the tables
020:            //    required by the runtime RBBI engine.
021:            //
022:
023:            String fDebugEnv; // controls debug trace output
024:            String fRules; // The rule string that we are compiling
025:            RBBIRuleScanner fScanner; // The scanner.
026:
027:            //
028:            //  There are four separate parse trees generated, one for each of the
029:            //    forward rules, reverse rules, safe forward rules and safe reverse rules.
030:            //  This array references the root of each of the trees.
031:            //  
032:            RBBINode[] fTreeRoots = new RBBINode[4];
033:            static final int fForwardTree = 0; // Indexes into the above fTreeRoots array
034:            static final int fReverseTree = 1; //   for each of the trees.
035:            static final int fSafeFwdTree = 2; //   (in C, these are pointer variables and
036:            static final int fSafeRevTree = 3; //    there is no array.)
037:            int fDefaultTree = fForwardTree; // For rules not qualified with a !
038:            //   the tree to which they belong to.
039:
040:            boolean fChainRules; // True for chained Unicode TR style rules.
041:            // False for traditional regexp rules.
042:
043:            boolean fLBCMNoChain; // True:  suppress chaining of rules on
044:            //   chars with LineBreak property == CM.
045:
046:            boolean fLookAheadHardBreak; // True:  Look ahead matches cause an
047:            // immediate break, no continuing for the
048:            // longest match.
049:
050:            RBBISetBuilder fSetBuilder; // Set and Character Category builder.
051:            List fUSetNodes; // Vector of all uset nodes.
052:            RBBITableBuilder fForwardTables; // State transition tables
053:            RBBITableBuilder fReverseTables;
054:            RBBITableBuilder fSafeFwdTables;
055:            RBBITableBuilder fSafeRevTables;
056:
057:            //
058:            // Status {tag} values.   These structures are common to all of the rule sets (Forward, Reverse, etc.).
059:            //
060:            Map fStatusSets = new HashMap(); // Status value sets encountered so far.
061:            //  Map Key is the set of values.
062:            //  Map Value is the runtime array index.
063:
064:            List fRuleStatusVals; // List of Integer objects.  Has same layout as the
065:            //   runtime array of status (tag) values - 
066:            //     number of values in group 1
067:            //        first status value in group 1
068:            //        2nd status value in group 1
069:            //        ...
070:            //     number of values in group 2
071:            //        first status value in group 2
072:            //        etc.
073:            //
074:            // Error codes from ICU4C.
075:            //    using these simplified the porting, and consolidated the
076:            //    creation of Java exceptions
077:            //
078:            static final int U_BRK_ERROR_START = 0x10200;
079:            /**< Start of codes indicating Break Iterator failures */
080:
081:            static final int U_BRK_INTERNAL_ERROR = 0x10201;
082:            /**< An internal error (bug) was detected.             */
083:
084:            static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202;
085:            /**< Hex digits expected as part of a escaped char in a rule. */
086:
087:            static final int U_BRK_SEMICOLON_EXPECTED = 0x10203;
088:            /**< Missing ';' at the end of a RBBI rule.            */
089:
090:            static final int U_BRK_RULE_SYNTAX = 0x10204;
091:            /**< Syntax error in RBBI rule.                        */
092:
093:            static final int U_BRK_UNCLOSED_SET = 0x10205;
094:            /**< UnicodeSet witing an RBBI rule missing a closing ']'.  */
095:
096:            static final int U_BRK_ASSIGN_ERROR = 0x10206;
097:            /**< Syntax error in RBBI rule assignment statement.   */
098:
099:            static final int U_BRK_VARIABLE_REDFINITION = 0x10207;
100:            /**< RBBI rule $Variable redefined.                    */
101:
102:            static final int U_BRK_MISMATCHED_PAREN = 0x10208;
103:            /**< Mis-matched parentheses in an RBBI rule.          */
104:
105:            static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209;
106:            /**< Missing closing quote in an RBBI rule.            */
107:
108:            static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a;
109:            /**< Use of an undefined $Variable in an RBBI rule.    */
110:
111:            static final int U_BRK_INIT_ERROR = 0x1020b;
112:            /**< Initialization failure.  Probable missing ICU Data. */
113:
114:            static final int U_BRK_RULE_EMPTY_SET = 0x1020c;
115:            /**< Rule contains an empty Unicode Set.               */
116:
117:            static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d;
118:            /**< !!option in RBBI rules not recognized.            */
119:
120:            static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e;
121:            /**< The {nnn} tag on a rule is mal formed             */
122:            static final int U_BRK_MALFORMED_SET = 0x1020f;
123:
124:            static final int U_BRK_ERROR_LIMIT = 0x10210;
125:
126:            /**< This must always be the last value to indicate the limit for Break Iterator failures */
127:
128:            //----------------------------------------------------------------------------------------
129:            //
130:            //  Constructor.
131:            //
132:            //----------------------------------------------------------------------------------------
133:            RBBIRuleBuilder(String rules) {
134:                fDebugEnv = System.getProperty("U_RBBIDEBUG");
135:                fRules = rules;
136:                fUSetNodes = new ArrayList();
137:                fRuleStatusVals = new ArrayList();
138:                fScanner = new RBBIRuleScanner(this );
139:                fSetBuilder = new RBBISetBuilder(this );
140:            }
141:
142:            //----------------------------------------------------------------------------------------
143:            //
144:            //   flattenData() -  Collect up the compiled RBBI rule data and put it into
145:            //                    the format for saving in ICU data files,
146:            //
147:            //                    See the ICU4C file common/rbidata.h for a detailed description.
148:            //
149:            //----------------------------------------------------------------------------------------
150:            static final int align8(int i) {
151:                return (i + 7) & 0xfffffff8;
152:            }
153:
154:            void flattenData(OutputStream os) throws IOException {
155:                DataOutputStream dos = new DataOutputStream(os);
156:                int i;
157:
158:                //  Remove comments and whitespace from the rules to make it smaller.
159:                String strippedRules = RBBIRuleScanner.stripRules(fRules);
160:
161:                // Calculate the size of each section in the data in bytes.
162:                //   Sizes here are padded up to a multiple of 8 for better memory alignment.
163:                //   Sections sizes actually stored in the header are for the actual data
164:                //     without the padding.
165:                //
166:                int headerSize = 24 * 4; // align8(sizeof(RBBIDataHeader));
167:                int forwardTableSize = align8(fForwardTables.getTableSize());
168:                int reverseTableSize = align8(fReverseTables.getTableSize());
169:                int safeFwdTableSize = align8(fSafeFwdTables.getTableSize());
170:                int safeRevTableSize = align8(fSafeRevTables.getTableSize());
171:                int trieSize = align8(fSetBuilder.getTrieSize());
172:                int statusTableSize = align8(fRuleStatusVals.size() * 4);
173:                int rulesSize = align8((strippedRules.length()) * 2);
174:                int totalSize = headerSize + forwardTableSize
175:                        + reverseTableSize + safeFwdTableSize
176:                        + safeRevTableSize + statusTableSize + trieSize
177:                        + rulesSize;
178:                int outputPos = 0; // Track stream position, starting from RBBIDataHeader.
179:
180:                //
181:                // Write out an ICU Data Header
182:                //   TODO:  actually create a real header, rather than just a placeholder.
183:                //           The empty placeholder is ok for compile-and-go from within ICU4J.
184:                //           Replicating the ICU4C genbrk tool for building .brk resources would need a real header.
185:                //
186:                byte[] ICUDataHeader = new byte[0x80];
187:                dos.write(ICUDataHeader);
188:
189:                //
190:                // Write out the RBBIDataHeader
191:                //
192:                int[] header = new int[RBBIDataWrapper.DH_SIZE]; // sizeof struct RBBIDataHeader
193:                header[RBBIDataWrapper.DH_MAGIC] = 0xb1a0;
194:                header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000; // uint8_t fFormatVersion[4];
195:                header[RBBIDataWrapper.DH_LENGTH] = totalSize; // fLength, the total size of all rule sections.
196:                header[RBBIDataWrapper.DH_CATCOUNT] = fSetBuilder
197:                        .getNumCharCategories(); // fCatCount.
198:                header[RBBIDataWrapper.DH_FTABLE] = headerSize; // fFTable
199:                header[RBBIDataWrapper.DH_FTABLELEN] = forwardTableSize; // fTableLen
200:                header[RBBIDataWrapper.DH_RTABLE] = header[RBBIDataWrapper.DH_FTABLE]
201:                        + forwardTableSize; // fRTable
202:                header[RBBIDataWrapper.DH_RTABLELEN] = reverseTableSize; // fRTableLen
203:                header[RBBIDataWrapper.DH_SFTABLE] = header[RBBIDataWrapper.DH_RTABLE]
204:                        + reverseTableSize; // fSTable
205:                header[RBBIDataWrapper.DH_SFTABLELEN] = safeFwdTableSize; // fSTableLen
206:                header[RBBIDataWrapper.DH_SRTABLE] = header[RBBIDataWrapper.DH_SFTABLE]
207:                        + safeFwdTableSize; // fSRTable
208:                header[RBBIDataWrapper.DH_SRTABLELEN] = safeRevTableSize; // fSRTableLen
209:                header[RBBIDataWrapper.DH_TRIE] = header[RBBIDataWrapper.DH_SRTABLE]
210:                        + safeRevTableSize; // fTrie
211:                header[RBBIDataWrapper.DH_TRIELEN] = fSetBuilder.getTrieSize(); // fTrieLen
212:                header[RBBIDataWrapper.DH_STATUSTABLE] = header[RBBIDataWrapper.DH_TRIE]
213:                        + header[RBBIDataWrapper.DH_TRIELEN];
214:                header[RBBIDataWrapper.DH_STATUSTABLELEN] = statusTableSize; // fStatusTableLen
215:                header[RBBIDataWrapper.DH_RULESOURCE] = header[RBBIDataWrapper.DH_STATUSTABLE]
216:                        + statusTableSize;
217:                header[RBBIDataWrapper.DH_RULESOURCELEN] = strippedRules
218:                        .length() * 2;
219:                for (i = 0; i < header.length; i++) {
220:                    dos.writeInt(header[i]);
221:                    outputPos += 4;
222:                }
223:
224:                // Write out the actual state tables.
225:                short[] tableData;
226:                tableData = fForwardTables.exportTable();
227:                Assert.assrt(outputPos == header[4]);
228:                for (i = 0; i < tableData.length; i++) {
229:                    dos.writeShort(tableData[i]);
230:                    outputPos += 2;
231:                }
232:
233:                tableData = fReverseTables.exportTable();
234:                Assert.assrt(outputPos == header[6]);
235:                for (i = 0; i < tableData.length; i++) {
236:                    dos.writeShort(tableData[i]);
237:                    outputPos += 2;
238:                }
239:
240:                Assert.assrt(outputPos == header[8]);
241:                tableData = fSafeFwdTables.exportTable();
242:                for (i = 0; i < tableData.length; i++) {
243:                    dos.writeShort(tableData[i]);
244:                    outputPos += 2;
245:                }
246:
247:                Assert.assrt(outputPos == header[10]);
248:                tableData = fSafeRevTables.exportTable();
249:                for (i = 0; i < tableData.length; i++) {
250:                    dos.writeShort(tableData[i]);
251:                    outputPos += 2;
252:                }
253:
254:                // write out the Trie table
255:                Assert.assrt(outputPos == header[12]);
256:                fSetBuilder.serializeTrie(os);
257:                outputPos += header[13];
258:                while (outputPos % 8 != 0) { // pad to an 8 byte boundary
259:                    dos.write(0);
260:                    outputPos += 1;
261:                }
262:
263:                // Write out the status {tag} table.
264:                Assert.assrt(outputPos == header[16]);
265:                for (i = 0; i < fRuleStatusVals.size(); i++) {
266:                    Integer val = (Integer) fRuleStatusVals.get(i);
267:                    dos.writeInt(val.intValue());
268:                    outputPos += 4;
269:                }
270:
271:                while (outputPos % 8 != 0) { // pad to an 8 byte boundary
272:                    dos.write(0);
273:                    outputPos += 1;
274:                }
275:
276:                // Write out the stripped rules (rules with extra spaces removed
277:                //   These go last in the data area, even though they are not last in the header.
278:                Assert.assrt(outputPos == header[14]);
279:                dos.writeChars(strippedRules);
280:                outputPos += strippedRules.length() * 2;
281:                while (outputPos % 8 != 0) { // pad to an 8 byte boundary
282:                    dos.write(0);
283:                    outputPos += 1;
284:                }
285:            }
286:
287:            //----------------------------------------------------------------------------------------
288:            //
289:            //  compileRules          compile source rules, placing the compiled form into a output stream
290:            //                        The compiled form is identical to that from ICU4C (Big Endian).
291:            //
292:            //----------------------------------------------------------------------------------------
293:            static void compileRules(String rules, OutputStream os)
294:                    throws IOException {
295:                //
296:                // Read the input rules, generate a parse tree, symbol table,
297:                // and list of all Unicode Sets referenced by the rules.
298:                //
299:                RBBIRuleBuilder builder = new RBBIRuleBuilder(rules);
300:                builder.fScanner.parse();
301:
302:                //
303:                // UnicodeSet processing.
304:                //    Munge the Unicode Sets to create a set of character categories.
305:                //    Generate the mapping tables (TRIE) from input 32-bit characters to
306:                //    the character categories.
307:                //
308:                builder.fSetBuilder.build();
309:
310:                //
311:                //   Generate the DFA state transition table.
312:                //
313:                builder.fForwardTables = new RBBITableBuilder(builder,
314:                        fForwardTree);
315:                builder.fReverseTables = new RBBITableBuilder(builder,
316:                        fReverseTree);
317:                builder.fSafeFwdTables = new RBBITableBuilder(builder,
318:                        fSafeFwdTree);
319:                builder.fSafeRevTables = new RBBITableBuilder(builder,
320:                        fSafeRevTree);
321:                builder.fForwardTables.build();
322:                builder.fReverseTables.build();
323:                builder.fSafeFwdTables.build();
324:                builder.fSafeRevTables.build();
325:                if (builder.fDebugEnv != null
326:                        && builder.fDebugEnv.indexOf("states") >= 0) {
327:                    builder.fForwardTables.printRuleStatusTable();
328:                }
329:
330:                //
331:                //   Package up the compiled data, writing it to an output stream
332:                //      in the serialization format.  This is the same as the ICU4C runtime format.
333:                //
334:                builder.flattenData(os);
335:            }
336:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.