Source Code Cross Referenced for RBBIRuleScanner.java in  » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         *******************************************************************************
0003:         * Copyright (C) 2003-2006,
0004:         * International Business Machines Corporation and others. All Rights Reserved.
0005:         *******************************************************************************
0006:         */
0007:
0008:        package com.ibm.icu.text;
0009:
0010:        import java.util.HashMap;
0011:        import java.lang.IllegalArgumentException;
0012:        import java.text.ParsePosition;
0013:
0014:        import com.ibm.icu.lang.UCharacter;
0015:        import com.ibm.icu.impl.Assert;
0016:        import com.ibm.icu.impl.Utility;
0017:
0018:        /**
0019:         *  This class is part of the Rule Based Break Iterator rule compiler.
0020:         *  It scans the rules and builds the parse tree.
0021:         *  There is no public API here.
0022:         *  @internal
0023:         */
0024:        class RBBIRuleScanner {
0025:
0026:            private final int kStackSize = 100; // The size of the state stack for
0027:
0028:            //   rules parsing.  Corresponds roughly
0029:            //   to the depth of parentheses nesting
0030:            //   that is allowed in the rules.
0031:
0032:            static class RBBIRuleChar {
0033:                int fChar;
0034:                boolean fEscaped;
0035:            }
0036:
0037:            RBBIRuleBuilder fRB; // The rule builder that we are part of.
0038:
0039:            int fScanIndex; // Index of current character being processed
0040:            //   in the rule input string.
0041:            int fNextIndex; // Index of the next character, which
0042:            //   is the first character not yet scanned.
0043:            boolean fQuoteMode; // Scan is in a 'quoted region'
0044:            int fLineNum; // Line number in input file.
0045:            int fCharNum; // Char position within the line.
0046:            int fLastChar; // Previous char, needed to count CR-LF
0047:            //   as a single line, not two.
0048:
0049:            RBBIRuleChar fC = new RBBIRuleChar(); // Current char for parse state machine
0050:            //   processing.
0051:            String fVarName; // $variableName, valid when we've just
0052:            //   scanned one.
0053:
0054:            short fStack[] = new short[kStackSize]; // State stack, holds state pushes
0055:            int fStackPtr; //  and pops as specified in the state
0056:            //  transition rules.
0057:
0058:            RBBINode fNodeStack[] = new RBBINode[kStackSize]; // Node stack, holds nodes created
0059:            //  during the parse of a rule
0060:            int fNodeStackPtr;
0061:
0062:            boolean fReverseRule; // True if the rule currently being scanned
0063:            //  is a reverse direction rule (if it
0064:            //  starts with a '!')
0065:
0066:            boolean fLookAheadRule; // True if the rule includes a '/'
0067:            //   somewhere within it.
0068:
0069:            RBBISymbolTable fSymbolTable; // symbol table, holds definitions of
0070:            //   $variable symbols.
0071:
0072:            HashMap fSetTable = new HashMap(); // UnicocodeSet hash table, holds indexes to
0073:            //   the sets created while parsing rules.
0074:            //   The key is the string used for creating
0075:            //   the set.
0076:
0077:            UnicodeSet fRuleSets[] = new UnicodeSet[10]; // Unicode Sets that are needed during
0078:            //  the scanning of RBBI rules.  The
0079:            //  indicies for these are assigned by the
0080:            //  perl script that builds the state tables.
0081:            //  See rbbirpt.h.
0082:
0083:            int fRuleNum; // Counts each rule as it is scanned.
0084:
0085:            int fOptionStart; // Input index of start of a !!option
0086:            //   keyword, while being scanned.
0087:
0088:            static private String gRuleSet_rule_char_pattern = "[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]";
0089:            static private String gRuleSet_name_char_pattern = "[_\\p{L}\\p{N}]";
0090:            static private String gRuleSet_digit_char_pattern = "[0-9]";
0091:            static private String gRuleSet_name_start_char_pattern = "[_\\p{L}]";
0092:            static private String gRuleSet_white_space_pattern = "[\\p{Pattern_White_Space}]";
0093:            static private String kAny = "any";
0094:
0095:            //----------------------------------------------------------------------------------------
0096:            //
0097:            //  Constructor.
0098:            //
0099:            //----------------------------------------------------------------------------------------
0100:            RBBIRuleScanner(RBBIRuleBuilder rb) {
0101:                fRB = rb;
0102:                fLineNum = 1;
0103:
0104:                //
0105:                //  Set up the constant Unicode Sets.
0106:                //     Note:  These could be made static and shared among
0107:                //            all instances of RBBIRuleScanners. 
0108:                fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128] = new UnicodeSet(
0109:                        gRuleSet_rule_char_pattern);
0110:                fRuleSets[RBBIRuleParseTable.kRuleSet_white_space - 128] = new UnicodeSet(
0111:                        gRuleSet_white_space_pattern);
0112:                fRuleSets[RBBIRuleParseTable.kRuleSet_name_char - 128] = new UnicodeSet(
0113:                        gRuleSet_name_char_pattern);
0114:                fRuleSets[RBBIRuleParseTable.kRuleSet_name_start_char - 128] = new UnicodeSet(
0115:                        gRuleSet_name_start_char_pattern);
0116:                fRuleSets[RBBIRuleParseTable.kRuleSet_digit_char - 128] = new UnicodeSet(
0117:                        gRuleSet_digit_char_pattern);
0118:
0119:                fSymbolTable = new RBBISymbolTable(this , rb.fRules);
0120:            }
0121:
0122:            //----------------------------------------------------------------------------------------
0123:            //
0124:            //  doParseAction        Do some action during rule parsing.
0125:            //                       Called by the parse state machine.
0126:            //                       Actions build the parse tree and Unicode Sets,
0127:            //                       and maintain the parse stack for nested expressions.
0128:            //
0129:            //----------------------------------------------------------------------------------------
0130:            boolean doParseActions(int action) {
0131:                RBBINode n = null;
0132:
0133:                boolean returnVal = true;
0134:
0135:                switch (action) {
0136:
0137:                case RBBIRuleParseTable.doExprStart:
0138:                    pushNewNode(RBBINode.opStart);
0139:                    fRuleNum++;
0140:                    break;
0141:
0142:                case RBBIRuleParseTable.doExprOrOperator: {
0143:                    fixOpStack(RBBINode.precOpCat);
0144:                    RBBINode operandNode = fNodeStack[fNodeStackPtr--];
0145:                    RBBINode orNode = pushNewNode(RBBINode.opOr);
0146:                    orNode.fLeftChild = operandNode;
0147:                    operandNode.fParent = orNode;
0148:                }
0149:                    break;
0150:
0151:                case RBBIRuleParseTable.doExprCatOperator:
0152:                    // concatenation operator.
0153:                    // For the implicit concatenation of adjacent terms in an expression that are
0154:                    //   not separated by any other operator.  Action is invoked between the
0155:                    //   actions for the two terms.
0156:                {
0157:                    fixOpStack(RBBINode.precOpCat);
0158:                    RBBINode operandNode = fNodeStack[fNodeStackPtr--];
0159:                    RBBINode catNode = pushNewNode(RBBINode.opCat);
0160:                    catNode.fLeftChild = operandNode;
0161:                    operandNode.fParent = catNode;
0162:                }
0163:                    break;
0164:
0165:                case RBBIRuleParseTable.doLParen:
0166:                    // Open Paren.
0167:                    //   The openParen node is a dummy operation type with a low precedence,
0168:                    //     which has the affect of ensuring that any real binary op that
0169:                    //     follows within the parens binds more tightly to the operands than
0170:                    //     stuff outside of the parens.
0171:                    pushNewNode(RBBINode.opLParen);
0172:                    break;
0173:
0174:                case RBBIRuleParseTable.doExprRParen:
0175:                    fixOpStack(RBBINode.precLParen);
0176:                    break;
0177:
0178:                case RBBIRuleParseTable.doNOP:
0179:                    break;
0180:
0181:                case RBBIRuleParseTable.doStartAssign:
0182:                    // We've just scanned "$variable = "
0183:                    // The top of the node stack has the $variable ref node.
0184:
0185:                    // Save the start position of the RHS text in the StartExpression node
0186:                    //   that precedes the $variableReference node on the stack.
0187:                    //   This will eventually be used when saving the full $variable replacement
0188:                    //   text as a string.
0189:                    n = fNodeStack[fNodeStackPtr - 1];
0190:                    n.fFirstPos = fNextIndex; // move past the '='
0191:
0192:                    // Push a new start-of-expression node; needed to keep parse of the
0193:                    //   RHS expression happy.
0194:                    pushNewNode(RBBINode.opStart);
0195:                    break;
0196:
0197:                case RBBIRuleParseTable.doEndAssign: {
0198:                    // We have reached the end of an assignement statement.
0199:                    //   Current scan char is the ';' that terminates the assignment.
0200:
0201:                    // Terminate expression, leaves expression parse tree rooted in TOS node.
0202:                    fixOpStack(RBBINode.precStart);
0203:
0204:                    RBBINode startExprNode = fNodeStack[fNodeStackPtr - 2];
0205:                    RBBINode varRefNode = fNodeStack[fNodeStackPtr - 1];
0206:                    RBBINode RHSExprNode = fNodeStack[fNodeStackPtr];
0207:
0208:                    // Save original text of right side of assignment, excluding the terminating ';'
0209:                    //  in the root of the node for the right-hand-side expression.
0210:                    RHSExprNode.fFirstPos = startExprNode.fFirstPos;
0211:                    RHSExprNode.fLastPos = fScanIndex;
0212:                    // fRB.fRules.extractBetween(RHSExprNode.fFirstPos, RHSExprNode.fLastPos, RHSExprNode.fText);
0213:                    RHSExprNode.fText = fRB.fRules.substring(
0214:                            RHSExprNode.fFirstPos, RHSExprNode.fLastPos);
0215:
0216:                    // Expression parse tree becomes l. child of the $variable reference node.
0217:                    varRefNode.fLeftChild = RHSExprNode;
0218:                    RHSExprNode.fParent = varRefNode;
0219:
0220:                    // Make a symbol table entry for the $variableRef node.
0221:                    fSymbolTable.addEntry(varRefNode.fText, varRefNode);
0222:
0223:                    // Clean up the stack.
0224:                    fNodeStackPtr -= 3;
0225:                    break;
0226:                }
0227:
0228:                case RBBIRuleParseTable.doEndOfRule: {
0229:                    fixOpStack(RBBINode.precStart); // Terminate expression, leaves expression
0230:
0231:                    if (fRB.fDebugEnv != null
0232:                            && fRB.fDebugEnv.indexOf("rtree") >= 0) {
0233:                        printNodeStack("end of rule");
0234:                    }
0235:                    Assert.assrt(fNodeStackPtr == 1);
0236:
0237:                    // If this rule includes a look-ahead '/', add a endMark node to the
0238:                    //   expression tree.
0239:                    if (fLookAheadRule) {
0240:                        RBBINode this Rule = fNodeStack[fNodeStackPtr];
0241:                        RBBINode endNode = pushNewNode(RBBINode.endMark);
0242:                        RBBINode catNode = pushNewNode(RBBINode.opCat);
0243:                        fNodeStackPtr -= 2;
0244:                        catNode.fLeftChild = this Rule;
0245:                        catNode.fRightChild = endNode;
0246:                        fNodeStack[fNodeStackPtr] = catNode;
0247:                        endNode.fVal = fRuleNum;
0248:                        endNode.fLookAheadEnd = true;
0249:                    }
0250:
0251:                    // All rule expressions are ORed together.
0252:                    // The ';' that terminates an expression really just functions as a '|' with
0253:                    //   a low operator prededence.
0254:                    //
0255:                    // Each of the four sets of rules are collected separately.
0256:                    //  (forward, reverse, safe_forward, safe_reverse)
0257:                    //  OR this rule into the appropriate group of them.
0258:                    //
0259:
0260:                    int destRules = (fReverseRule ? fRB.fReverseTree
0261:                            : fRB.fDefaultTree);
0262:
0263:                    if (fRB.fTreeRoots[destRules] != null) {
0264:                        // This is not the first rule encounted.
0265:                        // OR previous stuff  (from *destRules)
0266:                        // with the current rule expression (on the Node Stack)
0267:                        //  with the resulting OR expression going to *destRules
0268:                        //
0269:                        RBBINode this Rule = fNodeStack[fNodeStackPtr];
0270:                        RBBINode prevRules = fRB.fTreeRoots[destRules];
0271:                        RBBINode orNode = pushNewNode(RBBINode.opOr);
0272:                        orNode.fLeftChild = prevRules;
0273:                        prevRules.fParent = orNode;
0274:                        orNode.fRightChild = this Rule;
0275:                        this Rule.fParent = orNode;
0276:                        fRB.fTreeRoots[destRules] = orNode;
0277:                    } else {
0278:                        // This is the first rule encountered (for this direction).
0279:                        // Just move its parse tree from the stack to *destRules.
0280:                        fRB.fTreeRoots[destRules] = fNodeStack[fNodeStackPtr];
0281:                    }
0282:                    fReverseRule = false; // in preparation for the next rule.
0283:                    fLookAheadRule = false;
0284:                    fNodeStackPtr = 0;
0285:                }
0286:                    break;
0287:
0288:                case RBBIRuleParseTable.doRuleError:
0289:                    error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
0290:                    returnVal = false;
0291:                    break;
0292:
0293:                case RBBIRuleParseTable.doVariableNameExpectedErr:
0294:                    error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
0295:                    break;
0296:
0297:                //
0298:                //  Unary operands  + ? *
0299:                //    These all appear after the operand to which they apply.
0300:                //    When we hit one, the operand (may be a whole sub expression)
0301:                //    will be on the top of the stack.
0302:                //    Unary Operator becomes TOS, with the old TOS as its one child.
0303:                case RBBIRuleParseTable.doUnaryOpPlus: {
0304:                    RBBINode operandNode = fNodeStack[fNodeStackPtr--];
0305:                    RBBINode plusNode = pushNewNode(RBBINode.opPlus);
0306:                    plusNode.fLeftChild = operandNode;
0307:                    operandNode.fParent = plusNode;
0308:                }
0309:                    break;
0310:
0311:                case RBBIRuleParseTable.doUnaryOpQuestion: {
0312:                    RBBINode operandNode = fNodeStack[fNodeStackPtr--];
0313:                    RBBINode qNode = pushNewNode(RBBINode.opQuestion);
0314:                    qNode.fLeftChild = operandNode;
0315:                    operandNode.fParent = qNode;
0316:                }
0317:                    break;
0318:
0319:                case RBBIRuleParseTable.doUnaryOpStar: {
0320:                    RBBINode operandNode = fNodeStack[fNodeStackPtr--];
0321:                    RBBINode starNode = pushNewNode(RBBINode.opStar);
0322:                    starNode.fLeftChild = operandNode;
0323:                    operandNode.fParent = starNode;
0324:                }
0325:                    break;
0326:
0327:                case RBBIRuleParseTable.doRuleChar:
0328:                    // A "Rule Character" is any single character that is a literal part
0329:                    // of the regular expression.  Like a, b and c in the expression "(abc*) | [:L:]"
0330:                    // These are pretty uncommon in break rules; the terms are more commonly
0331:                    //  sets.  To keep things uniform, treat these characters like as
0332:                    // sets that just happen to contain only one character.
0333:                {
0334:                    n = pushNewNode(RBBINode.setRef);
0335:                    String s = (new StringBuffer().append((char) fC.fChar))
0336:                            .toString();
0337:                    findSetFor(s, n, null);
0338:                    n.fFirstPos = fScanIndex;
0339:                    n.fLastPos = fNextIndex;
0340:                    n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
0341:                    break;
0342:                }
0343:
0344:                case RBBIRuleParseTable.doDotAny:
0345:                    // scanned a ".", meaning match any single character.
0346:                {
0347:                    n = pushNewNode(RBBINode.setRef);
0348:                    findSetFor(kAny, n, null);
0349:                    n.fFirstPos = fScanIndex;
0350:                    n.fLastPos = fNextIndex;
0351:                    n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
0352:                    break;
0353:                }
0354:
0355:                case RBBIRuleParseTable.doSlash:
0356:                    // Scanned a '/', which identifies a look-ahead break position in a rule.
0357:                    n = pushNewNode(RBBINode.lookAhead);
0358:                    n.fVal = fRuleNum;
0359:                    n.fFirstPos = fScanIndex;
0360:                    n.fLastPos = fNextIndex;
0361:                    n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
0362:                    fLookAheadRule = true;
0363:                    break;
0364:
0365:                case RBBIRuleParseTable.doStartTagValue:
0366:                    // Scanned a '{', the opening delimiter for a tag value within a rule.
0367:                    n = pushNewNode(RBBINode.tag);
0368:                    n.fVal = 0;
0369:                    n.fFirstPos = fScanIndex;
0370:                    n.fLastPos = fNextIndex;
0371:                    break;
0372:
0373:                case RBBIRuleParseTable.doTagDigit:
0374:                    // Just scanned a decimal digit that's part of a tag value
0375:                {
0376:                    n = fNodeStack[fNodeStackPtr];
0377:                    int v = Character.digit((char) fC.fChar, 10);
0378:                    n.fVal = n.fVal * 10 + v;
0379:                    break;
0380:                }
0381:
0382:                case RBBIRuleParseTable.doTagValue:
0383:                    n = fNodeStack[fNodeStackPtr];
0384:                    n.fLastPos = fNextIndex;
0385:                    n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
0386:                    break;
0387:
0388:                case RBBIRuleParseTable.doTagExpectedError:
0389:                    error(RBBIRuleBuilder.U_BRK_MALFORMED_RULE_TAG);
0390:                    returnVal = false;
0391:                    break;
0392:
0393:                case RBBIRuleParseTable.doOptionStart:
0394:                    // Scanning a !!option.   At the start of string.
0395:                    fOptionStart = fScanIndex;
0396:                    break;
0397:
0398:                case RBBIRuleParseTable.doOptionEnd: {
0399:                    String opt = fRB.fRules.substring(fOptionStart, fScanIndex);
0400:                    if (opt.equals("chain")) {
0401:                        fRB.fChainRules = true;
0402:                    } else if (opt.equals("LBCMNoChain")) {
0403:                        fRB.fLBCMNoChain = true;
0404:                    } else if (opt.equals("forward")) {
0405:                        fRB.fDefaultTree = fRB.fForwardTree;
0406:                    } else if (opt.equals("reverse")) {
0407:                        fRB.fDefaultTree = fRB.fReverseTree;
0408:                    } else if (opt.equals("safe_forward")) {
0409:                        fRB.fDefaultTree = fRB.fSafeFwdTree;
0410:                    } else if (opt.equals("safe_reverse")) {
0411:                        fRB.fDefaultTree = fRB.fSafeRevTree;
0412:                    } else if (opt.equals("lookAheadHardBreak")) {
0413:                        fRB.fLookAheadHardBreak = true;
0414:                    } else {
0415:                        error(RBBIRuleBuilder.U_BRK_UNRECOGNIZED_OPTION);
0416:                    }
0417:                    break;
0418:                }
0419:
0420:                case RBBIRuleParseTable.doReverseDir:
0421:                    fReverseRule = true;
0422:                    break;
0423:
0424:                case RBBIRuleParseTable.doStartVariableName:
0425:                    n = pushNewNode(RBBINode.varRef);
0426:                    n.fFirstPos = fScanIndex;
0427:                    break;
0428:
0429:                case RBBIRuleParseTable.doEndVariableName:
0430:                    n = fNodeStack[fNodeStackPtr];
0431:                    if (n == null || n.fType != RBBINode.varRef) {
0432:                        error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
0433:                        break;
0434:                    }
0435:                    n.fLastPos = fScanIndex;
0436:                    n.fText = fRB.fRules.substring(n.fFirstPos + 1, n.fLastPos);
0437:                    // Look the newly scanned name up in the symbol table
0438:                    //   If there's an entry, set the l. child of the var ref to the replacement expression.
0439:                    //   (We also pass through here when scanning assignments, but no harm is done, other
0440:                    //    than a slight wasted effort that seems hard to avoid.  Lookup will be null)
0441:                    n.fLeftChild = fSymbolTable.lookupNode(n.fText);
0442:                    break;
0443:
0444:                case RBBIRuleParseTable.doCheckVarDef:
0445:                    n = fNodeStack[fNodeStackPtr];
0446:                    if (n.fLeftChild == null) {
0447:                        error(RBBIRuleBuilder.U_BRK_UNDEFINED_VARIABLE);
0448:                        returnVal = false;
0449:                    }
0450:                    break;
0451:
0452:                case RBBIRuleParseTable.doExprFinished:
0453:                    break;
0454:
0455:                case RBBIRuleParseTable.doRuleErrorAssignExpr:
0456:                    error(RBBIRuleBuilder.U_BRK_ASSIGN_ERROR);
0457:                    returnVal = false;
0458:                    break;
0459:
0460:                case RBBIRuleParseTable.doExit:
0461:                    returnVal = false;
0462:                    break;
0463:
0464:                case RBBIRuleParseTable.doScanUnicodeSet:
0465:                    scanSet();
0466:                    break;
0467:
0468:                default:
0469:                    error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
0470:                    returnVal = false;
0471:                    break;
0472:                }
0473:                return returnVal;
0474:            }
0475:
0476:            //----------------------------------------------------------------------------------------
0477:            //
0478:            //  Error         Throw and IllegalArgumentException in response to a rule parse error.
0479:            //
0480:            //----------------------------------------------------------------------------------------
0481:            void error(int e) {
0482:                String s = "Error " + e + " at line " + fLineNum + " column "
0483:                        + fCharNum;
0484:                IllegalArgumentException ex = new IllegalArgumentException(s);
0485:                throw ex;
0486:
0487:            }
0488:
0489:            //----------------------------------------------------------------------------------------
0490:            //
0491:            //  fixOpStack   The parse stack holds partially assembled chunks of the parse tree.
0492:            //               An entry on the stack may be as small as a single setRef node,
0493:            //               or as large as the parse tree
0494:            //               for an entire expression (this will be the one item left on the stack
0495:            //               when the parsing of an RBBI rule completes.
0496:            //
0497:            //               This function is called when a binary operator is encountered.
0498:            //               It looks back up the stack for operators that are not yet associated
0499:            //               with a right operand, and if the precedence of the stacked operator >=
0500:            //               the precedence of the current operator, binds the operand left,
0501:            //               to the previously encountered operator.
0502:            //
0503:            //----------------------------------------------------------------------------------------
0504:            void fixOpStack(int p) {
0505:                RBBINode n;
0506:                // printNodeStack("entering fixOpStack()");
0507:                for (;;) {
0508:                    n = fNodeStack[fNodeStackPtr - 1]; // an operator node
0509:                    if (n.fPrecedence == 0) {
0510:                        System.out
0511:                                .print("RBBIRuleScanner.fixOpStack, bad operator node");
0512:                        error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
0513:                        return;
0514:                    }
0515:
0516:                    if (n.fPrecedence < p
0517:                            || n.fPrecedence <= RBBINode.precLParen) {
0518:                        // The most recent operand goes with the current operator,
0519:                        //   not with the previously stacked one.
0520:                        break;
0521:                    }
0522:                    // Stack operator is a binary op  ( '|' or concatenation)
0523:                    //   TOS operand becomes right child of this operator.
0524:                    //   Resulting subexpression becomes the TOS operand.
0525:                    n.fRightChild = fNodeStack[fNodeStackPtr];
0526:                    fNodeStack[fNodeStackPtr].fParent = n;
0527:                    fNodeStackPtr--;
0528:                    // printNodeStack("looping in fixOpStack()   ");
0529:                }
0530:
0531:                if (p <= RBBINode.precLParen) {
0532:                    // Scan is at a right paren or end of expression.
0533:                    //  The scanned item must match the stack, or else there was an error.
0534:                    //  Discard the left paren (or start expr) node from the stack,
0535:                    //  leaving the completed (sub)expression as TOS.
0536:                    if (n.fPrecedence != p) {
0537:                        // Right paren encountered matched start of expression node, or
0538:                        // end of expression matched with a left paren node.
0539:                        error(RBBIRuleBuilder.U_BRK_MISMATCHED_PAREN);
0540:                    }
0541:                    fNodeStack[fNodeStackPtr - 1] = fNodeStack[fNodeStackPtr];
0542:                    fNodeStackPtr--;
0543:                    // Delete the now-discarded LParen or Start node.
0544:                    // delete n;
0545:                }
0546:                // printNodeStack("leaving fixOpStack()");
0547:            }
0548:
0549:            //----------------------------------------------------------------------------
0550:            //
0551:            //       RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
0552:            //                        been encountered.  The val Node will be of nodetype uset
0553:            //                        and contain pointers to the actual UnicodeSets.
0554:            //                        The Key is the source string for initializing the set.
0555:            //
0556:            //                        The hash table is used to avoid creating duplicate
0557:            //                        unnamed (not $var references) UnicodeSets.
0558:            //
0559:            //----------------------------------------------------------------------------
0560:            static class RBBISetTableEl {
0561:                String key;
0562:                RBBINode val;
0563:            }
0564:
0565:            //----------------------------------------------------------------------------------------
0566:            //
0567:            //   findSetFor    given a String,
0568:            //                  - find the corresponding Unicode Set  (uset node)
0569:            //                         (create one if necessary)
0570:            //                  - Set fLeftChild of the caller's node (should be a setRef node)
0571:            //                         to the uset node
0572:            //                 Maintain a hash table of uset nodes, so the same one is always used
0573:            //                    for the same string.
0574:            //                 If a "to adopt" set is provided and we haven't seen this key before,
0575:            //                    add the provided set to the hash table.
0576:            //                 If the string is one (32 bit) char in length, the set contains
0577:            //                    just one element which is the char in question.
0578:            //                 If the string is "any", return a set containing all chars.
0579:            //
0580:            //----------------------------------------------------------------------------------------
0581:            void findSetFor(String s, RBBINode node, UnicodeSet setToAdopt) {
0582:
0583:                RBBISetTableEl el;
0584:
0585:                // First check whether we've already cached a set for this string.
0586:                // If so, just use the cached set in the new node.
0587:                //   delete any set provided by the caller, since we own it.
0588:                el = (RBBISetTableEl) fSetTable.get(s);
0589:                if (el != null) {
0590:                    node.fLeftChild = el.val;
0591:                    Assert.assrt(node.fLeftChild.fType == RBBINode.uset);
0592:                    return;
0593:                }
0594:
0595:                // Haven't seen this set before.
0596:                // If the caller didn't provide us with a prebuilt set,
0597:                //   create a new UnicodeSet now.
0598:                if (setToAdopt == null) {
0599:                    if (s.equals(kAny)) {
0600:                        setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
0601:                    } else {
0602:                        int c;
0603:                        c = UTF16.charAt(s, 0);
0604:                        setToAdopt = new UnicodeSet(c, c);
0605:                    }
0606:                }
0607:
0608:                //
0609:                // Make a new uset node to refer to this UnicodeSet
0610:                // This new uset node becomes the child of the caller's setReference node.
0611:                //
0612:                RBBINode usetNode = new RBBINode(RBBINode.uset);
0613:                usetNode.fInputSet = setToAdopt;
0614:                usetNode.fParent = node;
0615:                node.fLeftChild = usetNode;
0616:                usetNode.fText = s;
0617:
0618:                //
0619:                // Add the new uset node to the list of all uset nodes.
0620:                //
0621:                fRB.fUSetNodes.add(usetNode);
0622:
0623:                //
0624:                // Add the new set to the set hash table.
0625:                //
0626:                el = new RBBISetTableEl();
0627:                el.key = s;
0628:                el.val = usetNode;
0629:                fSetTable.put(el.key, el);
0630:
0631:                return;
0632:            }
0633:
0634:            //
0635:            //  Assorted Unicode character constants.
0636:            //     Numeric because there is no portable way to enter them as literals.
0637:            //     (Think EBCDIC).
0638:            //
0639:            static final int chNEL = 0x85; //    NEL newline variant
0640:            static final int chLS = 0x2028; //    Unicode Line Separator
0641:
0642:            //----------------------------------------------------------------------------------------
0643:            //
0644:            //  stripRules    Return a rules string without unnecessary
0645:            //                characters.
0646:            //
0647:            //----------------------------------------------------------------------------------------
0648:            static String stripRules(String rules) {
0649:                StringBuffer strippedRules = new StringBuffer();
0650:                int rulesLength = rules.length();
0651:                for (int idx = 0; idx < rulesLength;) {
0652:                    char ch = rules.charAt(idx++);
0653:                    if (ch == '#') {
0654:                        while (idx < rulesLength && ch != '\r' && ch != '\n'
0655:                                && ch != chNEL) {
0656:                            ch = rules.charAt(idx++);
0657:                        }
0658:                    }
0659:                    if (!UCharacter.isISOControl(ch)) {
0660:                        strippedRules.append(ch);
0661:                    }
0662:                }
0663:                return strippedRules.toString();
0664:            }
0665:
0666:            //----------------------------------------------------------------------------------------
0667:            //
0668:            //  nextCharLL    Low Level Next Char from rule input source.
0669:            //                Get a char from the input character iterator,
0670:            //                keep track of input position for error reporting.
0671:            //
0672:            //----------------------------------------------------------------------------------------
0673:            int nextCharLL() {
0674:                int ch;
0675:
0676:                if (fNextIndex >= fRB.fRules.length()) {
0677:                    return -1;
0678:                }
0679:                ch = UTF16.charAt(fRB.fRules, fNextIndex);
0680:                fNextIndex = UTF16.moveCodePointOffset(fRB.fRules, fNextIndex,
0681:                        1);
0682:
0683:                if (ch == '\r' || ch == chNEL || ch == chLS || ch == '\n'
0684:                        && fLastChar != '\r') {
0685:                    // Character is starting a new line.  Bump up the line number, and
0686:                    //  reset the column to 0.
0687:                    fLineNum++;
0688:                    fCharNum = 0;
0689:                    if (fQuoteMode) {
0690:                        error(RBBIRuleBuilder.U_BRK_NEW_LINE_IN_QUOTED_STRING);
0691:                        fQuoteMode = false;
0692:                    }
0693:                } else {
0694:                    // Character is not starting a new line.  Except in the case of a
0695:                    //   LF following a CR, increment the column position.
0696:                    if (ch != '\n') {
0697:                        fCharNum++;
0698:                    }
0699:                }
0700:                fLastChar = ch;
0701:                return ch;
0702:            }
0703:
0704:            //---------------------------------------------------------------------------------
0705:            //
0706:            //   nextChar     for rules scanning.  At this level, we handle stripping
0707:            //                out comments and processing backslash character escapes.
0708:            //                The rest of the rules grammar is handled at the next level up.
0709:            //
0710:            //---------------------------------------------------------------------------------
0711:            void nextChar(RBBIRuleChar c) {
0712:
0713:                // Unicode Character constants needed for the processing done by nextChar(),
0714:                //   in hex because literals wont work on EBCDIC machines.
0715:
0716:                fScanIndex = fNextIndex;
0717:                c.fChar = nextCharLL();
0718:                c.fEscaped = false;
0719:
0720:                //
0721:                //  check for '' sequence.
0722:                //  These are recognized in all contexts, whether in quoted text or not.
0723:                //
0724:                if (c.fChar == '\'') {
0725:                    if (UTF16.charAt(fRB.fRules, fNextIndex) == '\'') {
0726:                        c.fChar = nextCharLL(); // get nextChar officially so character counts
0727:                        c.fEscaped = true; //   stay correct.
0728:                    } else {
0729:                        // Single quote, by itself.
0730:                        //   Toggle quoting mode.
0731:                        //   Return either '('  or ')', because quotes cause a grouping of the quoted text.
0732:                        fQuoteMode = !fQuoteMode;
0733:                        if (fQuoteMode == true) {
0734:                            c.fChar = '(';
0735:                        } else {
0736:                            c.fChar = ')';
0737:                        }
0738:                        c.fEscaped = false; // The paren that we return is not escaped.
0739:                        return;
0740:                    }
0741:                }
0742:
0743:                if (fQuoteMode) {
0744:                    c.fEscaped = true;
0745:                } else {
0746:                    // We are not in a 'quoted region' of the source.
0747:                    //
0748:                    if (c.fChar == '#') {
0749:                        // Start of a comment.  Consume the rest of it.
0750:                        //  The new-line char that terminates the comment is always returned.
0751:                        //  It will be treated as white-space, and serves to break up anything
0752:                        //    that might otherwise incorrectly clump together with a comment in
0753:                        //    the middle (a variable name, for example.)
0754:                        for (;;) {
0755:                            c.fChar = nextCharLL();
0756:                            if (c.fChar == (int) -1
0757:                                    || // EOF
0758:                                    c.fChar == '\r' || c.fChar == '\n'
0759:                                    || c.fChar == chNEL || c.fChar == chLS) {
0760:                                break;
0761:                            }
0762:                        }
0763:                    }
0764:                    if (c.fChar == (int) -1) {
0765:                        return;
0766:                    }
0767:
0768:                    //
0769:                    //  check for backslash escaped characters.
0770:                    //  Use String.unescapeAt() to handle them.
0771:                    //
0772:                    if (c.fChar == '\\') {
0773:                        c.fEscaped = true;
0774:                        int[] unescapeIndex = new int[1];
0775:                        unescapeIndex[0] = fNextIndex;
0776:                        c.fChar = Utility.unescapeAt(fRB.fRules, unescapeIndex);
0777:                        if (unescapeIndex[0] == fNextIndex) {
0778:                            error(RBBIRuleBuilder.U_BRK_HEX_DIGITS_EXPECTED);
0779:                        }
0780:
0781:                        fCharNum += unescapeIndex[0] - fNextIndex;
0782:                        fNextIndex = unescapeIndex[0];
0783:                    }
0784:                }
0785:                // putc(c.fChar, stdout);
0786:            }
0787:
0788:            //---------------------------------------------------------------------------------
0789:            //
0790:            //  Parse RBBI rules.   The state machine for rules parsing is here.
0791:            //                      The state tables are hand-written in the file rbbirpt.txt,
0792:            //                      and converted to the form used here by a perl
0793:            //                      script rbbicst.pl
0794:            //
0795:            //---------------------------------------------------------------------------------
0796:            void parse() {
0797:                int state;
0798:                RBBIRuleParseTable.RBBIRuleTableElement tableEl;
0799:
0800:                state = 1;
0801:                nextChar(fC);
0802:                //
0803:                // Main loop for the rule parsing state machine.
0804:                //   Runs once per state transition.
0805:                //   Each time through optionally performs, depending on the state table,
0806:                //      - an advance to the the next input char
0807:                //      - an action to be performed.
0808:                //      - pushing or popping a state to/from the local state return stack.
0809:                //
0810:                for (;;) {
0811:                    // Quit if state == 0.  This is the normal way to exit the state machine.
0812:                    //
0813:                    if (state == 0) {
0814:                        break;
0815:                    }
0816:
0817:                    // Find the state table element that matches the input char from the rule, or the
0818:                    //    class of the input character.  Start with the first table row for this
0819:                    //    state, then linearly scan forward until we find a row that matches the
0820:                    //    character.  The last row for each state always matches all characters, so
0821:                    //    the search will stop there, if not before.
0822:                    //
0823:                    tableEl = RBBIRuleParseTable.gRuleParseStateTable[state];
0824:                    if (fRB.fDebugEnv != null
0825:                            && fRB.fDebugEnv.indexOf("scan") >= 0) {
0826:                        System.out.println("char, line, col = (\'"
0827:                                + (char) fC.fChar + "\', " + fLineNum + ", "
0828:                                + fCharNum + "    state = "
0829:                                + tableEl.fStateName);
0830:                    }
0831:
0832:                    for (int tableRow = state;; tableRow++) { // loop over the state table rows associated with this state.
0833:                        tableEl = RBBIRuleParseTable.gRuleParseStateTable[tableRow];
0834:                        if (fRB.fDebugEnv != null
0835:                                && fRB.fDebugEnv.indexOf("scan") >= 0) {
0836:                            System.out.print(".");
0837:                        }
0838:                        if (tableEl.fCharClass < 127 && fC.fEscaped == false
0839:                                && tableEl.fCharClass == fC.fChar) {
0840:                            // Table row specified an individual character, not a set, and
0841:                            //   the input character is not escaped, and
0842:                            //   the input character matched it.
0843:                            break;
0844:                        }
0845:                        if (tableEl.fCharClass == 255) {
0846:                            // Table row specified default, match anything character class.
0847:                            break;
0848:                        }
0849:                        if (tableEl.fCharClass == 254 && fC.fEscaped) {
0850:                            // Table row specified "escaped" and the char was escaped.
0851:                            break;
0852:                        }
0853:                        if (tableEl.fCharClass == 253 && fC.fEscaped
0854:                                && (fC.fChar == 0x50 || fC.fChar == 0x70)) {
0855:                            // Table row specified "escaped P" and the char is either 'p' or 'P'.
0856:                            break;
0857:                        }
0858:                        if (tableEl.fCharClass == 252 && fC.fChar == (int) -1) {
0859:                            // Table row specified eof and we hit eof on the input.
0860:                            break;
0861:                        }
0862:
0863:                        if (tableEl.fCharClass >= 128
0864:                                && tableEl.fCharClass < 240 && // Table specs a char class &&
0865:                                fC.fEscaped == false && //   char is not escaped &&
0866:                                fC.fChar != (int) -1) { //   char is not EOF
0867:                            UnicodeSet uniset = fRuleSets[tableEl.fCharClass - 128];
0868:                            if (uniset.contains(fC.fChar)) {
0869:                                // Table row specified a character class, or set of characters,
0870:                                //   and the current char matches it.
0871:                                break;
0872:                            }
0873:                        }
0874:                    }
0875:
0876:                    if (fRB.fDebugEnv != null
0877:                            && fRB.fDebugEnv.indexOf("scan") >= 0) {
0878:                        System.out.println("");
0879:                    }
0880:                    //
0881:                    // We've found the row of the state table that matches the current input
0882:                    //   character from the rules string.
0883:                    // Perform any action specified  by this row in the state table.
0884:                    if (doParseActions(tableEl.fAction) == false) {
0885:                        // Break out of the state machine loop if the
0886:                        //   the action signalled some kind of error, or
0887:                        //   the action was to exit, occurs on normal end-of-rules-input.
0888:                        break;
0889:                    }
0890:
0891:                    if (tableEl.fPushState != 0) {
0892:                        fStackPtr++;
0893:                        if (fStackPtr >= kStackSize) {
0894:                            System.out
0895:                                    .println("RBBIRuleScanner.parse() - state stack overflow.");
0896:                            error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
0897:                        }
0898:                        fStack[fStackPtr] = tableEl.fPushState;
0899:                    }
0900:
0901:                    if (tableEl.fNextChar) {
0902:                        nextChar(fC);
0903:                    }
0904:
0905:                    // Get the next state from the table entry, or from the
0906:                    //   state stack if the next state was specified as "pop".
0907:                    if (tableEl.fNextState != 255) {
0908:                        state = tableEl.fNextState;
0909:                    } else {
0910:                        state = fStack[fStackPtr];
0911:                        fStackPtr--;
0912:                        if (fStackPtr < 0) {
0913:                            System.out
0914:                                    .println("RBBIRuleScanner.parse() - state stack underflow.");
0915:                            error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
0916:                        }
0917:                    }
0918:
0919:                }
0920:
0921:                //
0922:                // If there were NO user specified reverse rules, set up the equivalent of ".*;"
0923:                //
0924:                if (fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree] == null) {
0925:                    fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree] = pushNewNode(RBBINode.opStar);
0926:                    RBBINode operand = pushNewNode(RBBINode.setRef);
0927:                    findSetFor(kAny, operand, null);
0928:                    fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree].fLeftChild = operand;
0929:                    operand.fParent = fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree];
0930:                    fNodeStackPtr -= 2;
0931:                }
0932:
0933:                //
0934:                // Parsing of the input RBBI rules is complete.
0935:                // We now have a parse tree for the rule expressions
0936:                // and a list of all UnicodeSets that are referenced.
0937:                //
0938:                if (fRB.fDebugEnv != null
0939:                        && fRB.fDebugEnv.indexOf("symbols") >= 0) {
0940:                    fSymbolTable.rbbiSymtablePrint();
0941:                }
0942:                if (fRB.fDebugEnv != null
0943:                        && fRB.fDebugEnv.indexOf("ptree") >= 0) {
0944:                    System.out.println("Completed Forward Rules Parse Tree...");
0945:                    fRB.fTreeRoots[RBBIRuleBuilder.fForwardTree]
0946:                            .printTree(true);
0947:                    System.out
0948:                            .println("\nCompleted Reverse Rules Parse Tree...");
0949:                    fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree]
0950:                            .printTree(true);
0951:                    System.out
0952:                            .println("\nCompleted Safe Point Forward Rules Parse Tree...");
0953:                    if (fRB.fTreeRoots[RBBIRuleBuilder.fSafeFwdTree] == null) {
0954:                        System.out.println("  -- null -- ");
0955:                    } else {
0956:                        fRB.fTreeRoots[RBBIRuleBuilder.fSafeFwdTree]
0957:                                .printTree(true);
0958:                    }
0959:                    System.out
0960:                            .println("\nCompleted Safe Point Reverse Rules Parse Tree...");
0961:                    if (fRB.fTreeRoots[RBBIRuleBuilder.fSafeRevTree] == null) {
0962:                        System.out.println("  -- null -- ");
0963:                    } else {
0964:                        fRB.fTreeRoots[RBBIRuleBuilder.fSafeRevTree]
0965:                                .printTree(true);
0966:                    }
0967:                }
0968:            }
0969:
0970:            //---------------------------------------------------------------------------------
0971:            //
0972:            //  printNodeStack     for debugging...
0973:            //
0974:            //---------------------------------------------------------------------------------
0975:            void printNodeStack(String title) {
0976:                int i;
0977:                System.out.println(title + ".  Dumping node stack...\n");
0978:                for (i = fNodeStackPtr; i > 0; i--) {
0979:                    fNodeStack[i].printTree(true);
0980:                }
0981:            }
0982:
0983:            //---------------------------------------------------------------------------------
0984:            //
0985:            //  pushNewNode   create a new RBBINode of the specified type and push it
0986:            //                onto the stack of nodes.
0987:            //
0988:            //---------------------------------------------------------------------------------
0989:            RBBINode pushNewNode(int nodeType) {
0990:                fNodeStackPtr++;
0991:                if (fNodeStackPtr >= kStackSize) {
0992:                    System.out
0993:                            .println("RBBIRuleScanner.pushNewNode - stack overflow.");
0994:                    error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
0995:                }
0996:                fNodeStack[fNodeStackPtr] = new RBBINode(nodeType);
0997:                return fNodeStack[fNodeStackPtr];
0998:            }
0999:
1000:            //---------------------------------------------------------------------------------
1001:            //
1002:            //  scanSet    Construct a UnicodeSet from the text at the current scan
1003:            //             position.  Advance the scan position to the first character
1004:            //             after the set.
1005:            //
1006:            //             A new RBBI setref node referring to the set is pushed onto the node
1007:            //             stack.
1008:            //
1009:            //             The scan position is normally under the control of the state machine
1010:            //             that controls rule parsing.  UnicodeSets, however, are parsed by
1011:            //             the UnicodeSet constructor, not by the RBBI rule parser.
1012:            //
1013:            //---------------------------------------------------------------------------------
1014:            void scanSet() {
1015:                UnicodeSet uset = null;
1016:                int startPos;
1017:                ParsePosition pos = new ParsePosition(fScanIndex);
1018:                int i;
1019:
1020:                startPos = fScanIndex;
1021:                try {
1022:                    uset = new UnicodeSet(fRB.fRules, pos, fSymbolTable,
1023:                            UnicodeSet.IGNORE_SPACE);
1024:                } catch (Exception e) { // TODO:  catch fewer exception types.
1025:                    // Repackage UnicodeSet errors as RBBI rule builder errors, with location info.
1026:                    error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
1027:                }
1028:
1029:                // Verify that the set contains at least one code point.
1030:                //
1031:                if (uset.isEmpty()) {
1032:                    // This set is empty.
1033:                    //  Make it an error, because it almost certainly is not what the user wanted.
1034:                    //  Also, avoids having to think about corner cases in the tree manipulation code
1035:                    //   that occurs later on.
1036:                    //  TODO:  this shouldn't be an error; it does happen.
1037:                    error(RBBIRuleBuilder.U_BRK_RULE_EMPTY_SET);
1038:                }
1039:
1040:                // Advance the RBBI parse postion over the UnicodeSet pattern.
1041:                //   Don't just set fScanIndex because the line/char positions maintained
1042:                //   for error reporting would be thrown off.
1043:                i = pos.getIndex();
1044:                for (;;) {
1045:                    if (fNextIndex >= i) {
1046:                        break;
1047:                    }
1048:                    nextCharLL();
1049:                }
1050:
1051:                RBBINode n;
1052:
1053:                n = pushNewNode(RBBINode.setRef);
1054:                n.fFirstPos = startPos;
1055:                n.fLastPos = fNextIndex;
1056:                n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
1057:                //  findSetFor() serves several purposes here:
1058:                //     - Adopts storage for the UnicodeSet, will be responsible for deleting.
1059:                //     - Mantains collection of all sets in use, needed later for establishing
1060:                //          character categories for run time engine.
1061:                //     - Eliminates mulitiple instances of the same set.
1062:                //     - Creates a new uset node if necessary (if this isn't a duplicate.)
1063:                findSetFor(n.fText, n, uset);
1064:            }
1065:
1066:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.