001: /*
002: ***************************************************************************
003: * Copyright (C) 2002-2006 International Business Machines Corporation *
004: * and others. All rights reserved. *
005: ***************************************************************************
006: */
007: package com.ibm.icu.text;
008:
009: import java.util.HashMap;
010: import java.util.Collection;
011:
012: import java.text.ParsePosition;
013: import com.ibm.icu.lang.UCharacter;
014:
015: class RBBISymbolTable implements SymbolTable {
016:
017: String fRules;
018: HashMap fHashTable;
019: RBBIRuleScanner fRuleScanner;
020:
021: // These next two fields are part of the mechanism for passing references to
022: // already-constructed UnicodeSets back to the UnicodeSet constructor
023: // when the pattern includes $variable references.
024: String ffffString;
025: UnicodeSet fCachedSetLookup;
026:
027: static class RBBISymbolTableEntry {
028: String key;
029: RBBINode val;
030: };
031:
032: RBBISymbolTable(RBBIRuleScanner rs, String rules) {
033: fRules = rules;
034: fRuleScanner = rs;
035: fHashTable = new HashMap();
036: ffffString = "\uffff";
037: }
038:
039: //
040: // RBBISymbolTable::lookup This function from the abstract symbol table inteface
041: // looks up a variable name and returns a UnicodeString
042: // containing the substitution text.
043: //
044: // The variable name does NOT include the leading $.
045: //
046: public char[] lookup(String s) {
047: RBBISymbolTableEntry el;
048: RBBINode varRefNode;
049: RBBINode exprNode;
050:
051: RBBINode usetNode;
052: String retString;
053:
054: el = (RBBISymbolTableEntry) fHashTable.get(s);
055: if (el == null) {
056: return null;
057: }
058:
059: // Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
060: varRefNode = el.val;
061: while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
062: varRefNode = varRefNode.fLeftChild;
063: }
064:
065: exprNode = varRefNode.fLeftChild; // Root node of expression for variable
066: if (exprNode.fType == RBBINode.setRef) {
067: // The $variable refers to a single UnicodeSet
068: // return the ffffString, which will subsequently be interpreted as a
069: // stand-in character for the set by RBBISymbolTable::lookupMatcher()
070: usetNode = exprNode.fLeftChild;
071: fCachedSetLookup = usetNode.fInputSet;
072: retString = ffffString;
073: } else {
074: // The variable refers to something other than just a set.
075: // This is an error in the rules being compiled. $Variables inside of UnicodeSets
076: // must refer only to another set, not to some random non-set expression.
077: // Note: single characters are represented as sets, so they are ok.
078: fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
079: retString = exprNode.fText;
080: fCachedSetLookup = null;
081: }
082: return retString.toCharArray();
083: }
084:
085: //
086: // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
087: // interface maps a single stand-in character to a
088: // pointer to a Unicode Set. The Unicode Set code uses this
089: // mechanism to get all references to the same $variable
090: // name to refer to a single common Unicode Set instance.
091: //
092: // This implementation cheats a little, and does not maintain a map of stand-in chars
093: // to sets. Instead, it takes advantage of the fact that the UnicodeSet
094: // constructor will always call this function right after calling lookup(),
095: // and we just need to remember what set to return between these two calls.
096: public UnicodeMatcher lookupMatcher(int ch) {
097: UnicodeSet retVal = null;
098: if (ch == 0xffff) {
099: retVal = fCachedSetLookup;
100: fCachedSetLookup = null;
101: }
102: return retVal;
103: }
104:
105: //
106: // RBBISymbolTable::parseReference This function from the abstract symbol table interface
107: // looks for a $variable name in the source text.
108: // It does not look it up, only scans for it.
109: // It is used by the UnicodeSet parser.
110: //
111: public String parseReference(String text, ParsePosition pos,
112: int limit) {
113: int start = pos.getIndex();
114: int i = start;
115: String result = "";
116: while (i < limit) {
117: int c = UTF16.charAt(text, i);
118: if ((i == start && !UCharacter.isUnicodeIdentifierStart(c))
119: || !UCharacter.isUnicodeIdentifierPart(c)) {
120: break;
121: }
122: i += UTF16.getCharCount(c);
123: }
124: if (i == start) { // No valid name chars
125: return result; // Indicate failure with empty string
126: }
127: pos.setIndex(i);
128: result = text.substring(start, i);
129: return result;
130: }
131:
132: //
133: // RBBISymbolTable::lookupNode Given a key (a variable name), return the
134: // corresponding RBBI Node. If there is no entry
135: // in the table for this name, return NULL.
136: //
137: RBBINode lookupNode(String key) {
138:
139: RBBINode retNode = null;
140: RBBISymbolTableEntry el;
141:
142: el = (RBBISymbolTableEntry) fHashTable.get(key);
143: if (el != null) {
144: retNode = el.val;
145: }
146: return retNode;
147: }
148:
149: //
150: // RBBISymbolTable::addEntry Add a new entry to the symbol table.
151: // Indicate an error if the name already exists -
152: // this will only occur in the case of duplicate
153: // variable assignments.
154: //
155: void addEntry(String key, RBBINode val) {
156: RBBISymbolTableEntry e;
157: e = (RBBISymbolTableEntry) fHashTable.get(key);
158: if (e != null) {
159: fRuleScanner
160: .error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
161: return;
162: }
163:
164: e = new RBBISymbolTableEntry();
165: e.key = key;
166: e.val = val;
167: fHashTable.put(e.key, e);
168: }
169:
170: //
171: // RBBISymbolTable::print Debugging function, dump out the symbol table contents.
172: //
173: void rbbiSymtablePrint() {
174: System.out
175: .print("Variable Definitions\n"
176: + "Name Node Val String Val\n"
177: + "----------------------------------------------------------------------\n");
178:
179: int pos = -1;
180: RBBISymbolTableEntry[] syms = new RBBISymbolTableEntry[0];
181: Collection t = fHashTable.values();
182: syms = (RBBISymbolTableEntry[]) t.toArray(syms);
183:
184: for (int i = 0; i < syms.length; i++) {
185: RBBISymbolTableEntry s = syms[i];
186:
187: System.out.print(" " + s.key + " "); // TODO: format output into columns.
188: System.out.print(" " + s.val + " ");
189: System.out.print(s.val.fLeftChild.fText);
190: System.out.print("\n");
191: }
192:
193: System.out.println("\nParsed Variable Definitions\n");
194: pos = -1;
195: for (int i = 0; i < syms.length; i++) {
196: RBBISymbolTableEntry s = syms[i];
197: System.out.print(s.key);
198: s.val.fLeftChild.printTree(true);
199: System.out.print("\n");
200: }
201: }
202:
203: }
|