001: /********************************************************************
002: * COPYRIGHT:
003: * Copyright (c) 2001-2006, International Business Machines Corporation and
004: * others. All Rights Reserved.
005: ********************************************************************/package com.ibm.icu.text;
006:
007: import java.util.List;
008: import java.util.HashSet;
009: import java.util.Set;
010:
011: import com.ibm.icu.impl.Assert;
012:
013: /**
014: * This class represents a node in the parse tree created by the RBBI Rule compiler.
015: * @internal
016: *
017: */
018: class RBBINode {
019:
020: // enum NodeType {
021: static final int setRef = 0;
022: static final int uset = 1;
023: static final int varRef = 2;
024: static final int leafChar = 3;
025: static final int lookAhead = 4;
026: static final int tag = 5;
027: static final int endMark = 6;
028: static final int opStart = 7;
029: static final int opCat = 8;
030: static final int opOr = 9;
031: static final int opStar = 10;
032: static final int opPlus = 11;
033: static final int opQuestion = 12;
034: static final int opBreak = 13;
035: static final int opReverse = 14;
036: static final int opLParen = 15;
037: static final int nodeTypeLimit = 16; // For Assertion checking only.
038:
039: static String[] nodeTypeNames = { "setRef", "uset", "varRef",
040: "leafChar", "lookAhead", "tag", "endMark", "opStart",
041: "opCat", "opOr", "opStar", "opPlus", "opQuestion",
042: "opBreak", "opReverse", "opLParen" };
043:
044: // enum OpPrecedence {
045: static final int precZero = 0;
046: static final int precStart = 1;
047: static final int precLParen = 2;
048: static final int precOpOr = 3;
049: static final int precOpCat = 4;
050:
051: int fType; // enum NodeType
052: RBBINode fParent;
053: RBBINode fLeftChild;
054: RBBINode fRightChild;
055: UnicodeSet fInputSet; // For uset nodes only.
056: int fPrecedence = precZero; // enum OpPrecedence, For binary ops only.
057:
058: String fText; // Text corresponding to this node.
059: // May be lazily evaluated when (if) needed
060: // for some node types.
061: int fFirstPos; // Position in the rule source string of the
062: // first text associated with the node.
063: // If there's a left child, this will be the same
064: // as that child's left pos.
065: int fLastPos; // Last position in the rule source string
066: // of any text associated with this node.
067: // If there's a right child, this will be the same
068: // as that child's last postion.
069:
070: boolean fNullable; // See Aho DFA table generation algorithm
071: int fVal; // For leafChar nodes, the value.
072: // Values are the character category,
073: // corresponds to columns in the final
074: // state transition table.
075:
076: boolean fLookAheadEnd; // For endMark nodes, set TRUE if
077: // marking the end of a look-ahead rule.
078:
079: Set fFirstPosSet; // See Aho DFA table generation algorithm
080: Set fLastPosSet; // See Aho.
081: Set fFollowPos; // See Aho.
082:
083: int fSerialNum; // Debugging aids. Each node gets a unique serial number.
084: static int gLastSerial;
085:
086: RBBINode(int t) {
087: Assert.assrt(t < nodeTypeLimit);
088: fSerialNum = ++gLastSerial;
089: fType = t;
090:
091: fFirstPosSet = new HashSet();
092: fLastPosSet = new HashSet();
093: fFollowPos = new HashSet();
094: if (t == opCat) {
095: fPrecedence = precOpCat;
096: } else if (t == opOr) {
097: fPrecedence = precOpOr;
098: } else if (t == opStart) {
099: fPrecedence = precStart;
100: } else if (t == opLParen) {
101: fPrecedence = precLParen;
102: } else
103: fPrecedence = precZero;
104: }
105:
106: RBBINode(RBBINode other) {
107: fSerialNum = ++gLastSerial;
108: fType = other.fType;
109: fInputSet = other.fInputSet;
110: fPrecedence = other.fPrecedence;
111: fText = other.fText;
112: fFirstPos = other.fFirstPos;
113: fLastPos = other.fLastPos;
114: fNullable = other.fNullable;
115: fVal = other.fVal;
116: fFirstPosSet = new HashSet(other.fFirstPosSet);
117: fLastPosSet = new HashSet(other.fLastPosSet);
118: fFollowPos = new HashSet(other.fFollowPos);
119: }
120:
121: //-------------------------------------------------------------------------
122: //
123: // cloneTree Make a copy of the subtree rooted at this node.
124: // Discard any variable references encountered along the way,
125: // and replace with copies of the variable's definitions.
126: // Used to replicate the expression underneath variable
127: // references in preparation for generating the DFA tables.
128: //
129: //-------------------------------------------------------------------------
130: RBBINode cloneTree() {
131: RBBINode n;
132:
133: if (fType == RBBINode.varRef) {
134: // If the current node is a variable reference, skip over it
135: // and clone the definition of the variable instead.
136: n = fLeftChild.cloneTree();
137: } else if (fType == RBBINode.uset) {
138: n = this ;
139: } else {
140: n = new RBBINode(this );
141: if (fLeftChild != null) {
142: n.fLeftChild = fLeftChild.cloneTree();
143: n.fLeftChild.fParent = n;
144: }
145: if (fRightChild != null) {
146: n.fRightChild = fRightChild.cloneTree();
147: n.fRightChild.fParent = n;
148: }
149: }
150: return n;
151: }
152:
153: //-------------------------------------------------------------------------
154: //
155: // flattenVariables Walk a parse tree, replacing any variable
156: // references with a copy of the variable's definition.
157: // Aside from variables, the tree is not changed.
158: //
159: // Return the root of the tree. If the root was not a variable
160: // reference, it remains unchanged - the root we started with
161: // is the root we return. If, however, the root was a variable
162: // reference, the root of the newly cloned replacement tree will
163: // be returned, and the original tree deleted.
164: //
165: // This function works by recursively walking the tree
166: // without doing anything until a variable reference is
167: // found, then calling cloneTree() at that point. Any
168: // nested references are handled by cloneTree(), not here.
169: //
170: //-------------------------------------------------------------------------
171: RBBINode flattenVariables() {
172: if (fType == varRef) {
173: RBBINode retNode = fLeftChild.cloneTree();
174: // delete this;
175: return retNode;
176: }
177:
178: if (fLeftChild != null) {
179: fLeftChild = fLeftChild.flattenVariables();
180: fLeftChild.fParent = this ;
181: }
182: if (fRightChild != null) {
183: fRightChild = fRightChild.flattenVariables();
184: fRightChild.fParent = this ;
185: }
186: return this ;
187: }
188:
189: //-------------------------------------------------------------------------
190: //
191: // flattenSets Walk the parse tree, replacing any nodes of type setRef
192: // with a copy of the expression tree for the set. A set's
193: // equivalent expression tree is precomputed and saved as
194: // the left child of the uset node.
195: //
196: //-------------------------------------------------------------------------
197: void flattenSets() {
198: Assert.assrt(fType != setRef);
199:
200: if (fLeftChild != null) {
201: if (fLeftChild.fType == setRef) {
202: RBBINode setRefNode = fLeftChild;
203: RBBINode usetNode = setRefNode.fLeftChild;
204: RBBINode replTree = usetNode.fLeftChild;
205: fLeftChild = replTree.cloneTree();
206: fLeftChild.fParent = this ;
207: } else {
208: fLeftChild.flattenSets();
209: }
210: }
211:
212: if (fRightChild != null) {
213: if (fRightChild.fType == setRef) {
214: RBBINode setRefNode = fRightChild;
215: RBBINode usetNode = setRefNode.fLeftChild;
216: RBBINode replTree = usetNode.fLeftChild;
217: fRightChild = replTree.cloneTree();
218: fRightChild.fParent = this ;
219: // delete setRefNode;
220: } else {
221: fRightChild.flattenSets();
222: }
223: }
224: }
225:
226: //-------------------------------------------------------------------------
227: //
228: // findNodes() Locate all the nodes of the specified type, starting
229: // at the specified root.
230: //
231: //-------------------------------------------------------------------------
232: void findNodes(List dest, int kind) {
233: if (fType == kind) {
234: dest.add(this );
235: }
236: if (fLeftChild != null) {
237: fLeftChild.findNodes(dest, kind);
238: }
239: if (fRightChild != null) {
240: fRightChild.findNodes(dest, kind);
241: }
242: }
243:
244: //-------------------------------------------------------------------------
245: //
246: // print. Print out a single node, for debugging.
247: //
248: //-------------------------------------------------------------------------
249: static void printNode(RBBINode n) {
250:
251: if (n == null) {
252: System.out.print(" -- null --\n");
253: } else {
254: RBBINode.printInt(n.fSerialNum, 10);
255: RBBINode.printString(nodeTypeNames[n.fType], 11);
256: RBBINode.printInt(n.fParent == null ? 0
257: : n.fParent.fSerialNum, 11);
258: RBBINode.printInt(n.fLeftChild == null ? 0
259: : n.fLeftChild.fSerialNum, 11);
260: RBBINode.printInt(n.fRightChild == null ? 0
261: : n.fRightChild.fSerialNum, 12);
262: RBBINode.printInt(n.fFirstPos, 12);
263: RBBINode.printInt(n.fVal, 7);
264:
265: if (n.fType == varRef) {
266: System.out.print(" " + n.fText);
267: }
268: }
269: System.out.println("");
270: }
271:
272: // Print a String in a fixed field size.
273: // Debugging function.
274: static void printString(String s, int minWidth) {
275: for (int i = minWidth; i < 0; i++) {
276: // negative width means pad leading spaces, not fixed width.
277: System.out.print(' ');
278: }
279: for (int i = s.length(); i < minWidth; i++) {
280: System.out.print(' ');
281: }
282: System.out.print(s);
283: }
284:
285: //
286: // Print an int in a fixed size field.
287: // Debugging function.
288: //
289: static void printInt(int i, int minWidth) {
290: String s = Integer.toString(i);
291: printString(s, Math.max(minWidth, s.length() + 1));
292: }
293:
294: static void printHex(int i, int minWidth) {
295: String s = Integer.toString(i, 16);
296: String leadingZeroes = "00000".substring(0, Math.max(0, 5 - s
297: .length()));
298: s = leadingZeroes + s;
299: printString(s, minWidth);
300: }
301:
302: // -------------------------------------------------------------------------
303: //
304: // print. Print out the tree of nodes rooted at "this"
305: //
306: // -------------------------------------------------------------------------
307: void printTree(boolean printHeading) {
308: if (printHeading) {
309: System.out
310: .println("-------------------------------------------------------------------");
311: System.out
312: .println(" Serial type Parent LeftChild RightChild position value");
313: }
314: printNode(this );
315: // Only dump the definition under a variable reference if asked to.
316: // Unconditinally dump children of all other node types.
317: if (fType != varRef) {
318: if (fLeftChild != null) {
319: fLeftChild.printTree(false);
320: }
321:
322: if (fRightChild != null) {
323: fRightChild.printTree(false);
324: }
325: }
326: }
327:
328: }
|