001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.regexp;
019:
020: import java.io.PrintWriter;
021: import java.util.Hashtable;
022:
023: /**
024: * A subclass of RECompiler which can dump a regular expression program
025: * for debugging purposes.
026: *
027: * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
028: * @version $Id: REDebugCompiler.java 518169 2007-03-14 15:03:35Z vgritsenko $
029: */
030: public class REDebugCompiler extends RECompiler {
031: /**
032: * Mapping from opcodes to descriptive strings
033: */
034: static Hashtable hashOpcode = new Hashtable();
035: static {
036: hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR),
037: "OP_RELUCTANTSTAR");
038: hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS),
039: "OP_RELUCTANTPLUS");
040: hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE),
041: "OP_RELUCTANTMAYBE");
042: hashOpcode.put(new Integer(RE.OP_END), "OP_END");
043: hashOpcode.put(new Integer(RE.OP_BOL), "OP_BOL");
044: hashOpcode.put(new Integer(RE.OP_EOL), "OP_EOL");
045: hashOpcode.put(new Integer(RE.OP_ANY), "OP_ANY");
046: hashOpcode.put(new Integer(RE.OP_ANYOF), "OP_ANYOF");
047: hashOpcode.put(new Integer(RE.OP_BRANCH), "OP_BRANCH");
048: hashOpcode.put(new Integer(RE.OP_ATOM), "OP_ATOM");
049: hashOpcode.put(new Integer(RE.OP_STAR), "OP_STAR");
050: hashOpcode.put(new Integer(RE.OP_PLUS), "OP_PLUS");
051: hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE");
052: hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING");
053: hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO");
054: hashOpcode.put(new Integer(RE.OP_CONTINUE), "OP_CONTINUE");
055: hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE");
056: hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN");
057: hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE");
058: hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF");
059: hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS");
060: hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER),
061: "OP_OPEN_CLUSTER");
062: hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER),
063: "OP_CLOSE_CLUSTER");
064: }
065:
066: /**
067: * Returns a descriptive string for an opcode.
068: *
069: * @param opcode Opcode to convert to a string
070: * @return Description of opcode
071: */
072: String opcodeToString(char opcode) {
073: // Get string for opcode
074: String ret = (String) hashOpcode.get(new Integer(opcode));
075:
076: // Just in case we have a corrupt program
077: if (ret == null) {
078: ret = "OP_????";
079: }
080: return ret;
081: }
082:
083: /**
084: * Return a string describing a (possibly unprintable) character.
085: *
086: * @param c Character to convert to a printable representation
087: * @return String representation of character
088: */
089: String charToString(char c) {
090: // If it's unprintable, convert to '\###'
091: if (c < ' ' || c > 127) {
092: return "\\" + (int) c;
093: }
094:
095: // Return the character as a string
096: return String.valueOf(c);
097: }
098:
099: /**
100: * Returns a descriptive string for a node in a regular expression program.
101: * @param node Node to describe
102: * @return Description of node
103: */
104: String nodeToString(int node) {
105: // Get opcode and opdata for node
106: char opcode = instruction[node /* + RE.offsetOpcode */];
107: int opdata = (int) instruction[node + RE.offsetOpdata];
108:
109: // Return opcode as a string and opdata value
110: return opcodeToString(opcode) + ", opdata = " + opdata;
111: }
112:
113: /**
114: * Adds a new node
115: *
116: * @param opcode Opcode for node
117: * @param opdata Opdata for node (only the low 16 bits are currently used)
118: * @return Index of new node in program
119: * /
120: int node(char opcode, int opdata)
121: {
122: System.out.println("====> Add " + opcode + " " + opdata);
123: PrintWriter writer = new PrintWriter(System.out);
124: dumpProgram(writer);
125: writer.flush();
126: int r = super.node(opcode, opdata);
127: System.out.println("====< ");
128: dumpProgram(writer);
129: writer.flush();
130: return r;
131: }/**/
132:
133: /**
134: * Inserts a node with a given opcode and opdata at insertAt. The node relative next
135: * pointer is initialized to 0.
136: *
137: * @param opcode Opcode for new node
138: * @param opdata Opdata for new node (only the low 16 bits are currently used)
139: * @param insertAt Index at which to insert the new node in the program
140: * /
141: void nodeInsert(char opcode, int opdata, int insertAt)
142: {
143: System.out.println("====> Ins " + opcode + " " + opdata + " " + insertAt);
144: PrintWriter writer = new PrintWriter(System.out);
145: dumpProgram(writer);
146: writer.flush();
147: super.nodeInsert(opcode, opdata, insertAt);
148: System.out.println("====< ");
149: dumpProgram(writer);
150: writer.flush();
151: }/**/
152:
153: /**
154: * Appends a node to the end of a node chain
155: *
156: * @param node Start of node chain to traverse
157: * @param pointTo Node to have the tail of the chain point to
158: * /
159: void setNextOfEnd(int node, int pointTo)
160: {
161: System.out.println("====> Link " + node + " " + pointTo);
162: PrintWriter writer = new PrintWriter(System.out);
163: dumpProgram(writer);
164: writer.flush();
165: super.setNextOfEnd(node, pointTo);
166: System.out.println("====< ");
167: dumpProgram(writer);
168: writer.flush();
169: }/**/
170:
171: /**
172: * Dumps the current program to a {@link PrintWriter}.
173: *
174: * @param p PrintWriter for program dump output
175: */
176: public void dumpProgram(PrintWriter p) {
177: // Loop through the whole program
178: for (int i = 0; i < lenInstruction;) {
179: // Get opcode, opdata and next fields of current program node
180: char opcode = instruction[i /* + RE.offsetOpcode */];
181: char opdata = instruction[i + RE.offsetOpdata];
182: int next = (short) instruction[i + RE.offsetNext];
183:
184: // Display the current program node
185: p.print(i + ". " + nodeToString(i) + ", next = ");
186:
187: // If there's no next, say 'none', otherwise give absolute index of next node
188: if (next == 0) {
189: p.print("none");
190: } else {
191: p.print(i + next);
192: }
193:
194: // Move past node
195: i += RE.nodeSize;
196:
197: // If character class
198: if (opcode == RE.OP_ANYOF) {
199: // Opening bracket for start of char class
200: p.print(", [");
201:
202: // Show each range in the char class
203: // int rangeCount = opdata;
204: for (int r = 0; r < opdata; r++) {
205: // Get first and last chars in range
206: char charFirst = instruction[i++];
207: char charLast = instruction[i++];
208:
209: // Print range as X-Y, unless range encompasses only one char
210: if (charFirst == charLast) {
211: p.print(charToString(charFirst));
212: } else {
213: p.print(charToString(charFirst) + "-"
214: + charToString(charLast));
215: }
216: }
217:
218: // Annotate the end of the char class
219: p.print("]");
220: }
221:
222: // If atom
223: if (opcode == RE.OP_ATOM) {
224: // Open quote
225: p.print(", \"");
226:
227: // Print each character in the atom
228: for (int len = opdata; len-- != 0;) {
229: p.print(charToString(instruction[i++]));
230: }
231:
232: // Close quote
233: p.print("\"");
234: }
235:
236: // Print a newline
237: p.println("");
238: }
239: }
240:
241: /**
242: * Dumps the current program to a <code>System.out</code>.
243: */
244: public void dumpProgram() {
245: PrintWriter w = new PrintWriter(System.out);
246: dumpProgram(w);
247: w.flush();
248: }
249: }
|