001: package org.apache.regexp;
002:
003: /*
004: * ====================================================================
005: *
006: * The Apache Software License, Version 1.1
007: *
008: * Copyright (c) 1999 The Apache Software Foundation. All rights
009: * reserved.
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions
013: * are met:
014: *
015: * 1. Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * 2. Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in
020: * the documentation and/or other materials provided with the
021: * distribution.
022: *
023: * 3. The end-user documentation included with the redistribution, if
024: * any, must include the following acknowlegement:
025: * "This product includes software developed by the
026: * Apache Software Foundation (http://www.apache.org/)."
027: * Alternately, this acknowlegement may appear in the software itself,
028: * if and wherever such third-party acknowlegements normally appear.
029: *
030: * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
031: * Foundation" must not be used to endorse or promote products derived
032: * from this software without prior written permission. For written
033: * permission, please contact apache@apache.org.
034: *
035: * 5. Products derived from this software may not be called "Apache"
036: * nor may "Apache" appear in their names without prior written
037: * permission of the Apache Group.
038: *
039: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
040: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
041: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
042: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
043: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
044: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
045: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
046: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
047: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
048: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
049: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
050: * SUCH DAMAGE.
051: * ====================================================================
052: *
053: * This software consists of voluntary contributions made by many
054: * individuals on behalf of the Apache Software Foundation. For more
055: * information on the Apache Software Foundation, please see
056: * <http://www.apache.org/>.
057: *
058: */
059:
060: import java.util.*;
061: import java.io.*;
062:
063: /**
064: * A subclass of RECompiler which can dump a regular expression program
065: * for debugging purposes.
066: *
067: * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
068: * @version $Id: REDebugCompiler.java,v 1.1.1.1 2002/01/31 03:14:36 rcm Exp $
069: */
070: public class REDebugCompiler extends RECompiler {
071: /**
072: * Mapping from opcodes to descriptive strings
073: */
074: static Hashtable hashOpcode = new Hashtable();
075: static {
076: hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR),
077: "OP_RELUCTANTSTAR");
078: hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS),
079: "OP_RELUCTANTPLUS");
080: hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE),
081: "OP_RELUCTANTMAYBE");
082: hashOpcode.put(new Integer(RE.OP_END), "OP_END");
083: hashOpcode.put(new Integer(RE.OP_BOL), "OP_BOL");
084: hashOpcode.put(new Integer(RE.OP_EOL), "OP_EOL");
085: hashOpcode.put(new Integer(RE.OP_ANY), "OP_ANY");
086: hashOpcode.put(new Integer(RE.OP_ANYOF), "OP_ANYOF");
087: hashOpcode.put(new Integer(RE.OP_BRANCH), "OP_BRANCH");
088: hashOpcode.put(new Integer(RE.OP_ATOM), "OP_ATOM");
089: hashOpcode.put(new Integer(RE.OP_STAR), "OP_STAR");
090: hashOpcode.put(new Integer(RE.OP_PLUS), "OP_PLUS");
091: hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE");
092: hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING");
093: hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO");
094: hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE");
095: hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN");
096: hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE");
097: hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF");
098: hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS");
099: }
100:
101: /**
102: * Returns a descriptive string for an opcode.
103: * @param opcode Opcode to convert to a string
104: * @return Description of opcode
105: */
106: String opcodeToString(char opcode) {
107: // Get string for opcode
108: String ret = (String) hashOpcode.get(new Integer(opcode));
109:
110: // Just in case we have a corrupt program
111: if (ret == null) {
112: ret = "OP_????";
113: }
114: return ret;
115: }
116:
117: /**
118: * Return a string describing a (possibly unprintable) character.
119: * @param c Character to convert to a printable representation
120: * @return String representation of character
121: */
122: String charToString(char c) {
123: // If it's unprintable, convert to '\###'
124: if (c < ' ' || c > 127) {
125: return "\\" + (int) c;
126: }
127:
128: // Return the character as a string
129: return String.valueOf(c);
130: }
131:
132: /**
133: * Returns a descriptive string for a node in a regular expression program.
134: * @param node Node to describe
135: * @return Description of node
136: */
137: String nodeToString(int node) {
138: // Get opcode and opdata for node
139: char opcode = instruction[node + RE.offsetOpcode];
140: int opdata = (int) instruction[node + RE.offsetOpdata];
141:
142: // Return opcode as a string and opdata value
143: return opcodeToString(opcode) + ", opdata = " + opdata;
144: }
145:
146: /**
147: * Dumps the current program to a PrintWriter
148: * @param p PrintWriter for program dump output
149: */
150: public void dumpProgram(PrintWriter p) {
151: // Loop through the whole program
152: for (int i = 0; i < lenInstruction;) {
153: // Get opcode, opdata and next fields of current program node
154: char opcode = instruction[i + RE.offsetOpcode];
155: char opdata = instruction[i + RE.offsetOpdata];
156: short next = (short) instruction[i + RE.offsetNext];
157:
158: // Display the current program node
159: p.print(i + ". " + nodeToString(i) + ", next = ");
160:
161: // If there's no next, say 'none', otherwise give absolute index of next node
162: if (next == 0) {
163: p.print("none");
164: } else {
165: p.print(i + next);
166: }
167:
168: // Move past node
169: i += RE.nodeSize;
170:
171: // If character class
172: if (opcode == RE.OP_ANYOF) {
173: // Opening bracket for start of char class
174: p.print(", [");
175:
176: // Show each range in the char class
177: int rangeCount = opdata;
178: for (int r = 0; r < rangeCount; r++) {
179: // Get first and last chars in range
180: char charFirst = instruction[i++];
181: char charLast = instruction[i++];
182:
183: // Print range as X-Y, unless range encompasses only one char
184: if (charFirst == charLast) {
185: p.print(charToString(charFirst));
186: } else {
187: p.print(charToString(charFirst) + "-"
188: + charToString(charLast));
189: }
190: }
191:
192: // Annotate the end of the char class
193: p.print("]");
194: }
195:
196: // If atom
197: if (opcode == RE.OP_ATOM) {
198: // Open quote
199: p.print(", \"");
200:
201: // Print each character in the atom
202: for (int len = opdata; len-- != 0;) {
203: p.print(charToString(instruction[i++]));
204: }
205:
206: // Close quote
207: p.print("\"");
208: }
209:
210: // Print a newline
211: p.println("");
212: }
213: }
214: }
|