001: /*
002: * $Id: Perl5Debug.java,v 1.11 2003/11/07 20:16:25 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text.regex;
059:
060: /**
061: * The Perl5Debug class is not intended for general use and should not
062: * be instantiated, but is provided because some users may find the output
063: * of its single method to be useful.
064: * The Perl5Compiler class generates a representation of a
065: * regular expression identical to that of Perl5 in the abstract, but
066: * not in terms of actual data structures. The Perl5Debug class allows
067: * the bytecode program contained by a Perl5Pattern to be printed out for
068: * comparison with the program generated by Perl5 with the -r option.
069: *
070: * @version @version@
071: * @since 1.0
072: * @see Perl5Pattern
073: */
074: public final class Perl5Debug {
075:
076: /**
077: * A dummy constructor to prevent instantiation of Perl5Debug.
078: */
079: private Perl5Debug() {
080: }
081:
082: /**
083: * This method prints to a String the bytecode program contained in a
084: * Perl5Pattern._ The program byte codes are identical to those
085: * generated by Perl5 with the -r option, but the offsets are
086: * different due to the different data structures used. This
087: * method is useful for diagnosing suspected bugs. The Perl5Compiler
088: * class is designed to produce regular expression programs identical
089: * to those produced by Perl5. By comparing the output of this method
090: * and the output of Perl5 with the -r option on the same regular
091: * expression, you can determine if Perl5Compiler correctly compiled
092: * an expression.
093: * <p>
094: * @param regexp The Perl5Pattern to print.
095: * @return A string representation of the bytecode program defining the
096: * regular expression.
097: */
098:
099: public static String printProgram(Perl5Pattern regexp) {
100: StringBuffer buffer;
101: char operator = OpCode._OPEN, prog[];
102: int offset, next;
103:
104: prog = regexp._program;
105: offset = 1;
106: buffer = new StringBuffer();
107:
108: while (operator != OpCode._END) {
109: operator = prog[offset];
110: buffer.append(offset);
111: _printOperator(prog, offset, buffer);
112:
113: next = OpCode._getNext(prog, offset);
114: offset += OpCode._operandLength[operator];
115:
116: buffer.append("(" + next + ")");
117:
118: offset += 2;
119:
120: if (operator == OpCode._ANYOF) {
121: offset += 16;
122: } else if (operator == OpCode._ANYOFUN
123: || operator == OpCode._NANYOFUN) {
124: while (prog[offset] != OpCode._END) {
125: if (prog[offset] == OpCode._RANGE)
126: offset += 3;
127: else
128: offset += 2;
129: }
130: ++offset;
131: } else if (operator == OpCode._EXACTLY) {
132: ++offset;
133: buffer.append(" <");
134:
135: //while(prog[offset] != '0')
136: while (prog[offset] != CharStringPointer._END_OF_STRING) {
137: //while(prog[offset] != 0 &&
138: // prog[offset] != CharStringPointer._END_OF_STRING) {
139: buffer.append(prog[offset]);
140: ++offset;
141: }
142: buffer.append(">");
143: ++offset;
144: }
145:
146: buffer.append('\n');
147: }
148:
149: // Can print some other stuff here.
150: if (regexp._startString != null)
151: buffer.append("start `" + new String(regexp._startString)
152: + "' ");
153: if (regexp._startClassOffset != OpCode._NULL_OFFSET) {
154: buffer.append("stclass `");
155: _printOperator(prog, regexp._startClassOffset, buffer);
156: buffer.append("' ");
157: }
158: if ((regexp._anchor & Perl5Pattern._OPT_ANCH) != 0)
159: buffer.append("anchored ");
160: if ((regexp._anchor & Perl5Pattern._OPT_SKIP) != 0)
161: buffer.append("plus ");
162: if ((regexp._anchor & Perl5Pattern._OPT_IMPLICIT) != 0)
163: buffer.append("implicit ");
164: if (regexp._mustString != null)
165: buffer.append("must have \""
166: + new String(regexp._mustString) + "\" back "
167: + regexp._back + " ");
168: buffer.append("minlen " + regexp._minLength + '\n');
169:
170: return buffer.toString();
171: }
172:
173: static void _printOperator(char[] program, int offset,
174: StringBuffer buffer) {
175: String str = null;
176:
177: buffer.append(":");
178:
179: switch (program[offset]) {
180: case OpCode._BOL:
181: str = "BOL";
182: break;
183: case OpCode._MBOL:
184: str = "MBOL";
185: break;
186: case OpCode._SBOL:
187: str = "SBOL";
188: break;
189: case OpCode._EOL:
190: str = "EOL";
191: break;
192: case OpCode._MEOL:
193: str = "MEOL";
194: break;
195: case OpCode._ANY:
196: str = "ANY";
197: break;
198: case OpCode._SANY:
199: str = "SANY";
200: break;
201: case OpCode._ANYOF:
202: str = "ANYOF";
203: break;
204: case OpCode._ANYOFUN:
205: str = "ANYOFUN";
206: break;
207: case OpCode._NANYOFUN:
208: str = "NANYOFUN";
209: break;
210: /*
211: case OpCode._ANYOF : // debug
212: buffer.append("ANYOF\n\n");
213: int foo = OpCode._OPERAND(offset);
214: char ch;
215: for(ch=0; ch < 256; ch++) {
216: if(ch % 16 == 0)
217: buffer.append(" ");
218: buffer.append((program[foo + (ch >> 4)] &
219: (1 << (ch & 0xf))) == 0 ? 0 : 1);
220: }
221: buffer.append("\n\n");
222: break;
223: */
224: case OpCode._BRANCH:
225: str = "BRANCH";
226: break;
227: case OpCode._EXACTLY:
228: str = "EXACTLY";
229: break;
230: case OpCode._NOTHING:
231: str = "NOTHING";
232: break;
233: case OpCode._BACK:
234: str = "BACK";
235: break;
236: case OpCode._END:
237: str = "END";
238: break;
239: case OpCode._ALNUM:
240: str = "ALNUM";
241: break;
242: case OpCode._NALNUM:
243: str = "NALNUM";
244: break;
245: case OpCode._BOUND:
246: str = "BOUND";
247: break;
248: case OpCode._NBOUND:
249: str = "NBOUND";
250: break;
251: case OpCode._SPACE:
252: str = "SPACE";
253: break;
254: case OpCode._NSPACE:
255: str = "NSPACE";
256: break;
257: case OpCode._DIGIT:
258: str = "DIGIT";
259: break;
260: case OpCode._NDIGIT:
261: str = "NDIGIT";
262: break;
263: case OpCode._ALPHA:
264: str = "ALPHA";
265: break;
266: case OpCode._BLANK:
267: str = "BLANK";
268: break;
269: case OpCode._CNTRL:
270: str = "CNTRL";
271: break;
272: case OpCode._GRAPH:
273: str = "GRAPH";
274: break;
275: case OpCode._LOWER:
276: str = "LOWER";
277: break;
278: case OpCode._PRINT:
279: str = "PRINT";
280: break;
281: case OpCode._PUNCT:
282: str = "PUNCT";
283: break;
284: case OpCode._UPPER:
285: str = "UPPER";
286: break;
287: case OpCode._XDIGIT:
288: str = "XDIGIT";
289: break;
290: case OpCode._ALNUMC:
291: str = "ALNUMC";
292: break;
293: case OpCode._ASCII:
294: str = "ASCII";
295: break;
296: case OpCode._CURLY:
297: buffer.append("CURLY {");
298: buffer.append((int) OpCode._getArg1(program, offset));
299: buffer.append(',');
300: buffer.append((int) OpCode._getArg2(program, offset));
301: buffer.append('}');
302: break;
303: case OpCode._CURLYX:
304: buffer.append("CURLYX {");
305: buffer.append((int) OpCode._getArg1(program, offset));
306: buffer.append(',');
307: buffer.append((int) OpCode._getArg2(program, offset));
308: buffer.append('}');
309: break;
310: case OpCode._REF:
311: buffer.append("REF");
312: buffer.append((int) OpCode._getArg1(program, offset));
313: break;
314: case OpCode._OPEN:
315: buffer.append("OPEN");
316: buffer.append((int) OpCode._getArg1(program, offset));
317: break;
318: case OpCode._CLOSE:
319: buffer.append("CLOSE");
320: buffer.append((int) OpCode._getArg1(program, offset));
321: break;
322: case OpCode._STAR:
323: str = "STAR";
324: break;
325: case OpCode._PLUS:
326: str = "PLUS";
327: break;
328: case OpCode._MINMOD:
329: str = "MINMOD";
330: break;
331: case OpCode._GBOL:
332: str = "GBOL";
333: break;
334: case OpCode._UNLESSM:
335: str = "UNLESSM";
336: break;
337: case OpCode._IFMATCH:
338: str = "IFMATCH";
339: break;
340: case OpCode._SUCCEED:
341: str = "SUCCEED";
342: break;
343: case OpCode._WHILEM:
344: str = "WHILEM";
345: break;
346: default:
347: buffer
348: .append("Operator is unrecognized. Faulty expression code!");
349: break;
350: }
351:
352: if (str != null)
353: buffer.append(str);
354: }
355: }
|