001: /* ====================================================================
002: * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
003: *
004: * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above copyright
011: * notice, this list of conditions and the following disclaimer.
012: *
013: * 2. Redistributions in binary form must reproduce the above copyright
014: * notice, this list of conditions and the following disclaimer in
015: * the documentation and/or other materials provided with the
016: * distribution.
017: *
018: * 3. The end-user documentation included with the redistribution,
019: * if any, must include the following acknowledgment:
020: * "This product includes software developed by Jcorporate Ltd.
021: * (http://www.jcorporate.com/)."
022: * Alternately, this acknowledgment may appear in the software itself,
023: * if and wherever such third-party acknowledgments normally appear.
024: *
025: * 4. "Jcorporate" and product names such as "Expresso" must
026: * not be used to endorse or promote products derived from this
027: * software without prior written permission. For written permission,
028: * please contact info@jcorporate.com.
029: *
030: * 5. Products derived from this software may not be called "Expresso",
031: * or other Jcorporate product names; nor may "Expresso" or other
032: * Jcorporate product names appear in their name, without prior
033: * written permission of Jcorporate Ltd.
034: *
035: * 6. No product derived from this software may compete in the same
036: * market space, i.e. framework, without prior written permission
037: * of Jcorporate Ltd. For written permission, please contact
038: * partners@jcorporate.com.
039: *
040: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
041: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
042: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
043: * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
044: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
045: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
046: * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
047: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
048: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
049: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
050: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
051: * SUCH DAMAGE.
052: * ====================================================================
053: *
054: * This software consists of voluntary contributions made by many
055: * individuals on behalf of the Jcorporate Ltd. Contributions back
056: * to the project(s) are encouraged when you make modifications.
057: * Please send them to support@jcorporate.com. For more information
058: * on Jcorporate Ltd. and its products, please see
059: * <http://www.jcorporate.com/>.
060: *
061: * Portions of this software are based upon other open source
062: * products and are subject to their respective licenses.
063: */
064:
065: package com.jcorporate.expresso.ext.regexp;
066:
067: /*
068: * ====================================================================
069: *
070: * The Apache Software License, Version 1.1
071: *
072: * Copyright (c) 1999 The Apache Software Foundation. All rights
073: * reserved.
074: *
075: * Redistribution and use in source and binary forms, with or without
076: * modification, are permitted provided that the following conditions
077: * are met:
078: *
079: * 1. Redistributions of source code must retain the above copyright
080: * notice, this list of conditions and the following disclaimer.
081: *
082: * 2. Redistributions in binary form must reproduce the above copyright
083: * notice, this list of conditions and the following disclaimer in
084: * the documentation and/or other materials provided with the
085: * distribution.
086: *
087: * 3. The end-user documentation included with the redistribution, if
088: * any, must include the following acknowlegement:
089: * "This product includes software developed by the
090: * Apache Software Foundation (http://www.apache.org/)."
091: * Alternately, this acknowlegement may appear in the software itself,
092: * if and wherever such third-party acknowlegements normally appear.
093: *
094: * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
095: * Foundation" must not be used to endorse or promote products derived
096: * from this software without prior written permission. For written
097: * permission, please contact apache@apache.org.
098: *
099: * 5. Products derived from this software may not be called "Apache"
100: * nor may "Apache" appear in their names without prior written
101: * permission of the Apache Group.
102: *
103: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
104: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
105: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
106: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
107: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
108: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
109: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
110: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
111: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
112: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
113: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
114: * SUCH DAMAGE.
115: * ====================================================================
116: *
117: * This software consists of voluntary contributions made by many
118: * individuals on behalf of the Apache Software Foundation. For more
119: * information on the Apache Software Foundation, please see
120: * <http://www.apache.org/>.
121: *
122: */
123:
124: /**
125: * A class that holds compiled regular expressions. This is exposed mainly
126: * for use by the recompile utility (which helps you produce precompiled
127: * REProgram objects). You should not otherwise need to work directly with
128: * this class.
129: *
130: * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
131: * @version $Id: REProgram.java,v 1.7 2004/11/17 20:48:15 lhamel Exp $
132: * @see RE
133: * @see RECompiler
134: * @deprecated since v5.6, use jakarta oro
135: */
136: public class REProgram {
137: static final int OPT_HASBACKREFS = 1;
138: char[] instruction; // The compiled regular expression 'program'
139: int lenInstruction; // The amount of the instruction buffer in use
140: char[] prefix; // Prefix string optimization
141: int flags; // Optimization flags (REProgram.OPT_*)
142:
143: /**
144: * Constructs a program object from a character array
145: *
146: * @param instruction Character array with RE opcode instructions in it
147: */
148: public REProgram(char[] instruction) {
149: this (instruction, instruction.length);
150: }
151:
152: /**
153: * Constructs a program object from a character array
154: *
155: * @param instruction Character array with RE opcode instructions in it
156: * @param lenInstruction Amount of instruction array in use
157: */
158: public REProgram(char[] instruction, int lenInstruction) {
159: setInstructions(instruction, lenInstruction);
160: }
161:
162: /**
163: * Returns a copy of the current regular expression program in a character
164: * array that is exactly the right length to hold the program. If there is
165: * no program compiled yet, getInstructions() will return null.
166: *
167: * @return A copy of the current compiled RE program
168: */
169: public char[] getInstructions() {
170:
171: // Ensure program has been compiled!
172: if (lenInstruction != 0) {
173:
174: // Return copy of program
175: char[] ret = new char[lenInstruction];
176: System.arraycopy(instruction, 0, ret, 0, lenInstruction);
177:
178: return ret;
179: }
180:
181: return null;
182: }
183:
184: /**
185: * Sets a new regular expression program to run. It is this method which
186: * performs any special compile-time search optimizations. Currently only
187: * two optimizations are in place - one which checks for backreferences
188: * (so that they can be lazily allocated) and another which attempts to
189: * find an prefix anchor string so that substantial amounts of input can
190: * potentially be skipped without running the actual program.
191: *
192: * @param instruction Program instruction buffer
193: * @param lenInstruction Length of instruction buffer in use
194: */
195: public void setInstructions(char[] instruction, int lenInstruction) {
196:
197: // Save reference to instruction array
198: this .instruction = instruction;
199: this .lenInstruction = lenInstruction;
200:
201: // Initialize other program-related variables
202: flags = 0;
203: prefix = null;
204:
205: // Try various compile-time optimizations if there's a program
206: if (instruction != null && lenInstruction != 0) {
207:
208: // If the first node is a branch
209: if (lenInstruction >= RE.nodeSize
210: && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) {
211:
212: // to the end node
213: int next = instruction[0 + RE.offsetNext];
214:
215: if (instruction[next + RE.offsetOpcode] == RE.OP_END) {
216:
217: // and the branch starts with an atom
218: if (lenInstruction >= (RE.nodeSize * 2)
219: && instruction[RE.nodeSize
220: + RE.offsetOpcode] == RE.OP_ATOM) {
221:
222: // then get that atom as an prefix because there's no other choice
223: int lenAtom = instruction[RE.nodeSize
224: + RE.offsetOpdata];
225: prefix = new char[lenAtom];
226: System.arraycopy(instruction, RE.nodeSize * 2,
227: prefix, 0, lenAtom);
228: }
229: }
230: }
231: BackrefScanLoop:
232:
233: // Check for backreferences
234: for (int i = 0; i < lenInstruction; i += RE.nodeSize) {
235: switch (instruction[i + RE.offsetOpcode]) {
236: case RE.OP_ANYOF:
237: i += (instruction[i + RE.offsetOpdata] * 2);
238: break;
239:
240: case RE.OP_ATOM:
241: i += instruction[i + RE.offsetOpdata];
242: break;
243:
244: case RE.OP_BACKREF:
245: flags |= OPT_HASBACKREFS;
246: break BackrefScanLoop;
247: }
248: }
249: }
250: }
251: }
|