001: /*
002: * $Id: AwkPattern.java,v 1.7 2003/11/07 20:16:24 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text.awk;
059:
060: import java.io.Serializable;
061: import java.util.*;
062:
063: import org.apache.oro.text.regex.*;
064:
065: final class DFAState {
066: int _stateNumber;
067: BitSet _state;
068:
069: DFAState(BitSet s, int num) {
070: _state = s;
071: _stateNumber = num;
072: }
073: }
074:
075: /**
076: * An implementation of the Pattern interface for Awk regular expressions.
077: * This class is compatible with the AwkCompiler and AwkMatcher
078: * classes. When an AwkCompiler instance compiles a regular expression
079: * pattern, it produces an AwkPattern instance containing internal
080: * data structures used by AwkMatcher to perform pattern matches.
081: * This class cannot be subclassed and cannot be directly instantiated
082: * by the programmer as it would not make sense. It is however serializable
083: * so that pre-compiled patterns may be saved to disk and re-read at a later
084: * time. AwkPattern instances should only be created through calls to an
085: * AwkCompiler instance's compile() methods
086: *
087: * @version @version@
088: * @since 1.0
089: * @see AwkCompiler
090: * @see AwkMatcher
091: */
092: public final class AwkPattern implements Pattern, Serializable {
093: final static int _INVALID_STATE = -1, _START_STATE = 1;
094:
095: int _numStates, _endPosition, _options;
096: String _expression;
097: Vector _Dtrans, _nodeList[], _stateList;
098: BitSet _U, _emptySet, _followSet[], _endStates;
099: Hashtable _stateMap;
100: boolean _matchesNullString, _fastMap[];
101: boolean _hasBeginAnchor = false, _hasEndAnchor = false;
102:
103: AwkPattern(String expression, SyntaxTree tree) {
104: int token, node, tstateArray[];
105: DFAState dfaState;
106:
107: _expression = expression;
108:
109: // Assume endPosition always occurs at end of parse.
110: _endPosition = tree._positions - 1;
111: _followSet = tree._followSet;
112:
113: _Dtrans = new Vector();
114: _stateList = new Vector();
115: _endStates = new BitSet();
116:
117: _U = new BitSet(tree._positions);
118: _U.or(tree._root._firstPosition());
119:
120: tstateArray = new int[LeafNode._NUM_TOKENS];
121: _Dtrans.addElement(tstateArray); // this is a dummy entry because we
122: // number our states starting from 1
123: _Dtrans.addElement(tstateArray);
124:
125: _numStates = _START_STATE;
126: if (_U.get(_endPosition))
127: _endStates.set(_numStates);
128: dfaState = new DFAState((BitSet) _U.clone(), _numStates);
129: _stateMap = new Hashtable();
130: _stateMap.put(dfaState._state, dfaState);
131: _stateList.addElement(dfaState); // this is a dummy entry because we
132: // number our states starting from 1
133: _stateList.addElement(dfaState);
134: _numStates++;
135:
136: _U.xor(_U); // clear bits
137: _emptySet = new BitSet(tree._positions);
138:
139: _nodeList = new Vector[LeafNode._NUM_TOKENS];
140: for (token = 0; token < LeafNode._NUM_TOKENS; token++) {
141: _nodeList[token] = new Vector();
142: for (node = 0; node < tree._positions; node++)
143: if (tree._nodes[node]._matches((char) token))
144: _nodeList[token].addElement(tree._nodes[node]);
145: }
146:
147: _fastMap = tree.createFastMap();
148: _matchesNullString = _endStates.get(_START_STATE);
149: }
150:
151: // tstateArray is assumed to have been set before calling this method
152: void _createNewState(int current, int token, int[] tstateArray) {
153: int node, pos;
154: DFAState T, dfaState;
155:
156: T = (DFAState) _stateList.elementAt(current);
157: node = _nodeList[token].size();
158: _U.xor(_U); // clear bits
159: while (node-- > 0) {
160: pos = ((LeafNode) _nodeList[token].elementAt(node))._position;
161: if (T._state.get(pos))
162: _U.or(_followSet[pos]);
163: }
164:
165: if (!_stateMap.containsKey(_U)) {
166: dfaState = new DFAState((BitSet) _U.clone(), _numStates++);
167: _stateList.addElement(dfaState);
168: _stateMap.put(dfaState._state, dfaState);
169: _Dtrans.addElement(new int[LeafNode._NUM_TOKENS]);
170:
171: if (!_U.equals(_emptySet)) {
172: tstateArray[token] = _numStates - 1;
173:
174: if (_U.get(_endPosition))
175: _endStates.set(_numStates - 1);
176: } else
177: tstateArray[token] = _INVALID_STATE;
178: } else {
179: if (_U.equals(_emptySet))
180: tstateArray[token] = _INVALID_STATE;
181: else
182: tstateArray[token] = ((DFAState) _stateMap.get(_U))._stateNumber;
183: }
184: }
185:
186: int[] _getStateArray(int state) {
187: return ((int[]) _Dtrans.elementAt(state));
188: }
189:
190: /**
191: * This method returns the string representation of the pattern.
192: * <p>
193: * @return The original string representation of the regular expression
194: * pattern.
195: */
196: public String getPattern() {
197: return _expression;
198: }
199:
200: /**
201: * This method returns an integer containing the compilation options used
202: * to compile this pattern.
203: * <p>
204: * @return The compilation options used to compile the pattern.
205: */
206: public int getOptions() {
207: return _options;
208: }
209: }
|