001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.process;
010:
011: import net.sourceforge.chaperon.common.IntegerList;
012:
013: /**
014: * Processor for pattern, which try for matching against a pattern.
015: *
016: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
017: * @version CVS $Id: PatternProcessor.java,v 1.9 2003/12/09 19:55:53 benedikta Exp $
018: */
019: public class PatternProcessor {
020: private PatternAutomaton automaton;
021: private IntegerList states = new IntegerList();
022: private IntegerList statekeys = new IntegerList();
023: private IntegerList newstates = new IntegerList();
024: private IntegerList newstatekeys = new IntegerList();
025:
026: // for storing the traversed states
027: private IntegerList historystates = new IntegerList();
028: private IntegerList dependencies = new IntegerList();
029:
030: // for storing the group, which were found
031: private String[] groups = new String[10];
032: private int[] groupstarts = new int[10];
033: private int[] groupends = new int[10];
034: private int groupcount = 0;
035:
036: /**
037: * Create a new pattern processor.
038: */
039: public PatternProcessor() {
040: }
041:
042: /**
043: * Create a new pattern processor.
044: *
045: * @param automaton Automaton, which the processor should use.
046: */
047: public PatternProcessor(PatternAutomaton automaton) {
048: setPatternAutomaton(automaton);
049: }
050:
051: /**
052: * Set the pattern automaton.
053: *
054: * @param automaton Automaton, which the processor should use.
055: */
056: public void setPatternAutomaton(PatternAutomaton automaton) {
057: if (automaton == null)
058: throw new NullPointerException();
059:
060: this .automaton = automaton;
061: if (groupstarts.length <= (automaton.getGroupCount())) {
062: groups = new String[automaton.getGroupCount() + 1];
063: groupstarts = new int[automaton.getGroupCount() + 1];
064: groupends = new int[automaton.getGroupCount() + 1];
065: }
066: }
067:
068: /**
069: * Search a postion, where the processor is successful.
070: *
071: * @param text Text.
072: *
073: * @return Next first position where the parser is successfull otherwise -1.
074: */
075: public boolean search(char[] text) {
076: return search(text, 0);
077: }
078:
079: /**
080: * Search a postion, where the processor is successful.
081: *
082: * @param text Text.
083: * @param start Start position within the text.
084: *
085: * @return Next first position where the parser is successfull otherwise -1.
086: */
087: public boolean search(char[] text, int start) {
088: for (int position = start; position <= text.length; position++)
089: if (match(text, position))
090: return true;
091:
092: return false;
093: }
094:
095: private boolean verbose = false;
096:
097: public void setVerbose(boolean verbose) {
098: this .verbose = verbose;
099: }
100:
101: /**
102: * Matches for pattern.
103: *
104: * @param text Text.
105: *
106: * @return True, if the processor matches successfully.
107: */
108: public boolean match(char[] text) {
109: return match(text, 0);
110: }
111:
112: /**
113: * Matches for pattern at specified position within the text.
114: *
115: * @param text Text
116: * @param start Position within the text.
117: *
118: * @return True, if the processor matches successfully.
119: */
120: public boolean match(char[] text, int start) {
121: if (automaton == null)
122: throw new NullPointerException("PatternAutomaton is null");
123:
124: if (automaton.getStateCount() <= 0)
125: return false;
126:
127: for (int i = 0; i <= automaton.getGroupCount(); i++) {
128: groupstarts[i] = start;
129: groupends[i] = start;
130: groups[i] = null;
131: }
132:
133: int position = start;
134: int state;
135: int key;
136: boolean found = false;
137: int foundkey = 0;
138: int foundPosition = start;
139:
140: states.clear();
141: states.push(automaton.getFirstState());
142:
143: newstates.clear();
144:
145: statekeys.clear();
146: statekeys.push(0);
147:
148: historystates.clear();
149: historystates.push(automaton.getFirstState());
150:
151: dependencies.clear();
152: dependencies.push(0);
153:
154: while ((!states.isEmpty()) && (position <= text.length)) {
155: state = states.pop();
156: key = statekeys.pop();
157:
158: if (automaton.isFinalState(state)) {
159: found = true;
160: foundkey = key;
161: foundPosition = position;
162: } else {
163: switch (automaton.getType(state)) {
164: case PatternAutomaton.TYPE_NOMATCH:
165: pushState(states, statekeys, state, key);
166: break;
167: case PatternAutomaton.TYPE_MATCH:
168: if ((position < text.length)
169: && (automaton.getIntervalBegin(state) <= text[position])
170: && (automaton.getIntervalEnd(state) >= text[position]))
171: pushState(newstates, newstatekeys, state, key);
172:
173: break;
174: case PatternAutomaton.TYPE_EXMATCH:
175: if ((position < text.length)
176: && ((automaton.getIntervalBegin(state) > text[position]) || (automaton
177: .getIntervalEnd(state) < text[position])))
178: pushState(states, statekeys, state, key);
179:
180: break;
181: case PatternAutomaton.TYPE_MATCHANY:
182: pushState(newstates, newstatekeys, state, key);
183: break;
184: case PatternAutomaton.TYPE_BOL:
185: if (position == 0)
186: pushState(states, statekeys, state, key);
187: else if ((position == 1)
188: && (((text[position - 1] == '\n') && (text[position] != '\r')) || (text[position - 1] == '\r')))
189: pushState(states, statekeys, state, key);
190: else if ((text[position - 1] == '\r')
191: || ((text[position - 1] == '\n') && (text[position] != '\r')))
192: pushState(states, statekeys, state, key);
193:
194: break;
195: case PatternAutomaton.TYPE_EOL:
196: if (position >= text.length)
197: pushState(states, statekeys, state, key);
198: else if (((position + 1) == text.length)
199: && ((text[position] == '\r') || (text[position] == '\n')))
200: pushState(states, statekeys, state, key);
201: else if ((text[position] == '\r')
202: || (text[position] == '\n'))
203: pushState(states, statekeys, state, key);
204:
205: break;
206: case PatternAutomaton.TYPE_GROUPSTART:
207: pushState(states, statekeys, state, key);
208: break;
209: case PatternAutomaton.TYPE_GROUPEND:
210: pushState(states, statekeys, state, key);
211: break;
212: }
213: }
214:
215: if (states.isEmpty()) {
216: IntegerList temp = newstates;
217: newstates = states;
218: states = temp;
219:
220: temp = newstatekeys;
221: newstatekeys = statekeys;
222: statekeys = temp;
223:
224: position++; // next character
225: }
226: }
227:
228: position = foundPosition;
229: key = foundkey;
230: while (key != 0) {
231: key = dependencies.get(key);
232: state = historystates.get(key);
233:
234: switch (automaton.getType(state)) {
235: case PatternAutomaton.TYPE_NOMATCH:
236: break;
237: case PatternAutomaton.TYPE_MATCH:
238: position--;
239: break;
240: case PatternAutomaton.TYPE_EXMATCH:
241: break;
242: case PatternAutomaton.TYPE_MATCHANY:
243: position--;
244: break;
245: case PatternAutomaton.TYPE_BOL:
246: break;
247: case PatternAutomaton.TYPE_EOL:
248: break;
249: case PatternAutomaton.TYPE_GROUPSTART:
250: if (groups[automaton.getGroupIndex(state)] == null) {
251: groupstarts[automaton.getGroupIndex(state)] = position;
252: groups[automaton.getGroupIndex(state)] = new String(
253: text, position, groupends[automaton
254: .getGroupIndex(state)]
255: - position);
256: }
257:
258: break;
259: case PatternAutomaton.TYPE_GROUPEND:
260: if (groups[automaton.getGroupIndex(state)] == null)
261: groupends[automaton.getGroupIndex(state)] = position;
262:
263: break;
264: }
265: }
266:
267: groupcount = automaton.getGroupCount();
268: for (int i = groupcount - 1; (i >= 0) && (groups[i] == null); i--)
269: groupcount--;
270:
271: return found;
272: }
273:
274: /**
275: * Push a state into the history.
276: *
277: * @param states The traversed states.
278: * @param keys Keys of the traversed paths.
279: * @param state Current state.
280: * @param key Current key.
281: */
282: private void pushState(IntegerList states, IntegerList keys,
283: int state, int key) {
284: states.push(automaton.getTransitions(state));
285: historystates.push(automaton.getTransitions(state));
286: for (int i = automaton.getTransitions(state).length; i > 0; i--) {
287: keys.push(dependencies.getCount());
288: dependencies.push(key);
289: }
290: }
291:
292: /**
293: * Return the text, which in last match was found.
294: *
295: * @return Text.
296: */
297: public String getGroup() {
298: return groups[0];
299: }
300:
301: /**
302: * Return the text of the specifed group, which in last match was found.
303: *
304: * @param group Index of group;
305: *
306: * @return Text
307: */
308: public String getGroup(int group) {
309: return groups[group];
310: }
311:
312: /**
313: * Return count of groups.
314: *
315: * @return Count of groups.
316: */
317: public int getGroupCount() {
318: return groupcount;
319: }
320:
321: /**
322: * Return the start position of the last match.
323: *
324: * @return Start position.
325: */
326: public int getGroupStart() {
327: return groupstarts[0];
328: }
329:
330: /**
331: * Return the start position of a group from the last match.
332: *
333: * @param group Index of group.
334: *
335: * @return Start position.
336: */
337: public int getGroupStart(int group) {
338: return groupstarts[group];
339: }
340:
341: /**
342: * Return the end position of the last match.
343: *
344: * @return End position.
345: */
346: public int getGroupEnd() {
347: return groupends[0];
348: }
349:
350: /**
351: * Return the end position of a group from the last match.
352: *
353: * @param group Index of group.
354: *
355: * @return End position.
356: */
357: public int getGroupEnd(int group) {
358: return groupends[group];
359: }
360: }
|