001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.process;
010:
011: import net.sourceforge.chaperon.common.Decoder;
012:
013: import java.io.Serializable;
014:
015: /**
016: * This class represents automaton to recognized text pattern.
017: *
018: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
019: * @version CVS $Id: PatternAutomaton.java,v 1.8 2004/01/08 11:30:52 benedikta Exp $
020: */
021: public class PatternAutomaton implements Serializable {
022: /** A simple transition without matching */
023: public static final int TYPE_NOMATCH = 0;
024:
025: /** If the character should match */
026: public static final int TYPE_MATCH = 1;
027:
028: /** If the characher shouldn't match */
029: public static final int TYPE_EXMATCH = 2;
030:
031: /** Match all characters */
032: public static final int TYPE_MATCHANY = 3;
033:
034: /** Begin of line */
035: public static final int TYPE_BOL = 4;
036:
037: /** End of line */
038: public static final int TYPE_EOL = 5;
039:
040: /** End of file */
041:
042: // public final static int TYPE_EOF = 6;
043: /** Start of group */
044: public static final int TYPE_GROUPSTART = 6;
045:
046: /** End of group */
047: public static final int TYPE_GROUPEND = 7;
048: private int[] types = new int[0];
049: private char[] intervalbegin = new char[0];
050: private char[] intervalend = new char[0];
051: private int[] groupindices = new int[0];
052: private int[][] transitions = new int[0][0];
053:
054: // Count of states
055: private int statecount = 0;
056:
057: // Initial state
058: private int firststate = -1;
059:
060: // Accepted state
061: private int finalstate = -1;
062: private int groupcount = 0;
063: private static final long serialVersionUID = 1246342009422283917L;
064:
065: /**
066: * Create a new pattern automaton.
067: *
068: * @param statecount Count of states.
069: */
070: public PatternAutomaton(int statecount) {
071: if (statecount <= 0)
072: throw new IllegalArgumentException(
073: "Count of states is invalid");
074:
075: this .statecount = statecount;
076:
077: types = new int[statecount];
078: intervalbegin = new char[statecount];
079: intervalend = new char[statecount];
080: groupindices = new int[statecount];
081: transitions = new int[statecount][0];
082:
083: for (int state = 0; state < statecount; state++) {
084: types[state] = TYPE_NOMATCH;
085: intervalbegin[state] = '\u0000';
086: intervalend[state] = '\u0000';
087: groupindices[state] = 0;
088: }
089: }
090:
091: /**
092: * Set the of of transition.
093: *
094: * @param state Index of state.
095: * @param type Type of transition.
096: */
097: public void setType(int state, int type) {
098: if ((type < TYPE_NOMATCH) || (type > TYPE_GROUPEND))
099: throw new IndexOutOfBoundsException();
100:
101: this .types[state] = type;
102: }
103:
104: /**
105: * Return the type of transition.
106: *
107: * @param state Index of state.
108: *
109: * @return Type of transition.
110: */
111: public int getType(int state) {
112: return types[state];
113: }
114:
115: /**
116: * Set the character interval, which the processor should matches against.
117: *
118: * @param state Index of state.
119: * @param begin Begin of the character interval.
120: * @param end End of the character interval.
121: */
122: public void setInterval(int state, char begin, char end) {
123: this .intervalbegin[state] = begin;
124: this .intervalend[state] = end;
125: }
126:
127: /**
128: * Return the begin of the character interval.
129: *
130: * @param state Index of state.
131: *
132: * @return Begin of the character interval.
133: */
134: public char getIntervalBegin(int state) {
135: return intervalbegin[state];
136: }
137:
138: /**
139: * Return the end of the character interval.
140: *
141: * @param state Index of state.
142: *
143: * @return End of the character interval.
144: */
145: public char getIntervalEnd(int state) {
146: return intervalend[state];
147: }
148:
149: /**
150: * Set the group index for a transition.
151: *
152: * @param state Index of state.
153: * @param groupindex Index of group.
154: */
155: public void setGroupIndex(int state, int groupindex) {
156: groupindices[state] = groupindex;
157: }
158:
159: /**
160: * Return the index of a group for a transition.
161: *
162: * @param state Index of state.
163: *
164: * @return Index of group.
165: */
166: public int getGroupIndex(int state) {
167: return groupindices[state];
168: }
169:
170: /**
171: * Set the count of groups.
172: *
173: * @param groupcount Count of groups.
174: */
175: public void setGroupCount(int groupcount) {
176: this .groupcount = groupcount;
177: }
178:
179: /**
180: * Return the count of groups.
181: *
182: * @return Count of groups.
183: */
184: public int getGroupCount() {
185: return groupcount;
186: }
187:
188: /**
189: * Set the destination states for a transition.
190: *
191: * @param state Index of state.
192: * @param transitions Destination states.
193: */
194: public void setTransitions(int state, int[] transitions) {
195: this .transitions[state] = transitions;
196: }
197:
198: /**
199: * Returns the destinations of the transition.
200: *
201: * @param state Index of the state
202: *
203: * @return Destinations of the transition.
204: */
205: public int[] getTransitions(int state) {
206: return transitions[state];
207: }
208:
209: /**
210: * Add a state as destination to the transition.
211: *
212: * @param state Index of transition.
213: * @param nextstate Destination state.
214: */
215: public void addTransition(int state, int nextstate) {
216: // Prevent multiple entries
217: for (int i = 0; i < transitions[state].length; i++)
218: if (transitions[state][i] == nextstate)
219: return;
220:
221: int[] newtransitions = new int[transitions[state].length + 1];
222:
223: System.arraycopy(transitions[state], 0, newtransitions, 0,
224: transitions[state].length);
225: newtransitions[transitions[state].length] = nextstate;
226: transitions[state] = newtransitions;
227: }
228:
229: /**
230: * Set the first state of the automaton.
231: *
232: * @param firststate First state of the automaton.
233: */
234: public void setFirstState(int firststate) {
235: if ((firststate < 0) || (firststate > statecount))
236: throw new IllegalArgumentException();
237:
238: this .firststate = firststate;
239: }
240:
241: /**
242: * Return the first state of the automaton.
243: *
244: * @return First state of the automaton.
245: */
246: public int getFirstState() {
247: return firststate;
248: }
249:
250: /**
251: * Set the final state. If the automaton reaches this state, the automate was successful
252: *
253: * @param finalstate Final state
254: */
255: public void setFinalState(int finalstate) {
256: if ((finalstate < 0) || (finalstate > statecount))
257: throw new IllegalArgumentException();
258:
259: this .finalstate = finalstate;
260: }
261:
262: /**
263: * Returns the index of the final state
264: *
265: * @return Index of the final state
266: */
267: public int getFinalState() {
268: return finalstate;
269: }
270:
271: /**
272: * Test if the state is the final state
273: *
274: * @param state Index of the state
275: *
276: * @return True, if the state is the final state
277: */
278: public boolean isFinalState(int state) {
279: return finalstate == state;
280: }
281:
282: /**
283: * Returns the count of states
284: *
285: * @return Count of states
286: */
287: public int getStateCount() {
288: return statecount;
289: }
290:
291: /** Creates empty string for indenting */
292: private static final String spaces = " ";
293:
294: /**
295: * Return a string representation of the automaton.
296: *
297: * @return String representation of the automaton.
298: */
299: public String toString() {
300: String[] chars = new String[statecount];
301: int i;
302:
303: for (i = 0; i < statecount; i++)
304: switch (types[i]) {
305: case TYPE_NOMATCH:
306: chars[i] = " ";
307: break;
308: case TYPE_MATCH:
309: chars[i] = Decoder.toClass(intervalbegin[i],
310: intervalend[i]);
311: break;
312: case TYPE_EXMATCH:
313: chars[i] = Decoder.toNegativeClass(intervalbegin[i],
314: intervalend[i]);
315: break;
316: case TYPE_MATCHANY:
317: chars[i] = ".";
318: break;
319: case TYPE_BOL:
320: chars[i] = "^";
321: break;
322: case TYPE_EOL:
323: chars[i] = "$";
324: break;
325: case TYPE_GROUPSTART:
326: chars[i] = "([" + groupindices[i] + "]";
327: break;
328: case TYPE_GROUPEND:
329: chars[i] = ")[" + groupindices[i] + "]";
330: break;
331: }
332:
333: StringBuffer buffer = new StringBuffer();
334: String dummy;
335:
336: for (i = 0; i < statecount; i++) {
337: dummy = String.valueOf(i);
338: buffer.append(" ");
339: buffer.append(dummy);
340: buffer.append(spaces.substring(0, Math.max(0, chars[i]
341: .length()
342: - dummy.length() + 1)));
343: }
344:
345: buffer.append("\n");
346:
347: for (i = 0; i < statecount; i++) {
348: buffer.append(" ");
349: buffer.append(String.valueOf(chars[i]));
350: buffer.append(" ");
351: }
352:
353: buffer.append("\n");
354:
355: int maxtransitions = 0;
356:
357: for (i = 0; i < statecount; i++)
358: maxtransitions = Math.max(maxtransitions,
359: transitions[i].length);
360:
361: for (int j = 0; j < maxtransitions; j++) {
362: for (i = 0; i < statecount; i++)
363: if (j < transitions[i].length) {
364: dummy = String.valueOf(transitions[i][j]);
365:
366: buffer.append(" ");
367: buffer.append(dummy);
368: buffer.append(spaces.substring(0, Math.max(0,
369: chars[i].length() - dummy.length() + 1)));
370: } else {
371: buffer.append(" ");
372: buffer.append(spaces.substring(0, Math.max(0,
373: chars[i].length() + 1)));
374: }
375:
376: buffer.append("\n");
377: }
378:
379: buffer.append("First state = ");
380: buffer.append(String.valueOf(firststate));
381: buffer.append("\n");
382: buffer.append("Final state = ");
383: buffer.append(String.valueOf(finalstate));
384: buffer.append("\n");
385: buffer.append("State count = ");
386: buffer.append(String.valueOf(statecount));
387: buffer.append("\n");
388: buffer.append("Group count = ");
389: buffer.append(String.valueOf(groupcount));
390:
391: return buffer.toString();
392: }
393: }
|