001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.build;
010:
011: import net.sourceforge.chaperon.model.Violations;
012: import net.sourceforge.chaperon.model.pattern.Alternation;
013: import net.sourceforge.chaperon.model.pattern.BeginOfLine;
014: import net.sourceforge.chaperon.model.pattern.CharacterClass;
015: import net.sourceforge.chaperon.model.pattern.CharacterInterval;
016: import net.sourceforge.chaperon.model.pattern.CharacterSet;
017: import net.sourceforge.chaperon.model.pattern.CharacterString;
018: import net.sourceforge.chaperon.model.pattern.Concatenation;
019: import net.sourceforge.chaperon.model.pattern.EndOfLine;
020: import net.sourceforge.chaperon.model.pattern.Pattern;
021: import net.sourceforge.chaperon.model.pattern.PatternGroup;
022: import net.sourceforge.chaperon.model.pattern.UniversalCharacter;
023: import net.sourceforge.chaperon.process.PatternAutomaton;
024:
025: /**
026: * This class represents a builder for the pattern automata.
027: *
028: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
029: * @version CVS $Id: PatternAutomatonBuilder.java,v 1.7 2003/12/09 19:55:52 benedikta Exp $
030: */
031: public class PatternAutomatonBuilder {
032: private Pattern pattern = null;
033: private PatternAutomaton automaton = null;
034: private Violations violations = new Violations();
035: private int statecount = 0;
036: private int stateindex = 0;
037: private int groupcount = 0;
038: private int groupindex = 0;
039:
040: /**
041: * Create a builder for the pattern automata.
042: *
043: * @param pattern Pattern, which should be used to build the pattern automaton.
044: */
045: public PatternAutomatonBuilder(Pattern pattern) {
046: violations.addViolations(pattern.validate());
047:
048: if ((violations != null)
049: && (violations.getViolationCount() > 0))
050: throw new IllegalArgumentException("Pattern is not valid: "
051: + violations.getViolation(0));
052:
053: this .pattern = pattern;
054:
055: statecount = getStateCount(pattern) + 3;
056: stateindex = statecount - 1;
057:
058: groupcount = getGroupCount(pattern);
059: groupindex = groupcount;
060:
061: PatternAutomaton automaton = new PatternAutomaton(statecount);
062:
063: automaton.setGroupCount(groupcount + 1);
064:
065: int finalstate = stateindex--;
066:
067: automaton.setFinalState(finalstate);
068:
069: automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPEND);
070: automaton.setGroupIndex(stateindex, 0);
071: automaton.setTransitions(stateindex, new int[] { finalstate });
072:
073: int state = stateindex--;
074:
075: state = traverse(automaton, pattern, state);
076:
077: automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
078: automaton.setGroupIndex(stateindex, 0);
079: automaton.setTransitions(stateindex, new int[] { state });
080:
081: automaton.setFirstState(stateindex);
082:
083: this .automaton = automaton;
084: }
085:
086: /**
087: * Return the builded automaton. This method will return null, if an error occurs.
088: *
089: * @return Pattern automaton.
090: */
091: public PatternAutomaton getPatternAutomaton() {
092: return automaton;
093: }
094:
095: /**
096: * Calculates the count of group in the pattern.
097: *
098: * @param element Root pattern.
099: *
100: * @return Count of groups.
101: */
102: private int getGroupCount(Pattern element) {
103: int groupcount = 0;
104:
105: if (element instanceof Alternation) {
106: Alternation alternation = (Alternation) element;
107:
108: for (int i = 0; i < alternation.getPatternCount(); i++)
109: groupcount += getGroupCount(alternation.getPattern(i));
110: } else if (element instanceof Concatenation) {
111: Concatenation concatenation = (Concatenation) element;
112:
113: for (int i = 0; i < concatenation.getPatternCount(); i++)
114: groupcount += getGroupCount(concatenation.getPattern(i));
115: } else if (element instanceof PatternGroup) {
116: groupcount++;
117:
118: PatternGroup group = (PatternGroup) element;
119:
120: for (int i = 0; i < group.getPatternCount(); i++)
121: groupcount += getGroupCount(group.getPattern(i));
122: }
123:
124: return groupcount;
125: }
126:
127: /**
128: * Calculates the count of states.
129: *
130: * @param element Root pattern.
131: *
132: * @return Count of states.
133: */
134: private int getStateCount(Pattern element) {
135: int factor = 1;
136: int offset = 0;
137: int statecount = 0;
138:
139: // generate closure for p
140: if ((element.getMinOccurs() == 1)
141: && (element.getMaxOccurs() == 1)) {
142: // nothing
143: }
144:
145: // generate closure for p?
146: else if ((element.getMinOccurs() == 0)
147: && (element.getMaxOccurs() == 1))
148: offset = 1;
149:
150: // generate closure for p+
151: else if ((element.getMinOccurs() == 1)
152: && (element.getMaxOccurs() == Integer.MAX_VALUE))
153: offset = 1;
154:
155: // generate closure for p*
156: else if ((element.getMinOccurs() == 0)
157: && (element.getMaxOccurs() == Integer.MAX_VALUE))
158: offset = 2;
159:
160: // generate closure for p{n,m}
161: else {
162: factor = element.getMaxOccurs();
163: offset = 1;
164: }
165:
166: if (element instanceof Alternation) {
167: Alternation alternation = (Alternation) element;
168:
169: for (int i = 0; i < alternation.getPatternCount(); i++)
170: statecount += getStateCount(alternation.getPattern(i));
171:
172: if (alternation.getPatternCount() > 1)
173: statecount++;
174: } else if (element instanceof BeginOfLine)
175: statecount = 1;
176: else if (element instanceof CharacterClass) {
177: CharacterClass characterclass = (CharacterClass) element;
178:
179: for (int i = 0; i < characterclass
180: .getCharacterClassElementCount(); i++) {
181: if (characterclass.getCharacterClassElement(i) instanceof CharacterInterval)
182: statecount++;
183: else if (characterclass.getCharacterClassElement(i) instanceof CharacterSet) {
184: CharacterSet set = (CharacterSet) characterclass
185: .getCharacterClassElement(i);
186:
187: statecount += set.getCharacters().length();
188: }
189: }
190:
191: statecount++;
192: } else if (element instanceof CharacterString) {
193: CharacterString string = (CharacterString) element;
194:
195: statecount += string.getString().length();
196: } else if (element instanceof Concatenation) {
197: Concatenation concatenation = (Concatenation) element;
198:
199: for (int i = 0; i < concatenation.getPatternCount(); i++)
200: statecount += getStateCount(concatenation.getPattern(i));
201: } else if (element instanceof EndOfLine)
202: statecount = 1;
203: else if (element instanceof PatternGroup) {
204: statecount = 2;
205:
206: PatternGroup group = (PatternGroup) element;
207:
208: for (int i = 0; i < group.getPatternCount(); i++)
209: statecount += getStateCount(group.getPattern(i));
210: } else if (element instanceof UniversalCharacter)
211: statecount = 1;
212: else
213: throw new IllegalArgumentException(
214: "Pattern element not recognized");
215:
216: return (factor * statecount) + offset;
217: }
218:
219: /*
220: * @param laststate First state of the following pattern
221: *
222: * @return First state of the current pattern
223: */
224:
225: /**
226: * Build the automaton by traversing the pattern tree.
227: *
228: * @param automaton The current automaton.
229: * @param element The current pattern element.
230: * @param laststate Last used state in the automaton.
231: *
232: * @return New last state in the automaton.
233: */
234: private int traverse(PatternAutomaton automaton, Pattern element,
235: int laststate) {
236: int firststate;
237:
238: // generate closure for p
239: if ((element.getMinOccurs() == 1)
240: && (element.getMaxOccurs() == 1))
241: firststate = evalPattern(automaton, element, laststate);
242:
243: // generate closure for p?
244: else if ((element.getMinOccurs() == 0)
245: && (element.getMaxOccurs() == 1)) {
246: int s1 = evalPattern(automaton, element, laststate);
247: automaton.setTransitions(stateindex, new int[] { s1,
248: laststate });
249: firststate = stateindex--;
250: }
251:
252: // generate closure for p+
253: else if ((element.getMinOccurs() == 1)
254: && (element.getMaxOccurs() == Integer.MAX_VALUE)) {
255: int s1 = stateindex--;
256: firststate = evalPattern(automaton, element, s1);
257: automaton.setTransitions(s1, new int[] { firststate,
258: laststate });
259: }
260:
261: // generate closure for p*
262: else if ((element.getMinOccurs() == 0)
263: && (element.getMaxOccurs() == Integer.MAX_VALUE)) {
264: int s2 = stateindex--;
265: int s1 = evalPattern(automaton, element, s2);
266: automaton.setTransitions(s2, new int[] { s1, laststate });
267:
268: firststate = stateindex--;
269: automaton.setTransitions(firststate, new int[] { s1,
270: laststate });
271: }
272:
273: // generate closure for p{n,m}
274: else {
275: int s2 = laststate;
276: for (int i = 0; i < element.getMinOccurs(); i++)
277: s2 = evalPattern(automaton, element, s2);
278:
279: int s1 = s2;
280:
281: for (int i = element.getMinOccurs(); i < element
282: .getMaxOccurs(); i++) {
283: s1 = evalPattern(automaton, element, s1);
284: if (i > element.getMinOccurs())
285: automaton.addTransition(s1, s2);
286: }
287:
288: firststate = stateindex--;
289: automaton.setTransitions(firststate, new int[] { s1, s2 });
290: }
291:
292: if (element instanceof PatternGroup)
293: groupindex--;
294:
295: return firststate;
296: }
297:
298: /**
299: * Evalutates a pattern element.
300: *
301: * @param automaton The current automaton.
302: * @param element The current pattern element.
303: * @param laststate Last used state in the automaton.
304: *
305: * @return New last state in the automaton.
306: */
307: private int evalPattern(PatternAutomaton automaton,
308: Pattern element, int laststate) {
309: if (element instanceof Alternation)
310: return evalAlternation(automaton, (Alternation) element,
311: laststate);
312: else if (element instanceof BeginOfLine)
313: return evalBeginOfLine(automaton, (BeginOfLine) element,
314: laststate);
315: else if (element instanceof CharacterClass)
316: return evalCharacterClass(automaton,
317: (CharacterClass) element, laststate);
318: else if (element instanceof CharacterString)
319: return evalCharacterString(automaton,
320: (CharacterString) element, laststate);
321: else if (element instanceof Concatenation)
322: return evalConcatenation(automaton,
323: (Concatenation) element, laststate);
324: else if (element instanceof EndOfLine)
325: return evalEndOfLine(automaton, (EndOfLine) element,
326: laststate);
327: else if (element instanceof PatternGroup)
328: return evalPatternGroup(automaton, (PatternGroup) element,
329: laststate);
330: else if (element instanceof UniversalCharacter)
331: return evalUniversalCharacter(automaton,
332: (UniversalCharacter) element, laststate);
333: else
334: throw new IllegalArgumentException(
335: "Pattern element not recognized");
336: }
337:
338: /**
339: * Create the states and transitions for an alternation
340: *
341: * @param automaton The current automaton.
342: * @param element The current pattern element.
343: * @param laststate Last used state in the automaton.
344: *
345: * @return New last state in the automaton.
346: */
347: private int evalAlternation(PatternAutomaton automaton,
348: Alternation element, int laststate) {
349: if (element.getPatternCount() == 1)
350: return traverse(automaton, element.getPattern(0), laststate);
351: else {
352: int nextstate = stateindex--;
353: int state;
354:
355: for (int i = element.getPatternCount() - 1; i >= 0; i--) {
356: state = traverse(automaton, element.getPattern(i),
357: laststate);
358: automaton.addTransition(nextstate, state);
359: }
360:
361: return nextstate;
362: }
363: }
364:
365: /**
366: * Create the states and transitions for a pattern that matches the begin of line.
367: *
368: * @param automaton The current automaton.
369: * @param element The current pattern element.
370: * @param laststate Last used state in the automaton.
371: *
372: * @return New last state in the automaton.
373: */
374: private int evalBeginOfLine(PatternAutomaton automaton,
375: BeginOfLine element, int laststate) {
376: automaton.setType(stateindex, PatternAutomaton.TYPE_BOL);
377: automaton.setTransitions(stateindex, new int[] { laststate });
378: return stateindex--;
379: }
380:
381: /**
382: * Create the states and transition for a character class.
383: *
384: * @param automaton The current automaton.
385: * @param element The current pattern element.
386: * @param laststate Last used state in the automaton.
387: *
388: * @return New last state in the automaton.
389: */
390: private int evalCharacterClass(PatternAutomaton automaton,
391: CharacterClass element, int laststate) {
392: int state;
393:
394: if (!element.isExclusive()) {
395: int firststate = stateindex--;
396:
397: for (int i = 0; i < element.getCharacterClassElementCount(); i++) {
398: if (element.getCharacterClassElement(i) instanceof CharacterInterval) {
399: CharacterInterval interval = (CharacterInterval) element
400: .getCharacterClassElement(i);
401:
402: automaton.setType(stateindex,
403: PatternAutomaton.TYPE_MATCH);
404: automaton.setInterval(stateindex, interval
405: .getMinimum(), interval.getMaximum());
406: automaton.addTransition(stateindex, laststate);
407: state = stateindex--;
408: automaton.addTransition(firststate, state);
409: } else if (element.getCharacterClassElement(i) instanceof CharacterSet) {
410: CharacterSet set = (CharacterSet) element
411: .getCharacterClassElement(i);
412: String chars = set.getCharacters();
413:
414: for (int j = 0; j < chars.length(); j++) {
415: automaton.setType(stateindex,
416: PatternAutomaton.TYPE_MATCH);
417: automaton.setInterval(stateindex, chars
418: .charAt(j), chars.charAt(j));
419: automaton.addTransition(stateindex, laststate);
420: state = stateindex--;
421: automaton.addTransition(firststate, state);
422: }
423: }
424: }
425:
426: return firststate;
427: } else {
428: state = stateindex--;
429: automaton.setType(state, PatternAutomaton.TYPE_MATCHANY);
430: automaton.setTransitions(state, new int[] { laststate });
431: for (int i = element.getCharacterClassElementCount() - 1; i >= 0; i--) {
432: if (element.getCharacterClassElement(i) instanceof CharacterInterval) {
433: CharacterInterval interval = (CharacterInterval) element
434: .getCharacterClassElement(i);
435:
436: automaton.setType(stateindex,
437: PatternAutomaton.TYPE_MATCH);
438: automaton.setInterval(stateindex, interval
439: .getMinimum(), interval.getMaximum());
440: automaton.setTransitions(stateindex,
441: new int[] { state });
442: state = stateindex--;
443: } else if (element.getCharacterClassElement(i) instanceof CharacterSet) {
444: CharacterSet set = (CharacterSet) element
445: .getCharacterClassElement(i);
446: String chars = set.getCharacters();
447:
448: for (int j = 0; j < chars.length(); j++) {
449: automaton.setType(stateindex,
450: PatternAutomaton.TYPE_EXMATCH);
451: automaton.setInterval(stateindex, chars
452: .charAt(j), chars.charAt(j));
453: automaton.setType(stateindex,
454: PatternAutomaton.TYPE_EXMATCH);
455: automaton.setTransitions(stateindex,
456: new int[] { state });
457: state = stateindex--;
458: }
459: }
460: }
461:
462: return state;
463: }
464: }
465:
466: /**
467: * Create the states and transitions for a string of characters.
468: *
469: * @param automaton The current automaton.
470: * @param element The current pattern element.
471: * @param laststate Last used state in the automaton.
472: *
473: * @return New last state in the automaton.
474: */
475: private int evalCharacterString(PatternAutomaton automaton,
476: CharacterString element, int laststate) {
477: int state = laststate;
478:
479: for (int i = element.getString().length() - 1; i >= 0; i--) {
480: automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
481: automaton.setInterval(stateindex, element.getString()
482: .charAt(i), element.getString().charAt(i));
483: automaton.setTransitions(stateindex, new int[] { state });
484: state = stateindex--;
485: }
486:
487: return state;
488: }
489:
490: /**
491: * Create the states and transitions for a catenation of pattern
492: *
493: * @param automaton The current automaton.
494: * @param element The current pattern element.
495: * @param laststate Last used state in the automaton.
496: *
497: * @return New last state in the automaton.
498: */
499: private int evalConcatenation(PatternAutomaton automaton,
500: Concatenation element, int laststate) {
501: int state = laststate;
502:
503: for (int i = element.getPatternCount() - 1; i >= 0; i--)
504: state = traverse(automaton, element.getPattern(i), state);
505:
506: return state;
507: }
508:
509: /**
510: * Create the states and transitions for a pattern that matches the end of line.
511: *
512: * @param automaton The current automaton.
513: * @param element The current pattern element.
514: * @param laststate Last used state in the automaton.
515: *
516: * @return New last state in the automaton.
517: */
518: private int evalEndOfLine(PatternAutomaton automaton,
519: EndOfLine element, int laststate) {
520: automaton.setType(stateindex, PatternAutomaton.TYPE_EOL);
521: automaton.setTransitions(stateindex, new int[] { laststate });
522: return stateindex--;
523: }
524:
525: /**
526: * Create the states and transitions for a pattern group.
527: *
528: * @param automaton The current automaton.
529: * @param element The current pattern element.
530: * @param laststate Last used state in the automaton.
531: *
532: * @return New last state in the automaton.
533: */
534: private int evalPatternGroup(PatternAutomaton automaton,
535: PatternGroup element, int laststate) {
536: int endstate = stateindex--;
537:
538: automaton.setType(endstate, PatternAutomaton.TYPE_GROUPEND);
539: automaton.setGroupIndex(endstate, groupindex);
540: automaton.setTransitions(endstate, new int[] { laststate });
541:
542: int nextstate = endstate;
543:
544: for (int i = element.getPatternCount() - 1; i >= 0; i--)
545: nextstate = traverse(automaton, element.getPattern(i),
546: nextstate);
547:
548: automaton.setGroupIndex(endstate, groupindex);
549:
550: automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
551: automaton.setGroupIndex(stateindex, groupindex);
552: automaton.setTransitions(stateindex, new int[] { nextstate });
553: return stateindex--;
554: }
555:
556: /**
557: * Create the states and transition for an universal character.
558: *
559: * @param automaton The current automaton.
560: * @param element The current pattern element.
561: * @param laststate Last used state in the automaton.
562: *
563: * @return New last state in the automaton.
564: */
565: private int evalUniversalCharacter(PatternAutomaton automaton,
566: UniversalCharacter element, int laststate) {
567: automaton.setType(stateindex, PatternAutomaton.TYPE_MATCHANY);
568: automaton.setTransitions(stateindex, new int[] { laststate });
569: return stateindex--;
570: }
571: }
|