0001: /*
0002: * 01/07/2003 - 15:19:32
0003: *
0004: * Automaton_Pattern.java -
0005: * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR
0006: * ralf.meyer@karneim.com
0007: * http://jrexx.sf.net
0008: *
0009: * This program is free software; you can redistribute it and/or
0010: * modify it under the terms of the GNU Lesser General Public License
0011: * as published by the Free Software Foundation; either version 2
0012: * of the License, or (at your option) any later version.
0013: *
0014: * This program is distributed in the hope that it will be useful,
0015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0017: * GNU Lesser General Public License for more details.
0018: *
0019: * You should have received a copy of the GNU Lesser General Public License
0020: * along with this program; if not, write to the Free Software
0021: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0022: */
0023: package com.tc.jrexx.regex;
0024:
0025: import com.tc.jrexx.set.*;
0026: import com.tc.jrexx.automaton.*;
0027: import java.text.*;
0028: import java.util.*;
0029:
0030: public class Automaton_Pattern extends
0031: com.tc.jrexx.set.AutomatonSet_String {
0032:
0033: protected static class PProperties extends SProperties {
0034: }
0035:
0036: protected interface IPState extends ISState {
0037:
0038: }
0039:
0040: protected class PState extends AutomatonSet_String.SState implements
0041: IPState {
0042: public PState(boolean isFinal) {
0043: super (isFinal);
0044: }
0045:
0046: protected Transition addTransition(IProperties properties,
0047: ISet_char charSet, State toState) {
0048: return super .addTransition(properties, charSet, toState);
0049: }
0050:
0051: protected boolean removeTransition(Transition trans) {
0052: return super .removeTransition(trans);
0053: }
0054:
0055: protected void setFinal(boolean isFinal) {
0056: super .setFinal(isFinal);
0057: }
0058:
0059: protected IState getEClosure() {
0060: return super .getEClosure();
0061: }
0062:
0063: }
0064:
0065: protected class LinkedSet_PState extends
0066: AutomatonSet_String.LinkedSet_SState implements IPState {
0067:
0068: protected LinkedSet_PState() {
0069: super ();
0070: }
0071:
0072: protected LinkedSet_PState(PState state) {
0073: super (state);
0074: }
0075:
0076: }
0077:
0078: private Map preDefinedAutomatons = null;
0079: protected String regEx = null;
0080:
0081: protected Automaton_Pattern(ISet_char fullSet) {
0082: super (fullSet);
0083: }
0084:
0085: protected Automaton_Pattern() {
0086: super ();
0087: this .regEx = "";
0088: }
0089:
0090: protected Automaton_Pattern(String regEx) {
0091: super ();
0092: this .regEx = "";
0093: this .addAll(regEx);
0094: }
0095:
0096: protected Automaton.State getStartState() {
0097: return super .getStartState();
0098: }
0099:
0100: protected State createState() {
0101: return new PState(false);
0102: }
0103:
0104: protected SState createState(boolean isFinal) {
0105: return new PState(isFinal);
0106: }
0107:
0108: protected LinkedSet_State newLinkedSet_State() {
0109: return new LinkedSet_PState();
0110: }
0111:
0112: protected LinkedSet_State newLinkedSet_State(State state) {
0113: return new LinkedSet_PState((PState) state);
0114: }
0115:
0116: protected void setStartState(SState state) {
0117: super .setStartState(state);
0118: }
0119:
0120: protected SState addState(boolean isFinal) {
0121: return super .addState(isFinal);
0122: }
0123:
0124: protected boolean removeState(PState removeState) {
0125: return super .removeState(removeState);
0126: }
0127:
0128: protected void clear() {
0129: super .clear();
0130: this .regEx = "";
0131: }
0132:
0133: protected LinkedSet_State getStates() {
0134: return super .getStates();
0135: }
0136:
0137: protected void minimize() {
0138: super .minimize();
0139: }
0140:
0141: protected void removeUselessStates() {
0142: super .removeUselessStates();
0143: }
0144:
0145: protected void addAll(SState state) {
0146: super .addAll(state);
0147: }
0148:
0149: protected SState complement(SState state) {
0150: return super .complement(state);
0151: }
0152:
0153: protected SState concat(SState state_A, SState state_B) {
0154: return super .concat(state_A, state_B);
0155: }
0156:
0157: protected SState repeat(SState state, int minTimes, int maxTimes) {
0158: //performance leak
0159: if ((state instanceof PState) == false)
0160: throw new IllegalArgumentException(
0161: "(state instanceof PState)==false");
0162:
0163: return super .repeat(state, minTimes, maxTimes);
0164: }
0165:
0166: protected SState union(SState state_A, SState state_B) {
0167: return super .union(state_A, state_B);
0168: }
0169:
0170: protected SState intersect(SState state_A, SState state_B) {
0171: return super .intersect(state_A, state_B);
0172: }
0173:
0174: /*
0175: protected PState minus(PState state_A,PState state_B) {
0176: return (PState)super.minus(state_A,state_B);
0177: }
0178: */
0179:
0180: protected void complement() {
0181: super .complement();
0182: if (this .regEx == null)
0183: return;
0184: if (this .regEx == "")
0185: this .regEx = ".*";
0186: else
0187: this .regEx = "!(" + this .regEx + ")";
0188: }
0189:
0190: protected void addAll(String regEx) {
0191: if (this .regEx == null)
0192: return;
0193: if (this .regEx == "")
0194: this .regEx = regEx;
0195: else {
0196: this .regEx = new StringBuffer(this .regEx.length()
0197: + regEx.length() + 5).append('(')
0198: .append(this .regEx).append(')').append('|').append(
0199: '(').append(regEx).append(')').toString();
0200: }
0201:
0202: this .addAll(this .parseRegEx(regEx));
0203: this .removeUselessStates();
0204: }
0205:
0206: protected void retainAll(String regEx) {
0207: if (this .regEx == null)
0208: return;
0209: if (this .regEx == "" || regEx == "")
0210: this .regEx = "";
0211: else {
0212: this .regEx = new StringBuffer(this .regEx.length()
0213: + regEx.length() + 5).append('(')
0214: .append(this .regEx).append(')').append('&').append(
0215: '(').append(regEx).append(')').toString();
0216: }
0217:
0218: this .retainAll(this .parseRegEx(regEx));
0219: this .removeUselessStates();
0220: }
0221:
0222: protected void removeAll(String regEx) {
0223: if (this .regEx == null)
0224: return;
0225: if (this .regEx == "")
0226: this .regEx = "";
0227: else {
0228: this .regEx = new StringBuffer(this .regEx.length()
0229: + regEx.length() + 6).append('(')
0230: .append(this .regEx).append(')').append("&!")
0231: .append('(').append(regEx).append(')').toString();
0232: }
0233:
0234: this .removeAll(this .parseRegEx(regEx));
0235: this .removeUselessStates();
0236: }
0237:
0238: protected boolean isDeterministic() {
0239: return super .isDeterministic();
0240: }
0241:
0242: protected boolean isDeterministic(State startState) {
0243: return super .isDeterministic(startState);
0244: }
0245:
0246: protected void addAll(AutomatonSet_String automaton) {
0247: super .addAll(automaton);
0248:
0249: Automaton_Pattern pAutomaton = (Automaton_Pattern) automaton;
0250:
0251: if (this .regEx == null || pAutomaton.regEx == null)
0252: return;
0253: if (this .regEx == "")
0254: this .regEx = pAutomaton.regEx;
0255: else {
0256: this .regEx = new StringBuffer(this .regEx.length()
0257: + pAutomaton.regEx.length() + 5).append('(')
0258: .append(this .regEx).append(')').append('|').append(
0259: '(').append(pAutomaton.regEx).append(')')
0260: .toString();
0261: }
0262: }
0263:
0264: protected void retainAll(AutomatonSet_String automaton) {
0265: super .retainAll(automaton);
0266:
0267: Automaton_Pattern pAutomaton = (Automaton_Pattern) automaton;
0268:
0269: if (this .regEx == null || pAutomaton.regEx == null)
0270: return;
0271: if (this .regEx == "" || pAutomaton.regEx == "")
0272: this .regEx = "";
0273: else {
0274: this .regEx = new StringBuffer(this .regEx.length()
0275: + pAutomaton.regEx.length() + 5).append('(')
0276: .append(this .regEx).append(')').append('&').append(
0277: '(').append(pAutomaton.regEx).append(')')
0278: .toString();
0279: }
0280: }
0281:
0282: protected void removeAll(AutomatonSet_String automaton) {
0283: super .removeAll(automaton);
0284:
0285: Automaton_Pattern pAutomaton = (Automaton_Pattern) automaton;
0286:
0287: if (this .regEx == null || pAutomaton.regEx == null)
0288: return;
0289: if (this .regEx == "")
0290: this .regEx = "";
0291: else if (pAutomaton.regEx != "") {
0292: this .regEx = new StringBuffer(this .regEx.length()
0293: + pAutomaton.regEx.length() + 6).append('(')
0294: .append(this .regEx).append(')').append("&!")
0295: .append('(').append(pAutomaton.regEx).append(')')
0296: .toString();
0297: }
0298: }
0299:
0300: protected Object clone() {
0301: final Automaton_Pattern clone = (Automaton_Pattern) super
0302: .clone();
0303: clone.scanner = clone.newScanner();
0304: return clone;
0305: }
0306:
0307: //////////////////////////////////////////////////////////////////////////
0308: ///// P A R S I N G
0309: //////////////////////////////////////////////////////////////////////////
0310:
0311: private static final int ERROR = -2, // the possible parser
0312: SHIFT = -3, // actions used in ACTIONTABLE
0313: REDUCE = -4, // value -1 is reserved
0314: ACCEPT = -5, // for unknown constant value
0315:
0316: RE = 0, // NonTerminal Symbols
0317: TERM = 1, // IMPORTANT: the value represents the
0318: ELEMENT = 2, // rowNr in ACTIONTABLE
0319:
0320: notOp = 3, // Terminal Symbols
0321: andOp = 4, // .
0322: orOp = 5, // .
0323: groupBegin = 6, // .
0324: groupEnd = 7, // .
0325: repetition = 8, // .
0326: label = 9, // .
0327: regExp = 10, // IMPORTANT: the value represents the
0328: EOF = 11; // rowNr in ACTIONTABLE
0329:
0330: private static final int[][][] ACTIONTABLE = {
0331: // state RE TERM ELEMENT notOp andOp orOp groupBegin groupEnd repetition label regExp EOF
0332: /* 0 */{ { SHIFT, 2 }, { SHIFT, 7 }, { SHIFT, 5 },
0333: { SHIFT, 11 }, { ERROR, 0 }, { ERROR, 0 },
0334: { SHIFT, 14 }, { ERROR, 0 }, { ERROR, 0 },
0335: { ERROR, 0 }, { SHIFT, 16 }, { ERROR, 0 } },
0336: /* 1 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0337: { REDUCE, 3 }, { REDUCE, 3 }, { REDUCE, 3 },
0338: { REDUCE, 3 }, { REDUCE, 3 }, { REDUCE, 3 },
0339: { REDUCE, 3 }, { REDUCE, 3 }, { REDUCE, 3 } },
0340: /* 2 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0341: { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0342: { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0343: { ERROR, 0 }, { ERROR, 0 }, { ACCEPT, 0 } },
0344: /* 3 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0345: { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0346: { ERROR, 0 }, { REDUCE, 1 }, { ERROR, 0 },
0347: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 1 } },
0348: /* 4 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0349: { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0350: { ERROR, 0 }, { SHIFT, 13 }, { ERROR, 0 },
0351: { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 } },
0352: /* 5 */{ { ERROR, 0 }, { SHIFT, 8 }, { SHIFT, 5 },
0353: { SHIFT, 11 }, { SHIFT, 10 }, { REDUCE, 6 },
0354: { SHIFT, 14 }, { REDUCE, 6 }, { SHIFT, 1 },
0355: { SHIFT, 12 }, { SHIFT, 16 }, { REDUCE, 6 } },
0356: /* 6 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0357: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 9 },
0358: { ERROR, 0 }, { REDUCE, 9 }, { SHIFT, 1 },
0359: { SHIFT, 12 }, { ERROR, 0 }, { REDUCE, 9 } },
0360: /* 7 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0361: { ERROR, 0 }, { ERROR, 0 }, { SHIFT, 15 },
0362: { ERROR, 0 }, { REDUCE, 0 }, { ERROR, 0 },
0363: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 0 } },
0364: /* 8 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0365: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 7 },
0366: { ERROR, 0 }, { REDUCE, 7 }, { ERROR, 0 },
0367: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 7 } },
0368: /* 9 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0369: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 8 },
0370: { ERROR, 0 }, { REDUCE, 8 }, { ERROR, 0 },
0371: { ERROR, 0 }, { ERROR, 0 }, { REDUCE, 8 } },
0372: /* 10 */{ { ERROR, 0 }, { SHIFT, 9 }, { SHIFT, 5 },
0373: { SHIFT, 11 }, { ERROR, 0 }, { ERROR, 0 },
0374: { SHIFT, 14 }, { ERROR, 0 }, { ERROR, 0 },
0375: { ERROR, 0 }, { SHIFT, 16 }, { ERROR, 0 } },
0376: /* 11 */{ { ERROR, 0 }, { ERROR, 0 }, { SHIFT, 6 },
0377: { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0378: { SHIFT, 14 }, { ERROR, 0 }, { ERROR, 0 },
0379: { ERROR, 0 }, { SHIFT, 16 }, { ERROR, 0 } },
0380: /* 12 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0381: { REDUCE, 4 }, { REDUCE, 4 }, { REDUCE, 4 },
0382: { REDUCE, 4 }, { REDUCE, 4 }, { REDUCE, 4 },
0383: { REDUCE, 4 }, { REDUCE, 4 }, { REDUCE, 4 } },
0384: /* 13 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0385: { REDUCE, 2 }, { REDUCE, 2 }, { REDUCE, 2 },
0386: { REDUCE, 2 }, { REDUCE, 2 }, { REDUCE, 2 },
0387: { REDUCE, 2 }, { REDUCE, 2 }, { REDUCE, 2 } },
0388: /* 14 */{ { SHIFT, 4 }, { SHIFT, 7 }, { SHIFT, 5 },
0389: { SHIFT, 11 }, { ERROR, 0 }, { ERROR, 0 },
0390: { SHIFT, 14 }, { ERROR, 0 }, { ERROR, 0 },
0391: { ERROR, 0 }, { SHIFT, 16 }, { ERROR, 0 } },
0392: /* 15 */{ { SHIFT, 3 }, { SHIFT, 7 }, { SHIFT, 5 },
0393: { SHIFT, 11 }, { ERROR, 0 }, { ERROR, 0 },
0394: { SHIFT, 14 }, { ERROR, 0 }, { ERROR, 0 },
0395: { ERROR, 0 }, { SHIFT, 16 }, { ERROR, 0 } },
0396: /* 16 */{ { ERROR, 0 }, { ERROR, 0 }, { ERROR, 0 },
0397: { REDUCE, 5 }, { REDUCE, 5 }, { REDUCE, 5 },
0398: { REDUCE, 5 }, { REDUCE, 5 }, { REDUCE, 5 },
0399: { REDUCE, 5 }, { REDUCE, 5 }, { REDUCE, 5 } } };
0400:
0401: // the number after a SHIFT action is the next state to go to (see case SHIFT)
0402: // the number after a REDUCE action is the number of a rule (see case REDUCE)
0403:
0404: private static final Integer[] INTEGERS = new Integer[ACTIONTABLE.length];
0405: static {
0406: for (int i = 0; i < INTEGERS.length; i++)
0407: INTEGERS[i] = new Integer(i);
0408: }
0409:
0410: protected SState parseRegEx(String regEx) throws InvalidExpression {
0411: final java.util.List tokenList = this .scanner.scan(regEx);
0412:
0413: final Object[] extdTokenList = tokenList
0414: .toArray(new Object[tokenList.size() + 1]);
0415: extdTokenList[extdTokenList.length - 1] = Terminal_EOF.INSTANCE;
0416:
0417: java.util.Stack symbolStack = new java.util.Stack();
0418: java.util.Stack stateStack = new java.util.Stack();
0419:
0420: int extdTokenListIndex = 0;
0421: Object token = extdTokenList[extdTokenListIndex];
0422:
0423: int stateNr = 0, tokenSymbol = -1, action = Automaton_Pattern.ERROR;
0424: do {
0425: if (tokenSymbol == -1) {
0426: if (token instanceof SState)
0427: tokenSymbol = Automaton_Pattern.regExp;
0428: else if (token instanceof Terminal_Repetition)
0429: tokenSymbol = Automaton_Pattern.repetition;
0430: else if (token instanceof Terminal_GroupBegin)
0431: tokenSymbol = Automaton_Pattern.groupBegin;
0432: else if (token instanceof Terminal_GroupEnd)
0433: tokenSymbol = Automaton_Pattern.groupEnd;
0434: else if (token instanceof String)
0435: tokenSymbol = Automaton_Pattern.label;
0436: else if (token instanceof Terminal_OrOp)
0437: tokenSymbol = Automaton_Pattern.orOp;
0438: else if (token instanceof Terminal_RegExp)
0439: tokenSymbol = Automaton_Pattern.regExp;
0440: else if (token instanceof Terminal_AndOp)
0441: tokenSymbol = Automaton_Pattern.andOp;
0442: else if (token instanceof Terminal_NotOp)
0443: tokenSymbol = Automaton_Pattern.notOp;
0444: else if (token instanceof Terminal_EOF)
0445: tokenSymbol = Automaton_Pattern.EOF;
0446: else {
0447: String message = "Unknown symbol/token: " + token;
0448: message += "\n(check Parser or Scanner for this symbol/token)";
0449: throw new RuntimeException(message);
0450: }
0451: }
0452:
0453: //System.out.println("$ "+symbolStack);
0454: //System.out.print("+ "+stateNr+","+tokenSymbol+" -> ");
0455: action = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][0];
0456:
0457: PState finalState, aState;
0458:
0459: switch (action) {
0460: case Automaton_Pattern.SHIFT:
0461: //System.out.println("SHIFT "+ACTIONTABLE[stateNr][tokenSymbol][1]);
0462: stateStack.push(Automaton_Pattern.INTEGERS[stateNr]);
0463: symbolStack.push(token);
0464: stateNr = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][1];
0465: ++extdTokenListIndex;
0466: token = extdTokenList[extdTokenListIndex];
0467: tokenSymbol = -1;
0468: break;
0469:
0470: case Automaton_Pattern.REDUCE:
0471: //System.out.println("REDUCE "+ACTIONTABLE[stateNr][tokenSymbol][1]);
0472: final int ruleNr = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][1];
0473:
0474: Object node = null;
0475: int nodeSymbol = -1;
0476: switch (ruleNr) {
0477: case 0: // RE ::= TERM
0478: {
0479: node = symbolStack.pop();
0480: nodeSymbol = Automaton_Pattern.RE;
0481: break;
0482: }
0483: case 1: // RE ::= TERM orOp RE
0484: {
0485: PState re = (PState) symbolStack.pop();
0486: /*Terminal_OrOp =*/symbolStack.pop();
0487: PState term = (PState) symbolStack.pop();
0488:
0489: node = this .union(term, re);
0490: nodeSymbol = Automaton_Pattern.RE;
0491: break;
0492: }
0493: case 2: // ELEMENT ::= groupBegin RE groupEnd
0494: {
0495: Terminal_GroupEnd end = (Terminal_GroupEnd) symbolStack
0496: .pop();
0497: node = symbolStack.pop();
0498: Terminal_GroupBegin begin = (Terminal_GroupBegin) symbolStack
0499: .pop();
0500: if (begin.name == null && end.name != null
0501: || begin.name != null
0502: && begin.name.equals(end.name) == false)
0503: throw new IllegalArgumentException(
0504: "endtag exspected for " + begin
0505: + " but found: " + end);
0506:
0507: nodeSymbol = Automaton_Pattern.ELEMENT;
0508: break;
0509: }
0510: case 3: // ELEMENT ::= ELEMENT repetition
0511: {
0512: Terminal_Repetition repetition = (Terminal_Repetition) symbolStack
0513: .pop();
0514: PState element = (PState) symbolStack.pop();
0515:
0516: node = repetition.to == Terminal_Repetition.UNLIMITED ? this
0517: .repeat(element, repetition.from, 0)
0518: : this .repeat(element, repetition.from,
0519: repetition.to);
0520:
0521: nodeSymbol = Automaton_Pattern.ELEMENT;
0522: break;
0523: }
0524:
0525: case 4: // ELEMENT ::= ELEMENT label
0526: {
0527: String label = (String) symbolStack.pop();
0528: String labelDot = null;
0529: PState element = (PState) symbolStack.pop();
0530:
0531: node = element;
0532: nodeSymbol = Automaton_Pattern.ELEMENT;
0533: break;
0534: }
0535: case 5: // ELEMENT ::= regExp
0536: {
0537: node = symbolStack.pop();
0538: if (node instanceof Terminal_RegExp) { // or instanceOf Terminal_RuntimeValue
0539: Automaton_Pattern preDefAutomaton;
0540: if (this .preDefinedAutomatons == null)
0541: preDefAutomaton = null;
0542: else {
0543: preDefAutomaton = (Automaton_Pattern) this .preDefinedAutomatons
0544: .get(((Terminal_RegExp) node).name);
0545: }
0546: if (preDefAutomaton == null)
0547: throw new IllegalArgumentException(
0548: ((Terminal_RegExp) node).name
0549: + " is not defined");
0550:
0551: final Automaton.State startState = preDefAutomaton
0552: .getStartState();
0553: if (startState == null) {
0554: node = this .addState(false);
0555: } else {
0556: java.util.Map map = this
0557: .cloneState(startState);
0558: node = (Automaton_Pattern.PState) map
0559: .get(startState);
0560: }
0561: }
0562: nodeSymbol = Automaton_Pattern.ELEMENT;
0563: break;
0564: }
0565: case 6: // TERM ::= ELEMENT
0566: {
0567: node = symbolStack.pop();
0568: nodeSymbol = Automaton_Pattern.TERM;
0569: break;
0570: }
0571: case 7: // TERM ::= ELEMENT TERM
0572: {
0573: PState term = (PState) symbolStack.pop();
0574: PState element = (PState) symbolStack.pop();
0575:
0576: node = this .concat(element, term);
0577:
0578: nodeSymbol = Automaton_Pattern.TERM;
0579: break;
0580: }
0581: case 8: // TERM ::= ELEMENT andOp TERM
0582: {
0583: PState term = (PState) symbolStack.pop();
0584: /*Terminal_AndOp = */symbolStack.pop();
0585: PState element = (PState) symbolStack.pop();
0586:
0587: node = this .intersect(element, term);
0588: nodeSymbol = Automaton_Pattern.TERM;
0589: break;
0590: }
0591: case 9: // TERM ::= notOp ELEMENT
0592: {
0593: PState element = (PState) symbolStack.pop();
0594: /*Terminal_NotOp = */symbolStack.pop();
0595:
0596: node = this .complement(element);
0597: nodeSymbol = Automaton_Pattern.TERM;
0598: break;
0599: }
0600: default:
0601: String message = "\nProgramming error in RE-Parser:"
0602: + "\nACTIONTABLE contains wrong ruleNr "
0603: + ruleNr
0604: + "\nor case "
0605: + ruleNr
0606: + " statement missing";
0607: throw new RuntimeException(message);
0608: } // end switch(rule)
0609:
0610: for (int i = stateStack.size() - symbolStack.size(); i > 1; i--)
0611: stateStack.pop();
0612: stateNr = ((Integer) stateStack.peek()).intValue();
0613: symbolStack.push(node);
0614: stateNr = Automaton_Pattern.ACTIONTABLE[stateNr][nodeSymbol][1];
0615: break;
0616: } // end switch(action)
0617:
0618: } while (action != Automaton_Pattern.ACCEPT
0619: && action != Automaton_Pattern.ERROR);
0620:
0621: if (action == Automaton_Pattern.ERROR) {
0622: System.out.print("parsed:");
0623: for (int i = 0; i < extdTokenListIndex; ++i) {
0624: System.out.print(" " + extdTokenList[i]);
0625: }
0626: System.out.println("");
0627: System.out.print("rest: ");
0628: for (int i = extdTokenListIndex; i < extdTokenList.length - 1; ++i) {
0629: System.out.print(" " + extdTokenList[i]);
0630: }
0631: System.out.println("");
0632: System.out.println("current state: " + stateNr);
0633: System.out.print("current Token: " + tokenSymbol);
0634:
0635: // for (int i=0; i<Automaton_Pattern.ACTIONTABLE[stateNr].length; ++i) {
0636: // if (Automaton_Pattern.ACTIONTABLE[stateNr][i][0]!=Automaton_Pattern.ERROR) {
0637: // System.out.println(
0638: // }
0639: // }
0640: // System.out.println([stateNr][0];
0641: throw new Error();
0642: }
0643:
0644: // String expression = ""; int tokenPosition=-1;
0645: // for (int i=0; i<tokenList.size(); i++) {
0646: // if (i==extdTokenListIndex) tokenPosition=expression.length();
0647: // expression+= String.valueOf(tokenList.get(i));
0648: // }
0649: // throw new InvalidExpression(
0650: // expression,
0651: // String.valueOf( extdTokenList[extdTokenListIndex] ),
0652: // tokenPosition
0653: // );
0654: // }
0655: // return (SState)this.minimize(((SState)symbolStack.peek()));
0656: // return (SState)this.makeDeterministic(((SState)symbolStack.peek()));
0657: return (SState) symbolStack.peek();
0658: }
0659:
0660: interface TerminalFormat {
0661: public Object parseObject(char[] source, ParsePosition status);
0662:
0663: public int maxLength();
0664: }
0665:
0666: /*
0667: final class TerminalFormat_SPECIALLITERALS implements TerminalFormat {
0668:
0669: public TerminalFormat_SPECIALLITERALS() {};
0670:
0671: public Object parseObject(char[] source, ParsePosition status) {
0672: final int index = status.getIndex();
0673:
0674: switch (source[index]) {
0675: case '|' : status.setIndex(index+1); return Terminal_OrOp.INSTANCE;
0676: case '(' : status.setIndex(index+1); return Terminal_GroupBegin.INSTANCE;
0677: case ')' : status.setIndex(index+1); return Terminal_GroupEnd.INSTANCE;
0678: case '*' : status.setIndex(index+1); return new Terminal_Repetition(0,Terminal_Repetition.UNLIMITED);
0679: case '+' : status.setIndex(index+1); return new Terminal_Repetition(1,Terminal_Repetition.UNLIMITED);
0680: case '?' : status.setIndex(index+1); return new Terminal_Repetition(0,1);
0681: case '.' : status.setIndex(index+1); return Det_AnyLiteral.INSTANCE;
0682: default : return null; // throw new ParseException
0683: }
0684: }
0685:
0686: public int maxLength() {return 1;}
0687:
0688: }
0689: */
0690:
0691: final class TerminalFormat_LITERAL implements TerminalFormat {
0692:
0693: private final static char ESCAPE_CHAR = '\\';
0694:
0695: public TerminalFormat_LITERAL() {
0696: };
0697:
0698: public Object parseObject(char[] source, ParsePosition status) {
0699: int index = status.getIndex();
0700:
0701: switch (source[index]) {
0702: case '\\': {
0703: ++index;
0704: if (index == source.length)
0705: return null;
0706: status.setIndex(index + 1);
0707:
0708: final PState startState = (PState) Automaton_Pattern.this
0709: .addState(false);
0710: startState.addTransition(null, new CharSet(
0711: source[index]), Automaton_Pattern.this
0712: .addState(true));
0713: return startState;
0714: }
0715:
0716: case '|':
0717: status.setIndex(index + 1);
0718: return Terminal_OrOp.INSTANCE;
0719: case '&':
0720: status.setIndex(index + 1);
0721: return Terminal_AndOp.INSTANCE;
0722: case '!':
0723: status.setIndex(index + 1);
0724: return Terminal_NotOp.INSTANCE;
0725: case '(':
0726: status.setIndex(index + 1);
0727: return Terminal_GroupBegin.INSTANCE;
0728: case ')':
0729: status.setIndex(index + 1);
0730: return Terminal_GroupEnd.INSTANCE;
0731: case '*':
0732: status.setIndex(index + 1);
0733: return new Terminal_Repetition(0,
0734: Terminal_Repetition.UNLIMITED);
0735: case '+':
0736: status.setIndex(index + 1);
0737: return new Terminal_Repetition(1,
0738: Terminal_Repetition.UNLIMITED);
0739: case '?':
0740: status.setIndex(index + 1);
0741: return new Terminal_Repetition(0, 1);
0742: case '.': {
0743: status.setIndex(index + 1);
0744: ISet_char charSet = new CharSet();
0745: charSet.complement();
0746: final PState startState = (PState) Automaton_Pattern.this
0747: .addState(false);
0748: startState.addTransition(null, charSet,
0749: Automaton_Pattern.this .addState(true));
0750: return startState;
0751: }
0752: case '{':
0753: case '}':
0754: case '[':
0755: case ']':
0756: case '<':
0757: case '>':
0758: return null;
0759:
0760: default: {
0761: status.setIndex(index + 1);
0762: final PState startState = (PState) Automaton_Pattern.this
0763: .addState(false);
0764: startState.addTransition(null, new CharSet(
0765: source[index]), Automaton_Pattern.this
0766: .addState(true));
0767: return startState;
0768: }
0769: }
0770: }
0771:
0772: public int maxLength() {
0773: return 2;
0774: }
0775:
0776: }
0777:
0778: final class TerminalFormat_LITERALSET implements TerminalFormat {
0779:
0780: private static final int START = 0;
0781: private static final int FIRSTCHAR = 1;
0782: private static final int NORMAL = 2;
0783: private static final int ESCAPED = 3;
0784:
0785: public TerminalFormat_LITERALSET() {
0786: // this.automaton = automaton;
0787: //startState = automaton.addState(false);
0788: //automaton.addTransition(new CharSet('.'),automaton.addState(true));
0789: };
0790:
0791: public Object parseObject(char[] source, ParsePosition status) {
0792: int index = status.getIndex();
0793: final int sourceLength = source.length;
0794:
0795: ISet_char charSet = new CharSet();
0796: StringBuffer chars = new StringBuffer();
0797: boolean complement = false;
0798: boolean intervall = false;
0799: int state = START;
0800: while (index < sourceLength) {
0801: char ch = source[index];
0802: switch (state) {
0803: case START:
0804: switch (ch) {
0805: case '[':
0806: state = FIRSTCHAR;
0807: break;
0808: default:
0809: return null;
0810: }
0811: break;
0812: case FIRSTCHAR:
0813: switch (ch) {
0814: case ']':
0815: return null;
0816: case '\\':
0817: state = ESCAPED;
0818: break;
0819: case '^':
0820: complement = true;
0821: state = NORMAL;
0822: break;
0823: default:
0824: chars.append(ch);
0825: state = NORMAL;
0826: }
0827: break;
0828: case NORMAL:
0829: switch (ch) {
0830: case '\\':
0831: state = ESCAPED;
0832: break;
0833: case ']': { // END
0834: index++;
0835: status.setIndex(index);
0836:
0837: charSet.addAll(chars.toString());
0838: if (complement)
0839: charSet.complement();
0840:
0841: final PState startState = (PState) Automaton_Pattern.this
0842: .addState(false);
0843: startState.addTransition(null, charSet,
0844: Automaton_Pattern.this .addState(true));
0845: return startState;
0846: }
0847: default:
0848: if (intervall) {
0849: char from = chars
0850: .charAt(chars.length() - 1);
0851: if (from > ch)
0852: return null;
0853: for (char c = ++from; c <= ch; c++)
0854: charSet.add(c);
0855: intervall = false;
0856: } else {
0857: if (ch == '-') {
0858: if (chars.length() == 0)
0859: return null;
0860: intervall = true;
0861: } else
0862: chars.append(ch);
0863: }
0864: // STATE = NORMAL; (not necessary because state is NORMAL)
0865: }
0866: break;
0867: case ESCAPED:
0868: switch (ch) {
0869: default:
0870: if (intervall) {
0871: char from = (char) (((int) chars
0872: .charAt(chars.length() - 1)) + 1);
0873: for (char c = from; c <= ch; c++)
0874: charSet.add(c);
0875: intervall = false;
0876: } else
0877: chars.append(ch);
0878: state = NORMAL;
0879: }
0880: break;
0881: default:
0882: String message = "unknown state " + state;
0883: throw new RuntimeException(message);
0884: }
0885:
0886: index++;
0887: }
0888:
0889: return null;
0890: }
0891:
0892: public int maxLength() {
0893: return PScanner.UNLIMITED_MAX_LENGTH;
0894: }
0895: }
0896:
0897: final class TerminalFormat_GroupBegin implements TerminalFormat {
0898:
0899: public TerminalFormat_GroupBegin() {
0900: }
0901:
0902: public Object parseObject(char[] source, ParsePosition status) {
0903: final int sourceLength = source.length;
0904: int index = status.getIndex();
0905: if (index >= sourceLength) {
0906: String message = "";
0907: throw new ArrayIndexOutOfBoundsException(message);
0908: }
0909:
0910: if (source[index] != '<')
0911: return null;
0912:
0913: index++;
0914: final int startIndex = index;
0915: while (index < sourceLength && source[index] != '>'
0916: && source[index] != '.')
0917: index++;
0918: if (index == sourceLength)
0919: return null;
0920: if (source[index] == '.')
0921: return null;
0922:
0923: status.setIndex(index + 1);
0924:
0925: // if (startIndex==index) return Terminal_GroupBegin.INSTANCE;
0926: return new Terminal_GroupBegin(new String(source,
0927: startIndex, index - startIndex));
0928: }
0929:
0930: public int maxLength() {
0931: return PScanner.UNLIMITED_MAX_LENGTH;
0932: }
0933: }
0934:
0935: final class TerminalFormat_GroupEnd implements TerminalFormat {
0936:
0937: public TerminalFormat_GroupEnd() {
0938: }
0939:
0940: public Object parseObject(char[] source, ParsePosition status) {
0941: final int sourceLength = source.length;
0942: int index = status.getIndex();
0943: if (index >= sourceLength) {
0944: String message = "";
0945: throw new ArrayIndexOutOfBoundsException(message);
0946: }
0947:
0948: if (source[index] != '<')
0949: return null;
0950: index++;
0951: if (source[index] != '/')
0952: return null;
0953:
0954: index++;
0955: final int startIndex = index;
0956: while (index < sourceLength && source[index] != '>')
0957: index++;
0958: if (index == sourceLength)
0959: return null;
0960:
0961: status.setIndex(index + 1);
0962:
0963: // if (startIndex+1==index) return Terminal_GroupEnd.INSTANCE;
0964: return new Terminal_GroupEnd(new String(source, startIndex,
0965: index - startIndex));
0966: }
0967:
0968: public int maxLength() {
0969: return PScanner.UNLIMITED_MAX_LENGTH;
0970: }
0971: }
0972:
0973: final class TerminalFormat_REPETITION implements TerminalFormat {
0974:
0975: private final static int START = 0;
0976: private final static int FROM_FIRSTCHAR = 1;
0977: private final static int FROM_NORMAL = 2;
0978: private final static int TO_FIRSTCHAR = 3;
0979: private final static int TO_NORMAL = 4;
0980:
0981: public TerminalFormat_REPETITION() {
0982: };
0983:
0984: public Object parseObject(char[] source, ParsePosition status) {
0985: int index = status.getIndex();
0986: final int sourceLength = source.length;
0987:
0988: StringBuffer chars = new StringBuffer();
0989: int from = 0;
0990: int state = START;
0991: while (index < sourceLength) {
0992: char ch = source[index];
0993: switch (state) {
0994: case START:
0995: switch (ch) {
0996: case '{':
0997: state = FROM_FIRSTCHAR;
0998: break;
0999: default:
1000: return null;
1001: }
1002: break;
1003: case FROM_FIRSTCHAR:
1004: switch (ch) {
1005: case '0':
1006: case '1':
1007: case '2':
1008: case '3':
1009: case '4':
1010: case '5':
1011: case '6':
1012: case '7':
1013: case '8':
1014: case '9':
1015: chars.append(ch);
1016: state = FROM_NORMAL;
1017: break;
1018: default:
1019: return null;
1020: }
1021: break;
1022: case FROM_NORMAL:
1023: switch (ch) {
1024: case '0':
1025: case '1':
1026: case '2':
1027: case '3':
1028: case '4':
1029: case '5':
1030: case '6':
1031: case '7':
1032: case '8':
1033: case '9':
1034: chars.append(ch);
1035: //state = NORMAL; // not necessary because state is NORMAL
1036: break;
1037: case ',':
1038: from = Integer.parseInt(chars.toString());
1039: chars.setLength(0);
1040: state = TO_FIRSTCHAR;
1041: break;
1042: case '}': // END
1043: index++;
1044: status.setIndex(index);
1045: final int count = Integer.parseInt(chars
1046: .toString());
1047: return new Terminal_Repetition(count, count);
1048: default:
1049: return null;
1050: }
1051: break;
1052: case TO_FIRSTCHAR:
1053: switch (ch) {
1054: case '0':
1055: case '1':
1056: case '2':
1057: case '3':
1058: case '4':
1059: case '5':
1060: case '6':
1061: case '7':
1062: case '8':
1063: case '9':
1064: chars.append(ch);
1065: state = TO_NORMAL;
1066: break;
1067: case '*': // may be END
1068: index++;
1069: if (index == sourceLength)
1070: return null;
1071: if (source[index] != '}')
1072: return null;
1073: index++;
1074: status.setIndex(index);
1075: return new Terminal_Repetition(from,
1076: Terminal_Repetition.UNLIMITED);
1077: default:
1078: return null;
1079: }
1080: break;
1081: case TO_NORMAL:
1082: switch (ch) {
1083: case '0':
1084: case '1':
1085: case '2':
1086: case '3':
1087: case '4':
1088: case '5':
1089: case '6':
1090: case '7':
1091: case '8':
1092: case '9':
1093: chars.append(ch);
1094: state = TO_NORMAL;
1095: break;
1096: case '}': // END
1097: index++;
1098: status.setIndex(index);
1099: final int to = Integer.parseInt(chars
1100: .toString());
1101: return new Terminal_Repetition(from, to);
1102: default:
1103: return null;
1104: }
1105: break;
1106: }
1107:
1108: index++;
1109: }
1110:
1111: return null;
1112: }
1113:
1114: public int maxLength() {
1115: return PScanner.UNLIMITED_MAX_LENGTH;
1116: }
1117: }
1118:
1119: final class TerminalFormat_LABEL implements TerminalFormat {
1120: public TerminalFormat_LABEL() {
1121: };
1122:
1123: public Object parseObject(char[] source, ParsePosition status) {
1124: int startIndex = status.getIndex();
1125: int index = startIndex;
1126: if (source[index++] != '{')
1127: return null;
1128: if (source[index++] != '=')
1129: return null;
1130:
1131: while (index < source.length
1132: && ('A' <= source[index] && source[index] <= 'Z'
1133: || 'a' <= source[index]
1134: && source[index] <= 'z' || '0' <= source[index]
1135: && source[index] <= '9'))
1136: ++index;
1137:
1138: if (index == source.length)
1139: return null;
1140: if (source[index] != '}')
1141: return null;
1142:
1143: status.setIndex(index + 1);
1144: return new String(source, startIndex + 2, index
1145: - startIndex - 2);
1146: }
1147:
1148: public int maxLength() {
1149: return PScanner.UNLIMITED_MAX_LENGTH;
1150: }
1151: }
1152:
1153: final class TerminalFormat_RegExp implements TerminalFormat {
1154: public TerminalFormat_RegExp() {
1155: };
1156:
1157: public Object parseObject(char[] source, ParsePosition status) {
1158: int startIndex = status.getIndex();
1159: int index = startIndex;
1160: if (source[index++] != '{')
1161: return null;
1162:
1163: if (('A' <= source[index] && source[index] <= 'Z' || 'a' <= source[index]
1164: && source[index] <= 'z') == false)
1165: return null;
1166: ++index;
1167: while (index < source.length
1168: && ('A' <= source[index] && source[index] <= 'Z'
1169: || 'a' <= source[index]
1170: && source[index] <= 'z'
1171: || '0' <= source[index]
1172: && source[index] <= '9'
1173: || source[index] == '_'
1174: || source[index] == '/' || source[index] == '-'))
1175: ++index;
1176:
1177: if (index == source.length)
1178: return null;
1179: if (source[index] != '}')
1180: return null;
1181:
1182: status.setIndex(index + 1);
1183: return new Terminal_RegExp(new String(source,
1184: startIndex + 1, index - startIndex - 1));
1185: }
1186:
1187: public int maxLength() {
1188: return PScanner.UNLIMITED_MAX_LENGTH;
1189: }
1190: }
1191:
1192: protected PScanner scanner = this .newScanner();
1193:
1194: protected PScanner newScanner() {
1195: return new PScanner(new TerminalFormat[] {
1196: //new TerminalFormat_SPECIALLITERALS() // RegEx_SpecialLiteralsFormat();
1197: new TerminalFormat_LITERALSET() // RegEx_LiteralSe<tFormat();
1198: //,new TerminalFormat_STRING() // RegEx_StringFormat();
1199: ,
1200: new TerminalFormat_REPETITION() // RegEx_RepetitionFormat();
1201: , new TerminalFormat_LABEL(),
1202: new TerminalFormat_GroupBegin(),
1203: new TerminalFormat_GroupEnd(),
1204: new TerminalFormat_LITERAL() // RegEx_LiteralFormat();
1205: , new TerminalFormat_RegExp() },
1206: /*terminalFormatsAreExclusive=*/true);
1207: }
1208:
1209: }
|