0001: /**
0002: * Copyright (c) 2001, Sergey A. Samokhodkin
0003: * All rights reserved.
0004: *
0005: * Redistribution and use in source and binary forms, with or without modification,
0006: * are permitted provided that the following conditions are met:
0007: *
0008: * - Redistributions of source code must retain the above copyright notice,
0009: * this list of conditions and the following disclaimer.
0010: * - Redistributions in binary form
0011: * must reproduce the above copyright notice, this list of conditions and the following
0012: * disclaimer in the documentation and/or other materials provided with the distribution.
0013: * - Neither the name of jregex nor the names of its contributors may be used
0014: * to endorse or promote products derived from this software without specific prior
0015: * written permission.
0016: *
0017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
0018: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0019: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
0020: * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
0021: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0022: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
0023: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0024: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
0025: * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0026: *
0027: * @version 1.2_01
0028: */package jregex;
0029:
0030: import java.util.*;
0031:
0032: class Term implements REFlags {
// Values held in the 'type' field of a Term. The numeric order matters:
// FIRST_TRANSPARENT..LAST_TRANSPARENT (declared below) delimit a contiguous range.
0033: //runtime Term types
0034: static final int CHAR = 0;
0035: static final int BITSET = 1;
0036: static final int BITSET2 = 2;
0037: static final int ANY_CHAR = 4;
0038: static final int ANY_CHAR_NE = 5;
0039:
0040: static final int REG = 6;
0041: static final int REG_I = 7;
0042: static final int FIND = 8;
0043: static final int FINDREG = 9;
0044: static final int SUCCESS = 10;
0045:
0046: /*optimization-transparent types*/
0047: static final int BOUNDARY = 11;
0048: static final int DIRECTION = 12;
0049: static final int UBOUNDARY = 13;
0050: static final int UDIRECTION = 14;
0051:
0052: static final int GROUP_IN = 15;
0053: static final int GROUP_OUT = 16;
0054: static final int VOID = 17;
0055:
0056: static final int START = 18;
0057: static final int END = 19;
0058: static final int END_EOL = 20;
0059: static final int LINE_START = 21;
0060: static final int LINE_END = 22;
0061: static final int LAST_MATCH_END = 23;
0062:
0063: static final int CNT_SET_0 = 24;
0064: static final int CNT_INC = 25;
0065: static final int CNT_GT_EQ = 26;
0066: static final int READ_CNT_LT = 27;
0067:
0068: static final int CRSTORE_CRINC = 28; //store on 'actual' search entry
0069: static final int CR_SET_0 = 29;
0070: static final int CR_LT = 30;
0071: static final int CR_GT_EQ = 31;
0072:
0073: /*optimization-nontransparent types*/
0074: static final int BRANCH = 32;
0075: static final int BRANCH_STORE_CNT = 33;
0076: static final int BRANCH_STORE_CNT_AUX1 = 34;
0077:
0078: static final int PLOOKAHEAD_IN = 35;
0079: static final int PLOOKAHEAD_OUT = 36;
0080: static final int NLOOKAHEAD_IN = 37;
0081: static final int NLOOKAHEAD_OUT = 38;
0082: static final int PLOOKBEHIND_IN = 39;
0083: static final int PLOOKBEHIND_OUT = 40;
0084: static final int NLOOKBEHIND_IN = 41;
0085: static final int NLOOKBEHIND_OUT = 42;
0086: static final int INDEPENDENT_IN = 43; //functionally the same as NLOOKAHEAD_IN
0087: static final int INDEPENDENT_OUT = 44;
0088:
0089: static final int REPEAT_0_INF = 45;
0090: static final int REPEAT_MIN_INF = 46;
0091: static final int REPEAT_MIN_MAX = 47;
0092: static final int REPEAT_REG_MIN_INF = 48;
0093: static final int REPEAT_REG_MIN_MAX = 49;
0094:
0095: static final int BACKTRACK_0 = 50;
0096: static final int BACKTRACK_MIN = 51;
0097: static final int BACKTRACK_FIND_MIN = 52;
0098: static final int BACKTRACK_FINDREG_MIN = 53;
0099: static final int BACKTRACK_REG_MIN = 54;
0100:
0101: static final int MEMREG_CONDITION = 55;
0102: static final int LOOKAHEAD_CONDITION_IN = 56;
0103: static final int LOOKAHEAD_CONDITION_OUT = 57;
0104: static final int LOOKBEHIND_CONDITION_IN = 58;
0105: static final int LOOKBEHIND_CONDITION_OUT = 59;
0106:
0107: //optimization: bounds of the "optimization-transparent" type range above
0108: static final int FIRST_TRANSPARENT = BOUNDARY;
0109: static final int LAST_TRANSPARENT = CR_GT_EQ;
0110:
0111: // compiletime: length of vars[] (see makeTree())
0112: static final int VARS_LENGTH = 4;
0113:
0114: // compile-time variable indices (positions within vars[]):
0115: private static final int MEMREG_COUNT = 0; //refers current memreg index
0116: private static final int CNTREG_COUNT = 1; //refers current counters number
0117: private static final int DEPTH = 2; //refers current depth: (((depth=3)))
0118: private static final int LOOKAHEAD_COUNT = 3; //refers current lookahead id (number of lookahead-style groups allocated so far)
0119:
// Layout of the limits[] array filled by parseLimits(): [0] and [1] carry the
// parsed bounds, [LIMITS_PARSE_RESULT_INDEX] carries LIMITS_OK or LIMITS_FAILURE.
0120: private static final int LIMITS_LENGTH = 3;
0121: private static final int LIMITS_PARSE_RESULT_INDEX = 2;
0122: private static final int LIMITS_OK = 1;
0123: private static final int LIMITS_FAILURE = 2;
0124:
0125: //static CustomParser[] customParsers=new CustomParser[256];
0126:
0127: // **** CONTROL FLOW ****
0128:
0129: // next-to-execute and next-if-failed commands;
0130: Term next, failNext;
0131:
0132: // **** TYPES ****
0133:
// one of the type constants above; a freshly constructed Term is VOID
0134: int type = VOID;
0135: boolean inverse;
0136:
0137: // used with type=CHAR
0138: char c;
0139:
0140: // used with type=FIND
0141: int distance;
0142: boolean eat;
0143:
0144: // used with type=BITSET(2);
0145: boolean[] bitset;
0146: boolean[][] bitset2;
0147: boolean[] categoryBitset; //types(unicode categories)
0148:
// NOTE(review): no BALANCE type is declared among the constants visible in this file - confirm whether this field is still used
0149: // used with type=BALANCE;
0150: char[] brackets;
0151:
0152: // used for optimization with type=BITSET,BITSET2
0153: int weight;
0154:
0155: // **** MEMORISATION ****
0156:
0157: // memory slot, used with type=REG,GROUP_IN,GROUP_OUT
0158: int memreg = -1;
0159:
0160: // **** COUNTERS ****
0161:
0162: // max|min number of iterations
0163: // used with CNT_GT_EQ ,REPEAT_* etc.;
0164: int minCount, maxCount;
0165:
0166: // used with REPEAT_*,REPEAT_REG_*;
0167: Term target;
0168:
0169: // a counter slot to increment & compare with maxCount (CNT_INC etc.);
0170: int cntreg = 0;
0171:
0172: // lookahead group id;
0173: int lookaheadId;
0174:
0175: // **** COMPILE HELPERS ****
0176:
// 'in' is the entry node of this term and 'out'/'out1' are exit nodes whose
// 'next' is patched when the term is linked to its successor (see linkd());
// 'current' is the most recently appended child, 'prev' the preceding sibling.
0177: protected Term prev, in, out, out1, first, current;
0178:
0179: //new!!
// exit node whose 'failNext' (rather than 'next') is patched by linkd()
0180: protected Term branchOut;
0181:
0182: //protected boolean newBranch=false,closed=false;
0183: //protected boolean newBranch=false;
0184:
0185: //for debugging
// 'instances' counts every Term constructed; 'instanceNum' is this term's ordinal
0186: static int instances;
0187: int instanceNum;
0188:
/**
 * Creates a term of the default type ({@code VOID}) that acts as its own
 * entry and exit node.
 */
Term() {
    // debugging aid: number the instances in creation order
    instanceNum = instances++;
    out = this;
    in = this;
}

/**
 * Creates a self-contained term of the given type.
 *
 * @param type one of the type constants declared in this class
 */
Term(int type) {
    this();
    this.type = type;
}
0200:
0201: static void makeTree(String s, int flags, Pattern re)
0202: throws PatternSyntaxException {
0203: char[] data = s.toCharArray();
0204: makeTree(data, 0, data.length, flags, re);
0205: }
0206:
0207: static void makeTree(char[] data, int offset, int end, int flags,
0208: Pattern re) throws PatternSyntaxException {
0209: // memreg,counter,depth,lookahead
0210: int[] vars = { 1, 0, 0, 0 }; //don't use counters[0]
0211:
0212: //collect iterators for subsequent optimization
0213: Vector iterators = new Vector();
0214: Hashtable groupNames = new Hashtable();
0215:
0216: Pretokenizer t = new Pretokenizer(data, offset, end);
0217: Term term = makeTree(t, data, vars, flags, new Group(),
0218: iterators, groupNames);
0219: // term=(0-...-0)
0220:
0221: // convert closing outer bracket into success term
0222: term.out.type = SUCCESS;
0223: // term=(0-...-!!!
0224:
0225: //throw out opening bracket
0226: Term first = term.next;
0227: // term=...-!!!
0228:
0229: // Optimisation:
0230: Term optimized = first;
0231: Optimizer opt = Optimizer.find(first);
0232: if (opt != null)
0233: optimized = opt.makeFirst(first);
0234:
0235: Enumeration en = iterators.elements();
0236: while (en.hasMoreElements()) {
0237: Iterator i = (Iterator) en.nextElement();
0238: i.optimize();
0239: }
0240: // ===
0241:
0242: re.root = optimized;
0243: re.root0 = first;
0244: re.memregs = vars[MEMREG_COUNT];
0245: re.counters = vars[CNTREG_COUNT];
0246: re.lookaheads = vars[LOOKAHEAD_COUNT];
0247: re.namedGroupMap = groupNames;
0248: }
0249:
0250: private static Term makeTree(Pretokenizer t, char[] data,
0251: int[] vars, int flags, Term term, Vector iterators,
0252: Hashtable groupNames) throws PatternSyntaxException {
0253: //System.out.println("Term.makeTree(): flags="+flags);
0254: if (vars.length != VARS_LENGTH)
0255: throw new IllegalArgumentException("vars.length should be "
0256: + VARS_LENGTH + ", not " + vars.length);
0257: //Term term=new Term(isMemReg? vars[MEMREG_COUNT]: -1);
0258: // use memreg 0 as unsignificant
0259: //Term term=new Group(isMemReg? vars[MEMREG_COUNT]: 0);
0260: while (true) {
0261: t.next();
0262: term.append(t.tOffset, t.tOutside, data, vars, flags,
0263: iterators, groupNames);
0264: switch (t.ttype) {
0265: case Pretokenizer.FLAGS:
0266: flags = t.flags(flags);
0267: continue;
0268: case Pretokenizer.CLASS_GROUP:
0269: t.next();
0270: Term clg = new Term();
0271: CharacterClass
0272: .parseGroup(data, t.tOffset, t.tOutside, clg,
0273: (flags & IGNORE_CASE) > 0,
0274: (flags & IGNORE_SPACES) > 0,
0275: (flags & UNICODE) > 0,
0276: (flags & XML_SCHEMA) > 0);
0277: term.append(clg);
0278: continue;
0279: case Pretokenizer.PLAIN_GROUP:
0280: vars[DEPTH]++;
0281: //System.out.println("PLAIN_GROUP, t.tOffset="+t.tOffset+", t.tOutside="+t.tOutside+", t.flags("+flags+")="+t.flags(flags));
0282: term.append(makeTree(t, data, vars, t.flags(flags),
0283: new Group(), iterators, groupNames));
0284: break;
0285: case Pretokenizer.NAMED_GROUP:
0286: String gname = t.groupName;
0287: int id;
0288: if (Character.isDigit(gname.charAt(0))) {
0289: try {
0290: id = Integer.parseInt(gname);
0291: } catch (NumberFormatException e) {
0292: throw new PatternSyntaxException(
0293: "group name starts with digit but is not a number");
0294: }
0295: if (groupNames.contains(new Integer(id))) {
0296: if (t.groupDeclared)
0297: throw new PatternSyntaxException(
0298: "group redeclaration: "
0299: + gname
0300: + "; use ({=id}...) for multiple group assignments");
0301: }
0302: if (vars[MEMREG_COUNT] <= id)
0303: vars[MEMREG_COUNT] = id + 1;
0304: } else {
0305: Integer no = (Integer) groupNames.get(gname);
0306: if (no == null) {
0307: id = vars[MEMREG_COUNT]++;
0308: groupNames.put(t.groupName, new Integer(id));
0309: } else {
0310: if (t.groupDeclared)
0311: throw new PatternSyntaxException(
0312: "group redeclaration "
0313: + gname
0314: + "; use ({=name}...) for group reassignments");
0315: id = no.intValue();
0316: }
0317: }
0318: vars[DEPTH]++;
0319: term.append(makeTree(t, data, vars, flags,
0320: new Group(id), iterators, groupNames));
0321: break;
0322: case '(':
0323: vars[DEPTH]++;
0324: term.append(makeTree(t, data, vars, flags, new Group(
0325: vars[MEMREG_COUNT]++), iterators, groupNames));
0326: break;
0327: case Pretokenizer.POS_LOOKAHEAD:
0328: vars[DEPTH]++;
0329: term.append(makeTree(t, data, vars, flags,
0330: new Lookahead(vars[LOOKAHEAD_COUNT]++, true),
0331: iterators, groupNames));
0332: break;
0333: case Pretokenizer.NEG_LOOKAHEAD:
0334: vars[DEPTH]++;
0335: term.append(makeTree(t, data, vars, flags,
0336: new Lookahead(vars[LOOKAHEAD_COUNT]++, false),
0337: iterators, groupNames));
0338: break;
0339: case Pretokenizer.POS_LOOKBEHIND:
0340: vars[DEPTH]++;
0341: term.append(makeTree(t, data, vars, flags,
0342: new Lookbehind(vars[LOOKAHEAD_COUNT]++, true),
0343: iterators, groupNames));
0344: break;
0345: case Pretokenizer.NEG_LOOKBEHIND:
0346: vars[DEPTH]++;
0347: term.append(makeTree(t, data, vars, flags,
0348: new Lookbehind(vars[LOOKAHEAD_COUNT]++, false),
0349: iterators, groupNames));
0350: break;
0351: case Pretokenizer.INDEPENDENT_REGEX:
0352: vars[DEPTH]++;
0353: term.append(makeTree(t, data, vars, flags,
0354: new IndependentGroup(vars[LOOKAHEAD_COUNT]++),
0355: iterators, groupNames));
0356: break;
0357: case Pretokenizer.CONDITIONAL_GROUP:
0358: vars[DEPTH]++;
0359: t.next();
0360: Term fork = null;
0361: boolean positive = true;
0362: switch (t.ttype) {
0363: case Pretokenizer.NEG_LOOKAHEAD:
0364: positive = false;
0365: case Pretokenizer.POS_LOOKAHEAD:
0366: vars[DEPTH]++;
0367: Lookahead la = new Lookahead(
0368: vars[LOOKAHEAD_COUNT]++, positive);
0369: makeTree(t, data, vars, flags, la, iterators,
0370: groupNames);
0371: fork = new ConditionalExpr(la);
0372: break;
0373: case Pretokenizer.NEG_LOOKBEHIND:
0374: positive = false;
0375: case Pretokenizer.POS_LOOKBEHIND:
0376: vars[DEPTH]++;
0377: Lookbehind lb = new Lookbehind(
0378: vars[LOOKAHEAD_COUNT]++, positive);
0379: makeTree(t, data, vars, flags, lb, iterators,
0380: groupNames);
0381: fork = new ConditionalExpr(lb);
0382: break;
0383: case '(':
0384: t.next();
0385: if (t.ttype != ')')
0386: throw new PatternSyntaxException(
0387: "malformed condition");
0388: int memregNo;
0389: if (Character.isDigit(data[t.tOffset]))
0390: memregNo = makeNumber(t.tOffset, t.tOutside,
0391: data);
0392: else {
0393: String gn = new String(data, t.tOffset,
0394: t.tOutside - t.tOffset);
0395: Integer gno = (Integer) groupNames.get(gn);
0396: if (gno == null)
0397: throw new PatternSyntaxException(
0398: "unknown group name in conditional expr.: "
0399: + gn);
0400: memregNo = gno.intValue();
0401: }
0402: fork = new ConditionalExpr(memregNo);
0403: break;
0404: default:
0405: throw new PatternSyntaxException(
0406: "malformed conditional expression: "
0407: + t.ttype + " '" + (char) t.ttype
0408: + "'");
0409: }
0410: term.append(makeTree(t, data, vars, flags, fork,
0411: iterators, groupNames));
0412: break;
0413: case '|':
0414: term.newBranch();
0415: break;
0416: case Pretokenizer.END:
0417: if (vars[DEPTH] > 0)
0418: throw new PatternSyntaxException(
0419: "unbalanced parenthesis");
0420: term.close();
0421: return term;
0422: case ')':
0423: if (vars[DEPTH] <= 0)
0424: throw new PatternSyntaxException(
0425: "unbalanced parenthesis");
0426: term.close();
0427: vars[DEPTH]--;
0428: return term;
0429: case Pretokenizer.COMMENT:
0430: while (t.ttype != ')')
0431: t.next();
0432: continue;
0433: default:
0434: throw new PatternSyntaxException("unknown token type: "
0435: + t.ttype);
0436: }
0437: }
0438: }
0439:
0440: static int makeNumber(int off, int out, char[] data) {
0441: int n = 0;
0442: for (int i = off; i < out; i++) {
0443: int d = data[i] - '0';
0444: if (d < 0 || d > 9)
0445: return -1;
0446: n *= 10;
0447: n += d;
0448: }
0449: return n;
0450: }
0451:
0452: protected void append(int offset, int end, char[] data, int[] vars,
0453: int flags, Vector iterators, Hashtable gmap)
0454: throws PatternSyntaxException {
0455: //System.out.println("append("+new String(data,offset,end-offset)+")");
0456: //System.out.println("current="+this.current);
0457: int[] limits = new int[3];
0458: int i = offset;
0459: Term tmp, current = this .current;
0460: while (i < end) {
0461: char c = data[i];
0462: boolean greedy = true;
0463: switch (c) {
0464: //operations
0465: case '*':
0466: if (current == null)
0467: throw new PatternSyntaxException(
0468: "missing term before *");
0469: i++;
0470: if (i < end) {
0471: switch (data[i]) {
0472: case '?':
0473: greedy ^= true;
0474: i++;
0475: break;
0476: case '*':
0477: case '+':
0478: throw new PatternSyntaxException(
0479: "nested *?+ in regexp");
0480: }
0481: }
0482: tmp = greedy ? makeGreedyStar(vars, current, iterators)
0483: : makeLazyStar(vars, current);
0484: current = replaceCurrent(tmp);
0485: break;
0486:
0487: case '+':
0488: if (current == null)
0489: throw new PatternSyntaxException(
0490: "missing term before +");
0491: i++;
0492: if (i < end) {
0493: switch (data[i]) {
0494: case '?':
0495: greedy ^= true;
0496: i++;
0497: break;
0498: case '*':
0499: case '+':
0500: throw new PatternSyntaxException(
0501: "nested *?+ in regexp");
0502: }
0503: }
0504: tmp = greedy ? makeGreedyPlus(vars, current, iterators)
0505: : makeLazyPlus(vars, current);
0506: current = replaceCurrent(tmp);
0507: break;
0508:
0509: case '?':
0510: if (current == null)
0511: throw new PatternSyntaxException(
0512: "missing term before ?");
0513: i++;
0514: if (i < end) {
0515: switch (data[i]) {
0516: case '?':
0517: greedy ^= true;
0518: i++;
0519: break;
0520: case '*':
0521: case '+':
0522: throw new PatternSyntaxException(
0523: "nested *?+ in regexp");
0524: }
0525: }
0526:
0527: tmp = greedy ? makeGreedyQMark(vars, current)
0528: : makeLazyQMark(vars, current);
0529: current = replaceCurrent(tmp);
0530: break;
0531:
0532: case '{':
0533: limits[0] = 0;
0534: limits[1] = -1;
0535: int le = parseLimits(i + 1, end, data, limits);
0536: if (limits[LIMITS_PARSE_RESULT_INDEX] == LIMITS_OK) { //parse ok
0537: if (current == null)
0538: throw new PatternSyntaxException(
0539: "missing term before {}");
0540: i = le;
0541: if (i < end && data[i] == '?') {
0542: greedy ^= true;
0543: i++;
0544: }
0545: tmp = greedy ? makeGreedyLimits(vars, current,
0546: limits, iterators) : makeLazyLimits(vars,
0547: current, limits);
0548: current = replaceCurrent(tmp);
0549: break;
0550: } else { //unicode class or named backreference
0551: if (data[i + 1] == '\\') { //'{\name}' - backreference
0552: int p = i + 2;
0553: if (p == end)
0554: throw new PatternSyntaxException(
0555: "'group_id' expected");
0556: while (Character.isWhitespace(data[p])) {
0557: p++;
0558: if (p == end)
0559: throw new PatternSyntaxException(
0560: "'group_id' expected");
0561: }
0562: BackReference br = new BackReference(-1,
0563: (flags & IGNORE_CASE) > 0);
0564: i = parseGroupId(data, p, end, br, gmap);
0565: current = append(br);
0566: continue;
0567: } else {
0568: Term t = new Term();
0569: i = CharacterClass.parseName(data, i, end, t,
0570: false, (flags & IGNORE_SPACES) > 0);
0571: current = append(t);
0572: continue;
0573: }
0574: }
0575:
0576: case ' ':
0577: case '\t':
0578: case '\r':
0579: case '\n':
0580: if ((flags & IGNORE_SPACES) > 0) {
0581: i++;
0582: continue;
0583: }
0584: //else go on as default
0585:
0586: //symbolic items
0587: default:
0588: tmp = new Term();
0589: i = parseTerm(data, i, end, tmp, flags);
0590:
0591: if (tmp.type == END && i < end) {
0592: if ((flags & IGNORE_SPACES) > 0) {
0593: i++;
0594: while (i < end) {
0595: c = data[i];
0596: switch (c) {
0597: case ' ':
0598: case '\t':
0599: case '\r':
0600: case '\n':
0601: i++;
0602: continue;
0603: default:
0604: throw new PatternSyntaxException(
0605: "'$' is not a last term in the group: <"
0606: + new String(data,
0607: offset,
0608: end - offset)
0609: + ">");
0610: }
0611: }
0612: } else {
0613: throw new PatternSyntaxException(
0614: "'$' is not a last term in the group: <"
0615: + new String(data, offset, end
0616: - offset) + ">");
0617: }
0618: }
0619: //"\A"
0620: //if(tmp.type==START && i>(offset+1)){
0621: // throw new PatternSyntaxException("'^' is not a first term in the group: <"+new String(data,offset,end-offset)+">");
0622: //}
0623: current = append(tmp);
0624: break;
0625: }
0626: //System.out.println("next term: "+next);
0627: //System.out.println(" next.out="+next.out);
0628: //System.out.println(" next.out1="+next.out1);
0629: //System.out.println(" next.branchOut="+next.branchOut);
0630: }
0631: //System.out.println(in.toStringAll());
0632: //System.out.println("current="+current);
0633: //System.out.println();
0634: }
0635:
0636: private static int parseGroupId(char[] data, int i, int end,
0637: Term term, Hashtable gmap) throws PatternSyntaxException {
0638: int id;
0639: int nstart = i;
0640: if (Character.isDigit(data[i])) {
0641: while (Character.isDigit(data[i])) {
0642: i++;
0643: if (i == end)
0644: throw new PatternSyntaxException(
0645: "group_id expected");
0646: }
0647: id = makeNumber(nstart, i, data);
0648: } else {
0649: while (Character.isJavaIdentifierPart(data[i])) {
0650: i++;
0651: if (i == end)
0652: throw new PatternSyntaxException(
0653: "group_id expected");
0654: }
0655: String s = new String(data, nstart, i - nstart);
0656: Integer no = (Integer) gmap.get(s);
0657: if (no == null)
0658: throw new PatternSyntaxException(
0659: "backreference to unknown group: " + s);
0660: id = no.intValue();
0661: }
0662: while (Character.isWhitespace(data[i])) {
0663: i++;
0664: if (i == end)
0665: throw new PatternSyntaxException("'}' expected");
0666: }
0667:
0668: int c = data[i++];
0669:
0670: if (c != '}')
0671: throw new PatternSyntaxException("'}' expected");
0672:
0673: term.memreg = id;
0674: return i;
0675: }
0676:
0677: protected Term append(Term term) throws PatternSyntaxException {
0678: //System.out.println("append("+term.toStringAll()+"), this="+toStringAll());
0679: //Term prev=this.prev;
0680: Term current = this .current;
0681: if (current == null) {
0682: //System.out.println("2");
0683: //System.out.println(" term="+term);
0684: //System.out.println(" term.in="+term.in);
0685: in.next = term;
0686: term.prev = in;
0687: this .current = term;
0688: //System.out.println(" result: "+in.toStringAll()+"\r\n");
0689: return term;
0690: }
0691: //System.out.println("3");
0692: link(current, term);
0693: //this.prev=current;
0694: this .current = term;
0695: //System.out.println(in.toStringAll());
0696: //System.out.println("current="+this.current);
0697: //System.out.println();
0698: return term;
0699: }
0700:
0701: protected Term replaceCurrent(Term term)
0702: throws PatternSyntaxException {
0703: //System.out.println("replaceCurrent("+term+"), current="+current+", current.prev="+current.prev);
0704: //Term prev=this.prev;
0705: Term prev = current.prev;
0706: if (prev != null) {
0707: Term in = this .in;
0708: if (prev == in) {
0709: //in.next=term;
0710: //term.prev=in;
0711: in.next = term.in;
0712: term.in.prev = in;
0713: } else
0714: link(prev, term);
0715: }
0716: this .current = term;
0717: //System.out.println(" new current="+this.current);
0718: return term;
0719: }
0720:
0721: protected void newBranch() throws PatternSyntaxException {
0722: //System.out.println("newBranch()");
0723: close();
0724: startNewBranch();
0725: //System.out.println(in.toStringAll());
0726: //System.out.println("current="+current);
0727: //System.out.println();
0728: }
0729:
0730: protected void close() throws PatternSyntaxException {
0731: //System.out.println("close(), current="+current+", this="+toStringAll());
0732: //System.out.println();
0733: //System.out.println("close()");
0734: //System.out.println("current="+this.current);
0735: //System.out.println("prev="+this.prev);
0736: //System.out.println();
0737: /*
0738: Term prev=this.prev;
0739: if(prev!=null){
0740: Term current=this.current;
0741: if(current!=null){
0742: link(prev,current);
0743: prev=current;
0744: this.current=null;
0745: }
0746: link(prev,out);
0747: this.prev=null;
0748: }
0749: */
0750: Term current = this .current;
0751: if (current != null)
0752: linkd(current, out);
0753: else
0754: in.next = out;
0755: //System.out.println(in.toStringAll());
0756: //System.out.println("current="+this.current);
0757: //System.out.println("prev="+this.prev);
0758: //System.out.println();
0759: }
0760:
0761: private final static void link(Term term, Term next) {
0762: linkd(term, next.in);
0763: next.prev = term;
0764: }
0765:
0766: private final static void linkd(Term term, Term next) {
0767: //System.out.println("linkDirectly(\""+term+"\" -> \""+next+"\")");
0768: Term prev_out = term.out;
0769: if (prev_out != null) {
0770: //System.out.println(" prev_out="+prev_out);
0771: prev_out.next = next;
0772: }
0773: Term prev_out1 = term.out1;
0774: if (prev_out1 != null) {
0775: //System.out.println(" prev_out1="+prev_out1);
0776: prev_out1.next = next;
0777: }
0778: Term prev_branch = term.branchOut;
0779: if (prev_branch != null) {
0780: //System.out.println(" prev_branch="+prev_branch);
0781: prev_branch.failNext = next;
0782: }
0783: }
0784:
0785: protected void startNewBranch() throws PatternSyntaxException {
0786: //System.out.println("newBranch()");
0787: //System.out.println("before startNewBranch(), this="+toStringAll());
0788: //System.out.println();
0789: Term tmp = in.next;
0790: Term b = new Branch();
0791: in.next = b;
0792: b.next = tmp;
0793: b.in = null;
0794: b.out = null;
0795: b.out1 = null;
0796: b.branchOut = b;
0797: current = b;
0798: //System.out.println("startNewBranch(), this="+toStringAll());
0799: //System.out.println();
0800: }
0801:
0802: private final static Term makeGreedyStar(int[] vars, Term term,
0803: Vector iterators) throws PatternSyntaxException {
0804: //vars[STACK_SIZE]++;
0805: switch (term.type) {
0806: case REPEAT_0_INF:
0807: case REPEAT_MIN_INF:
0808: case REPEAT_MIN_MAX:
0809: case REPEAT_REG_MIN_INF:
0810: case REPEAT_REG_MIN_MAX:
0811: case INDEPENDENT_IN:
0812: case GROUP_IN: {
0813: Term b = new Branch();
0814: b.next = term.in;
0815: term.out.next = b;
0816:
0817: b.in = b;
0818: b.out = null;
0819: b.out1 = null;
0820: b.branchOut = b;
0821:
0822: return b;
0823: }
0824: default: {
0825: Iterator i = new Iterator(term, 0, -1, iterators);
0826: return i;
0827: }
0828: }
0829: }
0830:
0831: private final static Term makeLazyStar(int[] vars, Term term) {
0832: //vars[STACK_SIZE]++;
0833: switch (term.type) {
0834: case REPEAT_0_INF:
0835: case REPEAT_MIN_INF:
0836: case REPEAT_MIN_MAX:
0837: case REPEAT_REG_MIN_INF:
0838: case REPEAT_REG_MIN_MAX:
0839: case GROUP_IN: {
0840: Term b = new Branch();
0841: b.failNext = term.in;
0842: term.out.next = b;
0843:
0844: b.in = b;
0845: b.out = b;
0846: b.out1 = null;
0847: b.branchOut = null;
0848:
0849: return b;
0850: }
0851: default: {
0852: Term b = new Branch();
0853: b.failNext = term;
0854: term.next = b;
0855:
0856: b.in = b;
0857: b.out = b;
0858: b.out1 = null;
0859: b.branchOut = null;
0860:
0861: return b;
0862: }
0863: }
0864: }
0865:
0866: private final static Term makeGreedyPlus(int[] vars, Term term,
0867: Vector iterators) throws PatternSyntaxException {
0868: //vars[STACK_SIZE]++;
0869: switch (term.type) {
0870: case REPEAT_0_INF:
0871: case REPEAT_MIN_INF:
0872: case REPEAT_MIN_MAX:
0873: case REPEAT_REG_MIN_INF:
0874: case REPEAT_REG_MIN_MAX:
0875: case INDEPENDENT_IN://?
0876: case GROUP_IN: {
0877: //System.out.println("makeGreedyPlus():");
0878: //System.out.println(" in="+term.in);
0879: //System.out.println(" out="+term.out);
0880: Term b = new Branch();
0881: b.next = term.in;
0882: term.out.next = b;
0883:
0884: b.in = term.in;
0885: b.out = null;
0886: b.out1 = null;
0887: b.branchOut = b;
0888:
0889: //System.out.println(" returning "+b.in);
0890:
0891: return b;
0892: }
0893: default: {
0894: return new Iterator(term, 1, -1, iterators);
0895: }
0896: }
0897: }
0898:
0899: private final static Term makeLazyPlus(int[] vars, Term term) {
0900: //vars[STACK_SIZE]++;
0901: switch (term.type) {
0902: case REPEAT_0_INF:
0903: case REPEAT_MIN_INF:
0904: case REPEAT_MIN_MAX:
0905: case REPEAT_REG_MIN_INF:
0906: case REPEAT_REG_MIN_MAX:
0907: case GROUP_IN: {
0908: Term b = new Branch();
0909: term.out.next = b;
0910: b.failNext = term.in;
0911:
0912: b.in = term.in;
0913: b.out = b;
0914: b.out1 = null;
0915: b.branchOut = null;
0916:
0917: return b;
0918: }
0919: case REG:
0920: default: {
0921: Term b = new Branch();
0922: term.next = b;
0923: b.failNext = term;
0924:
0925: b.in = term;
0926: b.out = b;
0927: b.out1 = null;
0928: b.branchOut = null;
0929:
0930: return b;
0931: }
0932: }
0933: }
0934:
0935: private final static Term makeGreedyQMark(int[] vars, Term term) {
0936: //vars[STACK_SIZE]++;
0937: switch (term.type) {
0938: case REPEAT_0_INF:
0939: case REPEAT_MIN_INF:
0940: case REPEAT_MIN_MAX:
0941: case REPEAT_REG_MIN_INF:
0942: case REPEAT_REG_MIN_MAX:
0943: case GROUP_IN: {
0944: Term b = new Branch();
0945: b.next = term.in;
0946:
0947: b.in = b;
0948: b.out = term.out;
0949: b.out1 = null;
0950: b.branchOut = b;
0951:
0952: return b;
0953: }
0954: case REG:
0955: default: {
0956: Term b = new Branch();
0957: b.next = term;
0958:
0959: b.in = b;
0960: b.out = term;
0961: b.out1 = null;
0962: b.branchOut = b;
0963:
0964: return b;
0965: }
0966: }
0967: }
0968:
0969: private final static Term makeLazyQMark(int[] vars, Term term) {
0970: //vars[STACK_SIZE]++;
0971: switch (term.type) {
0972: case REPEAT_0_INF:
0973: case REPEAT_MIN_INF:
0974: case REPEAT_MIN_MAX:
0975: case REPEAT_REG_MIN_INF:
0976: case REPEAT_REG_MIN_MAX:
0977: case GROUP_IN: {
0978: Term b = new Branch();
0979: b.failNext = term.in;
0980:
0981: b.in = b;
0982: b.out = b;
0983: b.out1 = term.out;
0984: b.branchOut = null;
0985:
0986: return b;
0987: }
0988: case REG:
0989: default: {
0990: Term b = new Branch();
0991: b.failNext = term;
0992:
0993: b.in = b;
0994: b.out = b;
0995: b.out1 = term;
0996: b.branchOut = null;
0997:
0998: return b;
0999: }
1000: }
1001: }
1002:
1003: private final static Term makeGreedyLimits(int[] vars, Term term,
1004: int[] limits, Vector iterators)
1005: throws PatternSyntaxException {
1006: //vars[STACK_SIZE]++;
1007: int m = limits[0];
1008: int n = limits[1];
1009: switch (term.type) {
1010: case REPEAT_0_INF:
1011: case REPEAT_MIN_INF:
1012: case REPEAT_MIN_MAX:
1013: case REPEAT_REG_MIN_INF:
1014: case REPEAT_REG_MIN_MAX:
1015: case GROUP_IN: {
1016: int cntreg = vars[CNTREG_COUNT]++;
1017: Term reset = new Term(CR_SET_0);
1018: reset.cntreg = cntreg;
1019: Term b = new Term(BRANCH);
1020:
1021: Term inc = new Term(CRSTORE_CRINC);
1022: inc.cntreg = cntreg;
1023:
1024: reset.next = b;
1025:
1026: if (n >= 0) {
1027: Term lt = new Term(CR_LT);
1028: lt.cntreg = cntreg;
1029: lt.maxCount = n;
1030: b.next = lt;
1031: lt.next = term.in;
1032: } else {
1033: b.next = term.in;
1034: }
1035: term.out.next = inc;
1036: inc.next = b;
1037:
1038: if (m >= 0) {
1039: Term gt = new Term(CR_GT_EQ);
1040: gt.cntreg = cntreg;
1041: gt.maxCount = m;
1042: b.failNext = gt;
1043:
1044: reset.in = reset;
1045: reset.out = gt;
1046: reset.out1 = null;
1047: reset.branchOut = null;
1048: } else {
1049: reset.in = reset;
1050: reset.out = null;
1051: reset.out1 = null;
1052: reset.branchOut = b;
1053: }
1054: return reset;
1055: }
1056: default: {
1057: return new Iterator(term, limits[0], limits[1], iterators);
1058: }
1059: }
1060: }
1061:
1062: private final static Term makeLazyLimits(int[] vars, Term term,
1063: int[] limits) {
1064: //vars[STACK_SIZE]++;
1065: int m = limits[0];
1066: int n = limits[1];
1067: switch (term.type) {
1068: case REPEAT_0_INF:
1069: case REPEAT_MIN_INF:
1070: case REPEAT_MIN_MAX:
1071: case REPEAT_REG_MIN_INF:
1072: case REPEAT_REG_MIN_MAX:
1073: case GROUP_IN: {
1074: int cntreg = vars[CNTREG_COUNT]++;
1075: Term reset = new Term(CR_SET_0);
1076: reset.cntreg = cntreg;
1077: Term b = new Term(BRANCH);
1078: Term inc = new Term(CRSTORE_CRINC);
1079: inc.cntreg = cntreg;
1080:
1081: reset.next = b;
1082:
1083: if (n >= 0) {
1084: Term lt = new Term(CR_LT);
1085: lt.cntreg = cntreg;
1086: lt.maxCount = n;
1087: b.failNext = lt;
1088: lt.next = term.in;
1089: } else {
1090: b.failNext = term.in;
1091: }
1092: term.out.next = inc;
1093: inc.next = b;
1094:
1095: if (m >= 0) {
1096: Term gt = new Term(CR_GT_EQ);
1097: gt.cntreg = cntreg;
1098: gt.maxCount = m;
1099: b.next = gt;
1100:
1101: reset.in = reset;
1102: reset.out = gt;
1103: reset.out1 = null;
1104: reset.branchOut = null;
1105:
1106: return reset;
1107: } else {
1108: reset.in = reset;
1109: reset.out = b;
1110: reset.out1 = null;
1111: reset.branchOut = null;
1112:
1113: return reset;
1114: }
1115: }
1116: case REG:
1117: default: {
1118: Term reset = new Term(CNT_SET_0);
1119: Term b = new Branch(BRANCH_STORE_CNT);
1120: Term inc = new Term(CNT_INC);
1121:
1122: reset.next = b;
1123:
1124: if (n >= 0) {
1125: Term lt = new Term(READ_CNT_LT);
1126: lt.maxCount = n;
1127: b.failNext = lt;
1128: lt.next = term;
1129: term.next = inc;
1130: inc.next = b;
1131: } else {
1132: b.next = term;
1133: term.next = inc;
1134: inc.next = term;
1135: }
1136:
1137: if (m >= 0) {
1138: Term gt = new Term(CNT_GT_EQ);
1139: gt.maxCount = m;
1140: b.next = gt;
1141:
1142: reset.in = reset;
1143: reset.out = gt;
1144: reset.out1 = null;
1145: reset.branchOut = null;
1146:
1147: return reset;
1148: } else {
1149: reset.in = reset;
1150: reset.out = b;
1151: reset.out1 = null;
1152: reset.branchOut = null;
1153:
1154: return reset;
1155: }
1156: }
1157: }
1158: }
1159:
1160: private final int parseTerm(char[] data, int i, int out, Term term,
1161: int flags) throws PatternSyntaxException {
1162: char c = data[i++];
1163: boolean inv = false;
1164: switch (c) {
1165: case '[':
1166: return CharacterClass.parseClass(data, i, out, term,
1167: (flags & IGNORE_CASE) > 0,
1168: (flags & IGNORE_SPACES) > 0, (flags & UNICODE) > 0,
1169: (flags & XML_SCHEMA) > 0);
1170:
1171: case '.':
1172: term.type = (flags & DOTALL) > 0 ? ANY_CHAR : ANY_CHAR_NE;
1173: break;
1174:
1175: case '$':
1176: //term.type=mods[MULTILINE_IND]? LINE_END: END; //??
1177: term.type = (flags & MULTILINE) > 0 ? LINE_END : END_EOL;
1178: break;
1179:
1180: case '^':
1181: term.type = (flags & MULTILINE) > 0 ? LINE_START : START;
1182: break;
1183:
1184: case '\\':
1185: if (i >= out)
1186: throw new PatternSyntaxException(
1187: "Escape without a character");
1188: c = data[i++];
1189: esc: switch (c) {
1190: case 'f':
1191: c = '\f'; // form feed
1192: break;
1193:
1194: case 'n':
1195: c = '\n'; // new line
1196: break;
1197:
1198: case 'r':
1199: c = '\r'; // carriage return
1200: break;
1201:
1202: case 't':
1203: c = '\t'; // tab
1204: break;
1205:
1206: case 'u':
1207: if (i + 4 >= out)
1208: throw new PatternSyntaxException(
1209: "To few characters for u-escape");
1210:
1211: c = (char) ((CharacterClass.toHexDigit(data[i++]) << 12)
1212: + (CharacterClass.toHexDigit(data[i++]) << 8)
1213: + (CharacterClass.toHexDigit(data[i++]) << 4) + CharacterClass
1214: .toHexDigit(data[i++]));
1215: break;
1216:
1217: case 'v':
1218: if (i + 6 >= out)
1219: throw new PatternSyntaxException(
1220: "To few characters for u-escape");
1221: c = (char) ((CharacterClass.toHexDigit(data[i++]) << 24)
1222: + (CharacterClass.toHexDigit(data[i++]) << 16)
1223: + (CharacterClass.toHexDigit(data[i++]) << 12)
1224: + (CharacterClass.toHexDigit(data[i++]) << 8)
1225: + (CharacterClass.toHexDigit(data[i++]) << 4) + CharacterClass
1226: .toHexDigit(data[i++]));
1227: break;
1228:
1229: case 'x': { // hex 2-digit number -> char
1230: if (i >= out)
1231: throw new PatternSyntaxException(
1232: "To few characters for x-escape");
1233: int hex = 0;
1234: char d;
1235: if ((d = data[i++]) == '{') {
1236: while (i < out && (d = data[i++]) != '}') {
1237: hex = (hex << 4) + CharacterClass.toHexDigit(d);
1238: if (hex > 0xffff)
1239: throw new PatternSyntaxException(
1240: "\\x{<out of range>}");
1241: }
1242: } else {
1243: if (i >= out)
1244: throw new PatternSyntaxException(
1245: "To few characters for x-escape");
1246: hex = (CharacterClass.toHexDigit(d) << 4)
1247: + CharacterClass.toHexDigit(data[i++]);
1248: }
1249: c = (char) hex;
1250: break;
1251: }
1252: case '0':
1253: case 'o': // oct 2- or 3-digit number -> char
1254: int oct = 0;
1255: for (;;) {
1256: char d = data[i];
1257: if (d >= '0' && d <= '7') {
1258: i++;
1259: oct *= 8;
1260: oct += d - '0';
1261: if (oct > 0xffff)
1262: break;
1263: if (i >= out)
1264: break;
1265: } else
1266: break;
1267: }
1268: c = (char) oct;
1269: break;
1270:
1271: case 'm': // decimal number -> char
1272: int dec = 0;
1273: for (;;) {
1274: char d = data[i++];
1275: if (d >= '0' && d <= '9') {
1276: dec *= 10;
1277: dec += d - '0';
1278: if (dec > 0xffff)
1279: break;
1280: if (i >= out)
1281: break;
1282: } else
1283: break;
1284: }
1285: i--;
1286: c = (char) dec;
1287: break;
1288:
1289: case 'c': // ctrl-char
1290: c = (char) (data[i++] & 0x1f);
1291: break;
1292:
1293: case 'D': // non-digit
1294: inv = true;
1295: // go on
1296: case 'd': // digit
1297: CharacterClass.makeDigit(term, inv,
1298: (flags & UNICODE) > 0);
1299: return i;
1300:
1301: case 'S': // non-space
1302: inv = true;
1303: // go on
1304: case 's': // space
1305: CharacterClass.makeSpace(term, inv,
1306: (flags & UNICODE) > 0);
1307: return i;
1308:
1309: case 'W': // non-letter
1310: inv = true;
1311: // go on
1312: case 'w': // letter
1313: CharacterClass.makeWordChar(term, inv,
1314: (flags & UNICODE) > 0);
1315: return i;
1316:
1317: case 'B': // non-(word boundary)
1318: inv = true;
1319: // go on
1320: case 'b': // word boundary
1321: CharacterClass.makeWordBoundary(term, inv,
1322: (flags & UNICODE) > 0);
1323: return i;
1324: /* NOT SUPPORTED IN RUBY
1325: case '<': // non-(word boundary)
1326: CharacterClass.makeWordStart(term,(flags&UNICODE)>0);
1327: return i;
1328:
1329: case '>': // word boundary
1330: CharacterClass.makeWordEnd(term,(flags&UNICODE)>0);
1331: return i;
1332: */
1333: case 'A': // text beginning
1334: term.type = START;
1335: return i;
1336:
1337: case 'Z': // text end
1338: term.type = END_EOL;
1339: return i;
1340:
1341: case 'z': // text end
1342: term.type = END;
1343: return i;
1344:
1345: case 'G': // end of last match
1346: term.type = LAST_MATCH_END;
1347: return i;
1348:
1349: case 'P': // \\P{..}
1350: inv = true;
1351: case 'p': // \\p{..}
1352: i = CharacterClass.parseName(data, i, out, term, inv,
1353: (flags & IGNORE_SPACES) > 0);
1354: return i;
1355:
1356: default:
1357: if (c >= '1' && c <= '9') {
1358: int n = c - '0';
1359: while ((i < out) && (c = data[i]) >= '0'
1360: && c <= '9') {
1361: n = (n * 10) + c - '0';
1362: i++;
1363: }
1364: term.type = (flags & IGNORE_CASE) > 0 ? REG_I : REG;
1365: term.memreg = n;
1366: return i;
1367: }
1368: /*
1369: if(c<256){
1370: CustomParser termp=customParsers[c];
1371: if(termp!=null){
1372: i=termp.parse(i,data,term);
1373: return i;
1374: }
1375: }
1376: */
1377: }
1378: term.type = CHAR;
1379: term.c = c;
1380: break;
1381:
1382: default:
1383: if ((flags & IGNORE_CASE) == 0) {
1384: term.type = CHAR;
1385: term.c = c;
1386: } else {
1387: CharacterClass.makeICase(term, c);
1388: }
1389: break;
1390: }
1391: return i;
1392: }
1393:
1394: // one of {n},{n,},{,n},{n1,n2}
1395: protected static final int parseLimits(int i, int end, char[] data,
1396: int[] limits) throws PatternSyntaxException {
1397: if (limits.length != LIMITS_LENGTH)
1398: throw new IllegalArgumentException("maxTimess.length="
1399: + limits.length + ", should be 2");
1400: limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_OK;
1401: int ind = 0;
1402: int v = 0;
1403: char c;
1404: while (i < end) {
1405: c = data[i++];
1406: switch (c) {
1407: case ' ':
1408: continue;
1409:
1410: case ',':
1411: if (ind > 0)
1412: throw new PatternSyntaxException(
1413: "illegal construction: {.. , , ..}");
1414: limits[ind++] = v;
1415: v = -1;
1416: continue;
1417:
1418: case '}':
1419: limits[ind] = v;
1420: if (ind == 0)
1421: limits[1] = v;
1422: return i;
1423:
1424: default:
1425: if (c > '9' || c < '0') {
1426: //throw new PatternSyntaxException("illegal symbol in iterator: '{"+c+"}'");
1427: limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_FAILURE;
1428: return i;
1429: }
1430: if (v < 0)
1431: v = 0;
1432: v = v * 10 + (c - '0');
1433: }
1434: }
1435: throw new PatternSyntaxException("malformed quantifier");
1436: }
1437:
1438: public String toString() {
1439: StringBuffer b = new StringBuffer(100);
1440: b.append(instanceNum);
1441: b.append(": ");
1442: if (inverse)
1443: b.append('^');
1444: switch (type) {
1445: case VOID:
1446: b.append("[]");
1447: b.append(" , ");
1448: break;
1449: case CHAR:
1450: b.append(CharacterClass.stringValue(c));
1451: b.append(" , ");
1452: break;
1453: case ANY_CHAR:
1454: b.append("dotall, ");
1455: break;
1456: case ANY_CHAR_NE:
1457: b.append("dot-eols, ");
1458: break;
1459: case BITSET:
1460: b.append('[');
1461: b.append(CharacterClass.stringValue0(bitset));
1462: b.append(']');
1463: b.append(" , weight=");
1464: b.append(weight);
1465: b.append(" , ");
1466: break;
1467: case BITSET2:
1468: b.append('[');
1469: b.append(CharacterClass.stringValue2(bitset2));
1470: b.append(']');
1471: b.append(" , weight=");
1472: b.append(weight);
1473: b.append(" , ");
1474: break;
1475: case START:
1476: b.append("abs.start");
1477: break;
1478: case END:
1479: b.append("abs.end");
1480: break;
1481: case END_EOL:
1482: b.append("abs.end-eol");
1483: break;
1484: case LINE_START:
1485: b.append("line start");
1486: break;
1487: case LINE_END:
1488: b.append("line end");
1489: break;
1490: case LAST_MATCH_END:
1491: if (inverse)
1492: b.append("non-");
1493: b.append("BOUNDARY");
1494: break;
1495: case BOUNDARY:
1496: if (inverse)
1497: b.append("non-");
1498: b.append("BOUNDARY");
1499: break;
1500: case UBOUNDARY:
1501: if (inverse)
1502: b.append("non-");
1503: b.append("UBOUNDARY");
1504: break;
1505: case DIRECTION:
1506: b.append("DIRECTION");
1507: break;
1508: case UDIRECTION:
1509: b.append("UDIRECTION");
1510: break;
1511: case FIND:
1512: b.append(">>>{");
1513: b.append(target);
1514: b.append("}, <<");
1515: b.append(distance);
1516: if (eat) {
1517: b.append(",eat");
1518: }
1519: b.append(", ");
1520: break;
1521: case REPEAT_0_INF:
1522: b.append("rpt{");
1523: b.append(target);
1524: b.append(",0,inf}");
1525: if (failNext != null) {
1526: b.append(", =>");
1527: b.append(failNext.instanceNum);
1528: b.append(", ");
1529: }
1530: break;
1531: case REPEAT_MIN_INF:
1532: b.append("rpt{");
1533: b.append(target);
1534: b.append(",");
1535: b.append(minCount);
1536: b.append(",inf}");
1537: if (failNext != null) {
1538: b.append(", =>");
1539: b.append(failNext.instanceNum);
1540: b.append(", ");
1541: }
1542: break;
1543: case REPEAT_MIN_MAX:
1544: b.append("rpt{");
1545: b.append(target);
1546: b.append(",");
1547: b.append(minCount);
1548: b.append(",");
1549: b.append(maxCount);
1550: b.append("}");
1551: if (failNext != null) {
1552: b.append(", =>");
1553: b.append(failNext.instanceNum);
1554: b.append(", ");
1555: }
1556: break;
1557: case REPEAT_REG_MIN_INF:
1558: b.append("rpt{$");
1559: b.append(memreg);
1560: b.append(',');
1561: b.append(minCount);
1562: b.append(",inf}");
1563: if (failNext != null) {
1564: b.append(", =>");
1565: b.append(failNext.instanceNum);
1566: b.append(", ");
1567: }
1568: break;
1569: case REPEAT_REG_MIN_MAX:
1570: b.append("rpt{$");
1571: b.append(memreg);
1572: b.append(',');
1573: b.append(minCount);
1574: b.append(',');
1575: b.append(maxCount);
1576: b.append("}");
1577: if (failNext != null) {
1578: b.append(", =>");
1579: b.append(failNext.instanceNum);
1580: b.append(", ");
1581: }
1582: break;
1583: case BACKTRACK_0:
1584: b.append("back(0)");
1585: break;
1586: case BACKTRACK_MIN:
1587: b.append("back(");
1588: b.append(minCount);
1589: b.append(")");
1590: break;
1591: case BACKTRACK_REG_MIN:
1592: b.append("back");
1593: b.append("_$");
1594: b.append(memreg);
1595: b.append("(");
1596: b.append(minCount);
1597: b.append(")");
1598: break;
1599: case GROUP_IN:
1600: b.append('(');
1601: if (memreg > 0)
1602: b.append(memreg);
1603: b.append('-');
1604: b.append(" , ");
1605: break;
1606: case GROUP_OUT:
1607: b.append('-');
1608: if (memreg > 0)
1609: b.append(memreg);
1610: b.append(')');
1611: b.append(" , ");
1612: break;
1613: case PLOOKAHEAD_IN:
1614: b.append('(');
1615: b.append("=");
1616: b.append(lookaheadId);
1617: b.append(" , ");
1618: break;
1619: case PLOOKAHEAD_OUT:
1620: b.append('=');
1621: b.append(lookaheadId);
1622: b.append(')');
1623: b.append(" , ");
1624: break;
1625: case NLOOKAHEAD_IN:
1626: b.append("(!");
1627: b.append(lookaheadId);
1628: b.append(" , ");
1629: if (failNext != null) {
1630: b.append(", =>");
1631: b.append(failNext.instanceNum);
1632: b.append(", ");
1633: }
1634: break;
1635: case NLOOKAHEAD_OUT:
1636: b.append('!');
1637: b.append(lookaheadId);
1638: b.append(')');
1639: b.append(" , ");
1640: break;
1641: case PLOOKBEHIND_IN:
1642: b.append('(');
1643: b.append("<=");
1644: b.append(lookaheadId);
1645: b.append(" , dist=");
1646: b.append(distance);
1647: b.append(" , ");
1648: break;
1649: case PLOOKBEHIND_OUT:
1650: b.append("<=");
1651: b.append(lookaheadId);
1652: b.append(')');
1653: b.append(" , ");
1654: break;
1655: case NLOOKBEHIND_IN:
1656: b.append("(<!");
1657: b.append(lookaheadId);
1658: b.append(" , dist=");
1659: b.append(distance);
1660: b.append(" , ");
1661: if (failNext != null) {
1662: b.append(", =>");
1663: b.append(failNext.instanceNum);
1664: b.append(", ");
1665: }
1666: break;
1667: case NLOOKBEHIND_OUT:
1668: b.append("<!");
1669: b.append(lookaheadId);
1670: b.append(')');
1671: b.append(" , ");
1672: break;
1673: case MEMREG_CONDITION:
1674: b.append("(reg");
1675: b.append(memreg);
1676: b.append("?)");
1677: if (failNext != null) {
1678: b.append(", =>");
1679: b.append(failNext.instanceNum);
1680: b.append(", ");
1681: }
1682: break;
1683: case LOOKAHEAD_CONDITION_IN:
1684: b.append("(cond");
1685: b.append(lookaheadId);
1686: b.append(((Lookahead) this ).isPositive ? '=' : '!');
1687: b.append(" , ");
1688: if (failNext != null) {
1689: b.append(", =>");
1690: b.append(failNext.instanceNum);
1691: b.append(", ");
1692: }
1693: break;
1694: case LOOKAHEAD_CONDITION_OUT:
1695: b.append("cond");
1696: b.append(lookaheadId);
1697: b.append(")");
1698: if (failNext != null) {
1699: b.append(", =>");
1700: b.append(failNext.instanceNum);
1701: b.append(", ");
1702: }
1703: break;
1704: case REG:
1705: b.append("$");
1706: b.append(memreg);
1707: b.append(", ");
1708: break;
1709: case SUCCESS:
1710: b.append("END");
1711: break;
1712: case BRANCH_STORE_CNT_AUX1:
1713: b.append("(aux1)");
1714: case BRANCH_STORE_CNT:
1715: b.append("(cnt)");
1716: case BRANCH:
1717: b.append("=>");
1718: if (failNext != null)
1719: b.append(failNext.instanceNum);
1720: else
1721: b.append("null");
1722: b.append(" , ");
1723: break;
1724: default:
1725: b.append('[');
1726: switch (type) {
1727: case CNT_SET_0:
1728: b.append("cnt=0");
1729: break;
1730: case CNT_INC:
1731: b.append("cnt++");
1732: break;
1733: case CNT_GT_EQ:
1734: b.append("cnt>=" + maxCount);
1735: break;
1736: case READ_CNT_LT:
1737: b.append("->cnt<" + maxCount);
1738: break;
1739: case CRSTORE_CRINC:
1740: b.append("M(" + memreg + ")->,Cr(" + cntreg + ")->,Cr("
1741: + cntreg + ")++");
1742: break;
1743: case CR_SET_0:
1744: b.append("Cr(" + cntreg + ")=0");
1745: break;
1746: case CR_LT:
1747: b.append("Cr(" + cntreg + ")<" + maxCount);
1748: break;
1749: case CR_GT_EQ:
1750: b.append("Cr(" + cntreg + ")>=" + maxCount);
1751: break;
1752: default:
1753: b.append("unknown type: " + type);
1754: }
1755: b.append("] , ");
1756: }
1757: if (next != null) {
1758: b.append("->");
1759: b.append(next.instanceNum);
1760: b.append(", ");
1761: }
1762: //b.append("\r\n");
1763: return b.toString();
1764: }
1765:
1766: public String toStringAll() {
1767: return toStringAll(new Vector());
1768: }
1769:
1770: public String toStringAll(Vector v) {
1771: v.addElement(new Integer(instanceNum));
1772: String s = toString();
1773: if (next != null) {
1774: if (!v.contains(new Integer(next.instanceNum))) {
1775: s += "\r\n";
1776: s += next.toStringAll(v);
1777: }
1778: }
1779: if (failNext != null) {
1780: if (!v.contains(new Integer(failNext.instanceNum))) {
1781: s += "\r\n";
1782: s += failNext.toStringAll(v);
1783: }
1784: }
1785: return s;
1786: }
1787: }
1788:
1789: class Pretokenizer {
1790: private static final int START = 1;
1791: static final int END = 2;
1792: static final int PLAIN_GROUP = 3;
1793: static final int POS_LOOKAHEAD = 4;
1794: static final int NEG_LOOKAHEAD = 5;
1795: static final int POS_LOOKBEHIND = 6;
1796: static final int NEG_LOOKBEHIND = 7;
1797: static final int INDEPENDENT_REGEX = 8;
1798: static final int COMMENT = 9;
1799: static final int CONDITIONAL_GROUP = 10;
1800: static final int FLAGS = 11;
1801: static final int CLASS_GROUP = 12;
1802: static final int NAMED_GROUP = 13;
1803:
1804: int tOffset, tOutside, skip;
1805: int offset, end;
1806: int c;
1807:
1808: int ttype = START;
1809:
1810: char[] data;
1811:
1812: //results
1813: private int flags;
1814: private boolean flagsChanged;
1815:
1816: char[] brackets;
1817: String groupName;
1818: boolean groupDeclared;
1819:
1820: Pretokenizer(char[] data, int offset, int end) {
1821: if (offset < 0 || end > data.length)
1822: throw new IndexOutOfBoundsException("offset=" + offset
1823: + ", end=" + end + ", length=" + data.length);
1824: this .offset = offset;
1825: this .end = end;
1826:
1827: this .tOffset = offset;
1828: this .tOutside = offset;
1829:
1830: this .data = data;
1831: }
1832:
1833: int flags(int def) {
1834: return flagsChanged ? flags : def;
1835: }
1836:
1837: void next() throws PatternSyntaxException {
1838: int tOffset = this .tOutside;
1839: int skip = this .skip;
1840:
1841: tOffset += skip;
1842: flagsChanged = false;
1843:
1844: int end = this .end;
1845: char[] data = this .data;
1846: boolean esc = false;
1847: for (int i = tOffset; i < end; i++) {
1848: if (esc) {
1849: esc = false;
1850: continue;
1851: }
1852: char c = data[i];
1853: switch (c) {
1854: case '\\':
1855: esc = true;
1856: continue;
1857: case '|':
1858: case ')':
1859: ttype = c;
1860: this .tOffset = tOffset;
1861: this .tOutside = i;
1862: this .skip = 1;
1863: return;
1864: case '(':
1865: if (((i + 2) < end) && (data[i + 1] == '?')) {
1866: char c1 = data[i + 2];
1867: switch (c1) {
1868: case ':':
1869: ttype = PLAIN_GROUP;
1870: skip = 3; // "(?:" - skip 3 chars
1871: break;
1872: case '=':
1873: ttype = POS_LOOKAHEAD;
1874: skip = 3; // "(?="
1875: break;
1876: case '!':
1877: ttype = NEG_LOOKAHEAD;
1878: skip = 3; // "(?!"
1879: break;
1880: case '<':
1881: switch (c1 = data[i + 3]) {
1882: case '=':
1883: ttype = POS_LOOKBEHIND;
1884: skip = 4; // "(?<="
1885: break;
1886: case '!':
1887: ttype = NEG_LOOKBEHIND;
1888: skip = 4; // "(?<!"
1889: break;
1890: default:
1891: throw new PatternSyntaxException(
1892: "invalid character after '(?<' : "
1893: + c1);
1894: }
1895: break;
1896: case '>':
1897: ttype = INDEPENDENT_REGEX;
1898: skip = 3; // "(?>"
1899: break;
1900: case '#':
1901: ttype = COMMENT;
1902: skip = 3; // ="(?#".length, the makeTree() skips the rest by itself
1903: break;
1904: case '(':
1905: ttype = CONDITIONAL_GROUP;
1906: skip = 2; //"(?"+"(..." - skip "(?" (2 chars) and parse condition as a group
1907: break;
1908: case '[':
1909: ttype = CLASS_GROUP;
1910: skip = 2; // "(?"+"[..]+...-...&...)" - skip 2 chars and parse a class group
1911: break;
1912: default:
1913: int mOff,
1914: mLen;
1915: mLoop: for (int p = i + 2; p < end; p++) {
1916: char c2 = data[p];
1917: switch (c2) {
1918: case '-':
1919: case 'i':
1920: case 'm':
1921: case 's':
1922: case 'x':
1923: case 'u':
1924: case 'X':
1925: //System.out.println("case '+-imsxuX' ("+c2+")");
1926: continue mLoop;
1927:
1928: case ':':
1929: mOff = i + 2;
1930: mLen = p - mOff;
1931: if (mLen > 0) {
1932: flags = Pattern.parseFlags(data,
1933: mOff, mLen);
1934: flagsChanged = true;
1935: }
1936: ttype = PLAIN_GROUP;
1937: skip = mLen + 3; // "(?imsx:" mLen=4; skip= "(?".len + ":".len + mLen = 2+1+4=7
1938: break mLoop;
1939: case ')':
1940: flags = Pattern.parseFlags(data,
1941: mOff = (i + 2),
1942: mLen = (p - mOff));
1943: flagsChanged = true;
1944: ttype = FLAGS;
1945: skip = mLen + 3; // "(?imsx)" mLen=4, skip="(?".len+")".len+mLen=2+1+4=7
1946: break mLoop;
1947: default:
1948: throw new PatternSyntaxException(
1949: "wrong char after \"(?\": "
1950: + c2);
1951: }
1952: }
1953: break;
1954: }
1955: } else if (((i + 2) < end) && (data[i + 1] == '{')) { //parse named group: ({name}....),({=name}....)
1956: int p = i + 2;
1957: skip = 3; //'({' + '}'
1958: int nstart, nend;
1959: boolean isDecl;
1960: c = data[p];
1961: //System.out.println("NG: p="+p+", c="+c);
1962: while (Character.isWhitespace(c)) {
1963: c = data[++p];
1964: skip++;
1965: if (p == end)
1966: throw new PatternSyntaxException(
1967: "malformed named group");
1968: }
1969:
1970: if (c == '=') {
1971: isDecl = false;
1972: c = data[++p];
1973: skip++;
1974: if (p == end)
1975: throw new PatternSyntaxException(
1976: "malformed named group");
1977: } else
1978: isDecl = true;
1979:
1980: nstart = p;
1981: while (Character.isJavaIdentifierPart(c)) {
1982: c = data[++p];
1983: skip++;
1984: if (p == end)
1985: throw new PatternSyntaxException(
1986: "malformed named group");
1987: }
1988: nend = p;
1989: while (Character.isWhitespace(c)) {
1990: c = data[++p];
1991: skip++;
1992: if (p == end)
1993: throw new PatternSyntaxException(
1994: "malformed named group");
1995: }
1996: if (c != '}')
1997: throw new PatternSyntaxException(
1998: "'}' expected at " + (p - i) + " in "
1999: + new String(data, i, end - i));
2000:
2001: this .groupName = new String(data, nstart, nend
2002: - nstart);
2003: this .groupDeclared = isDecl;
2004: ttype = NAMED_GROUP;
2005: } else {
2006: ttype = '(';
2007: skip = 1;
2008: }
2009: this .tOffset = tOffset;
2010: this .tOutside = i;
2011: this .skip = skip;
2012: return;
2013: case '[':
2014: loop: for (;; i++) {
2015: if (i == end)
2016: throw new PatternSyntaxException(
2017: "malformed character class");
2018: char c1 = data[i];
2019: switch (c1) {
2020: case '\\':
2021: i++;
2022: continue;
2023: case ']':
2024: break loop;
2025: }
2026: }
2027: }
2028: }
2029: ttype = END;
2030: this .tOffset = tOffset;
2031: this .tOutside = end;
2032: }
2033:
2034: }
2035:
2036: class Branch extends Term {
2037: Branch() {
2038: type = BRANCH;
2039: }
2040:
2041: Branch(int type) {
2042: switch (type) {
2043: case BRANCH:
2044: case BRANCH_STORE_CNT:
2045: case BRANCH_STORE_CNT_AUX1:
2046: this .type = type;
2047: break;
2048: default:
2049: throw new IllegalArgumentException("not a branch type: "
2050: + type);
2051: }
2052: }
2053: }
2054:
2055: class BackReference extends Term {
2056: BackReference(int no, boolean icase) {
2057: super (icase ? REG_I : REG);
2058: memreg = no;
2059: }
2060: }
2061:
2062: class Group extends Term {
2063: Group() {
2064: this (0);
2065: }
2066:
2067: Group(int memreg) {
2068: type = GROUP_IN;
2069: this .memreg = memreg;
2070:
2071: //used in append()
2072: current = null;
2073: in = this ;
2074: prev = null;
2075:
2076: out = new Term();
2077: out.type = GROUP_OUT;
2078: out.memreg = memreg;
2079: }
2080: }
2081:
2082: class ConditionalExpr extends Group {
2083: protected Term node;
2084: protected boolean newBranchStarted = false;
2085: protected boolean linkAsBranch = true;
2086:
2087: ConditionalExpr(Lookahead la) {
2088: super (0);
2089: //System.out.println("ConditionalExpr("+la+")");
2090: /*
2091: * This all is rather tricky.
2092: * See how this types are handled in Matcher.
2093: * The shortcoming is that we strongly rely upon
2094: * the internal structure of Lookahead.
2095: */
2096: la.in.type = LOOKAHEAD_CONDITION_IN;
2097: la.out.type = LOOKAHEAD_CONDITION_OUT;
2098: if (la.isPositive) {
2099: node = la.in;
2100: linkAsBranch = true;
2101:
2102: //empty 2'nd branch
2103: node.failNext = out;
2104: } else {
2105: node = la.out;
2106: linkAsBranch = false;
2107:
2108: //empty 2'nd branch
2109: node.next = out;
2110: }
2111:
2112: //node.prev=in;
2113: //in.next=node;
2114:
2115: la.prev = in;
2116: in.next = la;
2117:
2118: current = la;
2119: //current=node;
2120: }
2121:
2122: ConditionalExpr(Lookbehind lb) {
2123: super (0);
2124: //System.out.println("ConditionalExpr("+la+")");
2125: /*
2126: * This all is rather tricky.
2127: * See how this types are handled in Matcher.
2128: * The shortcoming is that we strongly rely upon
2129: * the internal structure of Lookahead.
2130: */
2131: lb.in.type = LOOKBEHIND_CONDITION_IN;
2132: lb.out.type = LOOKBEHIND_CONDITION_OUT;
2133: if (lb.isPositive) {
2134: node = lb.in;
2135: linkAsBranch = true;
2136:
2137: //empty 2'nd branch
2138: node.failNext = out;
2139: } else {
2140: node = lb.out;
2141: linkAsBranch = false;
2142:
2143: //empty 2'nd branch
2144: node.next = out;
2145: }
2146:
2147: lb.prev = in;
2148: in.next = lb;
2149:
2150: current = lb;
2151: //current=node;
2152: }
2153:
2154: ConditionalExpr(int memreg) {
2155: super (0);
2156: //System.out.println("ConditionalExpr("+memreg+")");
2157: Term condition = new Term(MEMREG_CONDITION);
2158: condition.memreg = memreg;
2159: condition.out = condition;
2160: condition.out1 = null;
2161: condition.branchOut = null;
2162:
2163: //default branch
2164: condition.failNext = out;
2165:
2166: node = current = condition;
2167: linkAsBranch = true;
2168:
2169: condition.prev = in;
2170: in.next = condition;
2171:
2172: current = condition;
2173: }
2174:
2175: protected void startNewBranch() throws PatternSyntaxException {
2176: if (newBranchStarted)
2177: throw new PatternSyntaxException(
2178: "attempt to set a 3'd choice in a conditional expr.");
2179: Term node = this .node;
2180: node.out1 = null;
2181: if (linkAsBranch) {
2182: node.out = null;
2183: node.branchOut = node;
2184: } else {
2185: node.out = node;
2186: node.branchOut = null;
2187: }
2188: newBranchStarted = true;
2189: //System.out.println("CondGrp.startNewBranch(): current="+current+", this="+this.toStringAll());
2190: current = node;
2191: }
2192: }
2193:
2194: class IndependentGroup extends Term {
2195: IndependentGroup(int id) {
2196: super (0);
2197: in = this ;
2198: out = new Term();
2199: type = INDEPENDENT_IN;
2200: out.type = INDEPENDENT_OUT;
2201: lookaheadId = out.lookaheadId = id;
2202: }
2203: }
2204:
2205: class Lookahead extends Term {
2206: final boolean isPositive;
2207:
2208: Lookahead(int id, boolean isPositive) {
2209: this .isPositive = isPositive;
2210: in = this ;
2211: out = new Term();
2212: if (isPositive) {
2213: type = PLOOKAHEAD_IN;
2214: out.type = PLOOKAHEAD_OUT;
2215: } else {
2216: type = NLOOKAHEAD_IN;
2217: out.type = NLOOKAHEAD_OUT;
2218: branchOut = this ;
2219: }
2220: lookaheadId = id;
2221: out.lookaheadId = id;
2222: }
2223: }
2224:
2225: class Lookbehind extends Term {
2226: final boolean isPositive;
2227: private int prevDistance = -1;
2228:
2229: Lookbehind(int id, boolean isPositive) {
2230: distance = 0;
2231: this .isPositive = isPositive;
2232: in = this ;
2233: out = new Term();
2234: if (isPositive) {
2235: type = PLOOKBEHIND_IN;
2236: out.type = PLOOKBEHIND_OUT;
2237: } else {
2238: type = NLOOKBEHIND_IN;
2239: out.type = NLOOKBEHIND_OUT;
2240: branchOut = this ;
2241: }
2242: lookaheadId = id;
2243: out.lookaheadId = id;
2244: }
2245:
2246: protected Term append(Term t) throws PatternSyntaxException {
2247: distance += length(t);
2248: return super .append(t);
2249: }
2250:
2251: protected Term replaceCurrent(Term t) throws PatternSyntaxException {
2252: distance += length(t) - length(current);
2253: return super .replaceCurrent(t);
2254: }
2255:
2256: private static int length(Term t) throws PatternSyntaxException {
2257: int type = t.type;
2258: switch (type) {
2259: case CHAR:
2260: case BITSET:
2261: case BITSET2:
2262: case ANY_CHAR:
2263: case ANY_CHAR_NE:
2264: return 1;
2265: case BOUNDARY:
2266: case DIRECTION:
2267: case UBOUNDARY:
2268: case UDIRECTION:
2269: return 0;
2270: default:
2271: if (type >= FIRST_TRANSPARENT && type <= LAST_TRANSPARENT)
2272: return 0;
2273: throw new PatternSyntaxException(
2274: "variable length element within a lookbehind assertion");
2275: }
2276: }
2277:
2278: protected void startNewBranch() throws PatternSyntaxException {
2279: prevDistance = distance;
2280: distance = 0;
2281: super .startNewBranch();
2282: }
2283:
2284: protected void close() throws PatternSyntaxException {
2285: int pd = prevDistance;
2286: if (pd >= 0) {
2287: if (distance != pd)
2288: throw new PatternSyntaxException(
2289: "non-equal branch lengths within a lookbehind assertion");
2290: }
2291: super .close();
2292: }
2293: }
2294:
2295: class Iterator extends Term {
2296:
2297: Iterator(Term term, int min, int max, Vector collection)
2298: throws PatternSyntaxException {
2299: collection.addElement(this );
2300: switch (term.type) {
2301: case CHAR:
2302: case ANY_CHAR:
2303: case ANY_CHAR_NE:
2304: case BITSET:
2305: case BITSET2: {
2306: target = term;
2307: Term back = new Term();
2308: if (min <= 0 && max < 0) {
2309: type = REPEAT_0_INF;
2310: back.type = BACKTRACK_0;
2311: } else if (min > 0 && max < 0) {
2312: type = REPEAT_MIN_INF;
2313: back.type = BACKTRACK_MIN;
2314: minCount = back.minCount = min;
2315: } else {
2316: type = REPEAT_MIN_MAX;
2317: back.type = BACKTRACK_MIN;
2318: minCount = back.minCount = min;
2319: maxCount = max;
2320: }
2321:
2322: failNext = back;
2323:
2324: in = this ;
2325: out = this ;
2326: out1 = back;
2327: branchOut = null;
2328: return;
2329: }
2330: case REG: {
2331: target = term;
2332: memreg = term.memreg;
2333: Term back = new Term();
2334: if (max < 0) {
2335: type = REPEAT_REG_MIN_INF;
2336: back.type = BACKTRACK_REG_MIN;
2337: minCount = back.minCount = min;
2338: } else {
2339: type = REPEAT_REG_MIN_MAX;
2340: back.type = BACKTRACK_REG_MIN;
2341: minCount = back.minCount = min;
2342: maxCount = max;
2343: }
2344:
2345: failNext = back;
2346:
2347: in = this ;
2348: out = this ;
2349: out1 = back;
2350: branchOut = null;
2351: return;
2352: }
2353: default:
2354: throw new PatternSyntaxException(
2355: "can't iterate this type: " + term.type);
2356: }
2357: }
2358:
2359: void optimize() {
2360: //System.out.println("optimizing myself: "+this);
2361: //BACKTRACK_MIN_REG_FIND
2362: Term back = failNext;
2363: Optimizer opt = Optimizer.find(back.next);
2364: if (opt == null)
2365: return;
2366: failNext = opt.makeBacktrack(back);
2367: }
2368: }
|