0001: /**
0002: * Copyright (c) 2001, Sergey A. Samokhodkin
0003: * All rights reserved.
0004: *
0005: * Redistribution and use in source and binary forms, with or without modification,
0006: * are permitted provided that the following conditions are met:
0007: *
0008: * - Redistributions of source code must retain the above copyright notice,
0009: * this list of conditions and the following disclaimer.
0010: * - Redistributions in binary form
0011: * must reproduce the above copyright notice, this list of conditions and the following
0012: * disclaimer in the documentation and/or other materials provided with the distribution.
0013: * - Neither the name of jregex nor the names of its contributors may be used
0014: * to endorse or promote products derived from this software without specific prior
0015: * written permission.
0016: *
0017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
0018: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0019: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
0020: * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
0021: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0022: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
0023: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0024: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
0025: * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0026: *
0027: * @version 1.2_01
0028: */package jregex;
0029:
0030: import java.util.*;
0031: import java.io.*;
0032:
0033: /**
0034: * Matcher instance is an automaton that actually performs matching. It provides the following methods:
0035: * <li> searching for a matching substrings : matcher.find() or matcher.findAll();
0036: * <li> testing whether a text matches a whole pattern : matcher.matches();
0037: * <li> testing whether the text matches the beginning of a pattern : matcher.matchesPrefix();
0038: * <li> searching with custom options : matcher.find(int options)
0039: * <p>
0040: * <b>Obtaining results</b><br>
0041: * After the search succeded, i.e. if one of above methods returned <code>true</code>
0042: * one may obtain an information on the match:
0043: * <li> may check whether some group is captured : matcher.isCaptured(int);
0044: * <li> may obtain start and end positions of the match and its length : matcher.start(int),matcher.end(int),matcher.length(int);
0045: * <li> may obtain match contents as String : matcher.group(int).<br>
0046: * The same way can be obtained the match prefix and suffix information.
0047: * The appropriate methods are grouped in MatchResult interface, which the Matcher class implements.<br>
0048: * Matcher objects are not thread-safe, so only one thread may use a matcher instance at a time.
0049: * Note, that Pattern objects are thread-safe(the same instanse may be shared between
0050: * multiple threads), and the typical tactics in multithreaded applications is to have one Pattern instance per expression(a singleton),
0051: * and one Matcher object per thread.
0052: */
0053:
0054: public class Matcher implements MatchResult {
0055: /* Matching options*/
0056: /**
0057: * The same effect as "^" without REFlags.MULTILINE.
0058: * @see Matcher#find(int)
0059: */
0060: public static final int ANCHOR_START = 1;
0061:
0062: /**
0063: * The same effect as "\\G".
0064: * @see Matcher#find(int)
0065: */
0066: public static final int ANCHOR_LASTMATCH = 2;
0067:
0068: /**
0069: * The same effect as "$" without REFlags.MULTILINE.
0070: * @see Matcher#find(int)
0071: */
0072: public static final int ANCHOR_END = 4;
0073:
0074: /**
0075: * Experimental option; if a text ends up before the end of a pattern,report a match.
0076: * @see Matcher#find(int)
0077: */
0078: public static final int ACCEPT_INCOMPLETE = 8;
0079:
0080: //see search(ANCHOR_START|...)
0081: private static Term startAnchor = new Term(Term.START);
0082:
0083: //see search(ANCHOR_LASTMATCH|...)
0084: private static Term lastMatchAnchor = new Term(Term.LAST_MATCH_END);
0085:
0086: private Pattern re;
0087: private int[] counters;
0088: private MemReg[] memregs;
0089: private LAEntry[] lookaheads;
0090: private int counterCount;
0091: private int memregCount;
0092: private int lookaheadCount;
0093:
0094: private char[] data;
0095: private int offset, end, wOffset, wEnd;
0096: private boolean shared;
0097:
0098: private SearchEntry top; //stack entry
0099: private SearchEntry first; //object pool entry
0100: private SearchEntry defaultEntry; //called when moving the window
0101:
0102: private boolean called;
0103:
0104: private int minQueueLength;
0105:
0106: private String cache;
0107:
0108: //cache may be longer than the actual data
0109: //and contrariwise; so cacheOffset may have both signs.
0110: //cacheOffset is actually -(data offset).
0111: private int cacheOffset, cacheLength;
0112:
0113: private MemReg prefixBounds, suffixBounds, targetBounds;
0114:
0115: Matcher(Pattern regex) {
0116: this .re = regex;
0117: //int memregCount=(memregs=new MemReg[regex.memregs]).length;
0118: //for(int i=0;i<memregCount;i++){
0119: // this.memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition
0120: //}
0121: //counters=new int[regex.counters];
0122: //int lookaheadCount=(lookaheads=new LAEntry[regex.lookaheads]).length;
0123: //for(int i=0;i<lookaheadCount;i++){
0124: // this.lookaheads[i]=new LAEntry();
0125: //}
0126:
0127: int memregCount, counterCount, lookaheadCount;
0128: if ((memregCount = regex.memregs) > 0) {
0129: MemReg[] memregs = new MemReg[memregCount];
0130: for (int i = 0; i < memregCount; i++) {
0131: memregs[i] = new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition
0132: }
0133: this .memregs = memregs;
0134: }
0135:
0136: if ((counterCount = regex.counters) > 0)
0137: counters = new int[counterCount];
0138:
0139: if ((lookaheadCount = regex.lookaheads) > 0) {
0140: LAEntry[] lookaheads = new LAEntry[lookaheadCount];
0141: for (int i = 0; i < lookaheadCount; i++) {
0142: lookaheads[i] = new LAEntry();
0143: }
0144: this .lookaheads = lookaheads;
0145: }
0146:
0147: this .memregCount = memregCount;
0148: this .counterCount = counterCount;
0149: this .lookaheadCount = lookaheadCount;
0150:
0151: first = new SearchEntry();
0152: defaultEntry = new SearchEntry();
0153: minQueueLength = regex.stringRepr.length() / 2; // just evaluation!!!
0154: }
0155:
0156: /**
0157: * This method allows to efficiently pass data between matchers.
0158: * Note that a matcher may pass data to itself:<pre>
0159: * Matcher m=new Pattern("\\w+").matcher(myString);
0160: * if(m.find())m.setTarget(m,m.SUFFIX); //forget all that is not a suffix
0161: * </pre>
0162: * Resets current search position to zero.
0163: * @param m - a matcher that is a source of data
0164: * @param groupId - which group to take data from
0165: * @see Matcher#setTarget(java.lang.String)
0166: * @see Matcher#setTarget(java.lang.String,int,int)
0167: * @see Matcher#setTarget(char[],int,int)
0168: * @see Matcher#setTarget(java.io.Reader,int)
0169: */
0170: public final void setTarget(Matcher m, int groupId) {
0171: MemReg mr = m.bounds(groupId);
0172: //System.out.println("setTarget("+m+","+groupId+")");
0173: //System.out.println(" in="+mr.in);
0174: //System.out.println(" out="+mr.out);
0175: if (mr == null)
0176: throw new IllegalArgumentException("group #" + groupId
0177: + " is not assigned");
0178: data = m.data;
0179: offset = mr.in;
0180: end = mr.out;
0181: cache = m.cache;
0182: cacheLength = m.cacheLength;
0183: cacheOffset = m.cacheOffset;
0184: if (m != this ) {
0185: shared = true;
0186: m.shared = true;
0187: }
0188: init();
0189: }
0190:
0191: /**
0192: * Supplies a text to search in/match with.
0193: * Resets current search position to zero.
0194: * @param text - a data
0195: * @see Matcher#setTarget(jregex.Matcher,int)
0196: * @see Matcher#setTarget(java.lang.String,int,int)
0197: * @see Matcher#setTarget(char[],int,int)
0198: * @see Matcher#setTarget(java.io.Reader,int)
0199: */
0200: public void setTarget(String text) {
0201: setTarget(text, 0, text.length());
0202: }
0203:
0204: /**
0205: * Supplies a text to search in/match with, as a part of String.
0206: * Resets current search position to zero.
0207: * @param text - a data source
0208: * @param start - where the target starts
0209: * @param len - how long is the target
0210: * @see Matcher#setTarget(jregex.Matcher,int)
0211: * @see Matcher#setTarget(java.lang.String)
0212: * @see Matcher#setTarget(char[],int,int)
0213: * @see Matcher#setTarget(java.io.Reader,int)
0214: */
0215: public void setTarget(String text, int start, int len) {
0216: char[] mychars = data;
0217: if (mychars == null || shared || mychars.length < len) {
0218: data = mychars = new char[(int) (1.7f * len)];
0219: shared = false;
0220: }
0221: text.getChars(start, len, mychars, 0); //(srcBegin,srcEnd,dst[],dstBegin)
0222: offset = 0;
0223: end = len;
0224:
0225: cache = text;
0226: cacheOffset = -start;
0227: cacheLength = text.length();
0228:
0229: init();
0230: }
0231:
0232: /**
0233: * Supplies a text to search in/match with, as a part of char array.
0234: * Resets current search position to zero.
0235: * @param text - a data source
0236: * @param start - where the target starts
0237: * @param len - how long is the target
0238: * @see Matcher#setTarget(jregex.Matcher,int)
0239: * @see Matcher#setTarget(java.lang.String)
0240: * @see Matcher#setTarget(java.lang.String,int,int)
0241: * @see Matcher#setTarget(java.io.Reader,int)
0242: */
0243: public void setTarget(char[] text, int start, int len) {
0244: setTarget(text, start, len, true);
0245: }
0246:
0247: /**
0248: * To be used with much care.
0249: * Supplies a text to search in/match with, as a part of a char array, as above, but also allows to permit
0250: * to use the array as internal buffer for subsequent inputs. That is, if we call it with <code>shared=false</code>:<pre>
0251: * myMatcher.setTarget(myCharArray,x,y,<b>false</b>); //we declare that array contents is NEITHER shared NOR will be used later, so may modifications on it are permitted
0252: * </pre>
0253: * then we should expect the array contents to be changed on subsequent setTarget(..) operations.
0254: * Such method may yield some increase in perfomanse in the case of multiple setTarget() calls.
0255: * Resets current search position to zero.
0256: * @param text - a data source
0257: * @param start - where the target starts
0258: * @param len - how long is the target
0259: * @param shared - if <code>true<code>: data are shared or used later, <b>don't</b> modify it; if <code>false<code>: possible modifications of the text on subsequent <code>setTarget()</code> calls are perceived and allowed.
0260: * @see Matcher#setTarget(jregex.Matcher,int)
0261: * @see Matcher#setTarget(java.lang.String)
0262: * @see Matcher#setTarget(java.lang.String,int,int)
0263: * @see Matcher#setTarget(char[],int,int)
0264: * @see Matcher#setTarget(java.io.Reader,int)
0265: */
0266: public final void setTarget(char[] text, int start, int len,
0267: boolean shared) {
0268: cache = null;
0269: data = text;
0270: offset = start;
0271: end = start + len;
0272: this .shared = shared;
0273: init();
0274: }
0275:
0276: /**
0277: * Supplies a text to search in/match with through a stream.
0278: * Resets current search position to zero.
0279: * @param in - a data stream;
0280: * @param len - how much characters should be read; if len is -1, read the entire stream.
0281: * @see Matcher#setTarget(jregex.Matcher,int)
0282: * @see Matcher#setTarget(java.lang.String)
0283: * @see Matcher#setTarget(java.lang.String,int,int)
0284: * @see Matcher#setTarget(char[],int,int)
0285: */
0286: public void setTarget(Reader in, int len) throws IOException {
0287: if (len < 0) {
0288: setAll(in);
0289: return;
0290: }
0291: char[] mychars = data;
0292: boolean shared = this .shared;
0293: if (mychars == null || shared || mychars.length < len) {
0294: mychars = new char[len];
0295: shared = false;
0296: }
0297: int count = 0;
0298: int c;
0299: while ((c = in.read(mychars, count, len)) >= 0) {
0300: len -= c;
0301: count += c;
0302: if (len == 0)
0303: break;
0304: }
0305: setTarget(mychars, 0, count, shared);
0306: }
0307:
0308: private void setAll(Reader in) throws IOException {
0309: char[] mychars = data;
0310: int free;
0311: boolean shared = this .shared;
0312: if (mychars == null || shared) {
0313: mychars = new char[free = 1024];
0314: shared = false;
0315: } else
0316: free = mychars.length;
0317: int count = 0;
0318: int c;
0319: while ((c = in.read(mychars, count, free)) >= 0) {
0320: free -= c;
0321: count += c;
0322: if (free == 0) {
0323: int newsize = count * 3;
0324: char[] newchars = new char[newsize];
0325: System.arraycopy(mychars, 0, newchars, 0, count);
0326: mychars = newchars;
0327: free = newsize - count;
0328: shared = false;
0329: }
0330: }
0331: setTarget(mychars, 0, count, shared);
0332: }
0333:
0334: private final String getString(int start, int end) {
0335: String src = cache;
0336: if (src != null) {
0337: int co = cacheOffset;
0338: return src.substring(start - co, end - co);
0339: }
0340: int tOffset, tEnd, tLen = (tEnd = this .end)
0341: - (tOffset = this .offset);
0342: char[] data = this .data;
0343: if ((end - start) >= (tLen / 3)) {
0344: //it makes sence to make a cache
0345: cache = src = new String(data, tOffset, tLen);
0346: cacheOffset = tOffset;
0347: cacheLength = tLen;
0348: return src.substring(start - tOffset, end - tOffset);
0349: }
0350: return new String(data, start, end - start);
0351: }
0352:
0353: /* Matching */
0354:
0355: /**
0356: * Tells whether the entire target matches the beginning of the pattern.
0357: * The whole pattern is also regarded as its beginning.<br>
0358: * This feature allows to find a mismatch by examining only a beginning part of
0359: * the target (as if the beginning of the target doesn't match the beginning of the pattern, then the entire target
0360: * also couldn't match).<br>
0361: * For example the following assertions yield <code>true<code>:<pre>
0362: * Pattern p=new Pattern("abcd");
0363: * p.matcher("").matchesPrefix();
0364: * p.matcher("a").matchesPrefix();
0365: * p.matcher("ab").matchesPrefix();
0366: * p.matcher("abc").matchesPrefix();
0367: * p.matcher("abcd").matchesPrefix();
0368: * </pre>
0369: * and the following yield <code>false<code>:<pre>
0370: * p.matcher("b").isPrefix();
0371: * p.matcher("abcdef").isPrefix();
0372: * p.matcher("x").isPrefix();
0373: * </pre>
0374: * @return true if the entire target matches the beginning of the pattern
0375: */
0376: public final boolean matchesPrefix() {
0377: setPosition(0);
0378: return search(ANCHOR_START | ACCEPT_INCOMPLETE | ANCHOR_END);
0379: }
0380:
0381: /**
0382: * Just an old name for isPrefix().<br>
0383: * Retained for backwards compatibility.
0384: * @deprecated Replaced by isPrefix()
0385: */
0386: public final boolean isStart() {
0387: return matchesPrefix();
0388: }
0389:
0390: /**
0391: * Tells whether a current target matches the whole pattern.
0392: * For example the following yields the <code>true<code>:<pre>
0393: * Pattern p=new Pattern("\\w+");
0394: * p.matcher("a").matches();
0395: * p.matcher("ab").matches();
0396: * p.matcher("abc").matches();
0397: * </pre>
0398: * and the following yields the <code>false<code>:<pre>
0399: * p.matcher("abc def").matches();
0400: * p.matcher("bcd ").matches();
0401: * p.matcher(" bcd").matches();
0402: * p.matcher("#xyz#").matches();
0403: * </pre>
0404: * @return whether a current target matches the whole pattern.
0405: */
0406: public final boolean matches() {
0407: if (called)
0408: setPosition(0);
0409: return search(ANCHOR_START | ANCHOR_END);
0410: }
0411:
0412: /**
0413: * Just a combination of setTarget(String) and matches().
0414: * @param s the target string;
0415: * @return whether the specified string matches the whole pattern.
0416: */
0417: public final boolean matches(String s) {
0418: setTarget(s);
0419: return search(ANCHOR_START | ANCHOR_END);
0420: }
0421:
0422: /**
0423: * Allows to set a position the subsequent find()/find(int) will start from.
0424: * @param pos the position to start from;
0425: * @see Matcher#find()
0426: * @see Matcher#find(int)
0427: */
0428: public void setPosition(int pos) {
0429: wOffset = offset + pos;
0430: wEnd = -1;
0431: called = false;
0432: flush();
0433: }
0434:
0435: public void setOffset(int offset) {
0436: this .offset = offset;
0437: wOffset = offset;
0438: wEnd = -1;
0439: called = false;
0440: flush();
0441: }
0442:
0443: /**
0444: * Searches through a target for a matching substring, starting from just after the end of last match.
0445: * If there wasn't any search performed, starts from zero.
0446: * @return <code>true</code> if a match found.
0447: */
0448: public final boolean find() {
0449: if (called)
0450: skip();
0451: return search(0);
0452: }
0453:
0454: /**
0455: * Searches through a target for a matching substring, starting from just after the end of last match.
0456: * If there wasn't any search performed, starts from zero.
0457: * @param anchors a zero or a combination(bitwise OR) of ANCHOR_START,ANCHOR_END,ANCHOR_LASTMATCH,ACCEPT_INCOMPLETE
0458: * @return <code>true</code> if a match found.
0459: */
0460: public final boolean find(int anchors) {
0461: if (called)
0462: skip();
0463: return search(anchors);
0464: }
0465:
0466: /**
0467: * The same as findAll(int), but with default behaviour;
0468: */
0469: public MatchIterator findAll() {
0470: return findAll(0);
0471: }
0472:
0473: /**
0474: * Returns an iterator over the matches found by subsequently calling find(options), the search starts from the zero position.
0475: */
0476: public MatchIterator findAll(final int options) {
0477: //setPosition(0);
0478: return new MatchIterator() {
0479: private boolean checked = false;
0480: private boolean hasMore = false;
0481:
0482: public boolean hasMore() {
0483: if (!checked)
0484: check();
0485: return hasMore;
0486: }
0487:
0488: public MatchResult nextMatch() {
0489: if (!checked)
0490: check();
0491: if (!hasMore)
0492: throw new NoSuchElementException();
0493: checked = false;
0494: return Matcher.this ;
0495: }
0496:
0497: private final void check() {
0498: hasMore = find(options);
0499: checked = true;
0500: }
0501:
0502: public int count() {
0503: if (!checked)
0504: check();
0505: if (!hasMore)
0506: return 0;
0507: int c = 1;
0508: while (find(options))
0509: c++;
0510: checked = false;
0511: return c;
0512: }
0513: };
0514: }
0515:
0516: /**
0517: * Continues to search from where the last search left off.
0518: * The same as proceed(0).
0519: * @see Matcher#proceed(int)
0520: */
0521: public final boolean proceed() {
0522: return proceed(0);
0523: }
0524:
0525: /**
0526: * Continues to search from where the last search left off using specified options:<pre>
0527: * Matcher m=new Pattern("\\w+").matcher("abc");
0528: * while(m.proceed(0)){
0529: * System.out.println(m.group(0));
0530: * }
0531: * </pre>
0532: * Output:<pre>
0533: * abc
0534: * ab
0535: * a
0536: * bc
0537: * b
0538: * c
0539: * </pre>
0540: * For example, let's find all odd nubmers occuring in a text:<pre>
0541: * Matcher m=new Pattern("\\d+").matcher("123");
0542: * while(m.proceed(0)){
0543: * String match=m.group(0);
0544: * if(isOdd(Integer.parseInt(match))) System.out.println(match);
0545: * }
0546: *
0547: * static boolean isOdd(int i){
0548: * return (i&1)>0;
0549: * }
0550: * </pre>
0551: * This outputs:<pre>
0552: * 123
0553: * 1
0554: * 23
0555: * 3
0556: * </pre>
0557: * Note that using <code>find()</code> method we would find '123' only.
0558: * @param options search options, some of ANCHOR_START|ANCHOR_END|ANCHOR_LASTMATCH|ACCEPT_INCOMPLETE; zero value(default) stands for usual search for substring.
0559: */
0560: public final boolean proceed(int options) {
0561: //System.out.println("next() : top="+top);
0562: if (called) {
0563: if (top == null) {
0564: wOffset++;
0565: }
0566: }
0567: return search(0);
0568: }
0569:
0570: /**
0571: * Sets the current search position just after the end of last match.
0572: */
0573: public final void skip() {
0574: int we = wEnd;
0575: if (wOffset == we) { //requires special handling
0576: //if no variants at 'wOutside',advance pointer and clear
0577: if (top == null) {
0578: wOffset++;
0579: flush();
0580: }
0581: //otherwise, if there exist a variant,
0582: //don't clear(), i.e. allow it to match
0583: return;
0584: } else {
0585: if (we < 0)
0586: wOffset = 0;
0587: else
0588: wOffset = we;
0589: }
0590: //rflush(); //rflush() works faster on simple regexes (with a small group/branch number)
0591: flush();
0592: }
0593:
0594: private final void init() {
0595: //wOffset=-1;
0596: //System.out.println("init(): offset="+offset+", end="+end);
0597: wOffset = offset;
0598: wEnd = -1;
0599: called = false;
0600: flush();
0601: }
0602:
0603: /**
0604: * Resets the internal state.
0605: */
0606: private final void flush() {
0607: top = null;
0608: defaultEntry.reset(0);
0609:
0610: /*
0611: int c=0;
0612: SearchEntry se=first;
0613: while(se!=null){
0614: c++;
0615: se=se.on;
0616: }
0617: System.out.println("queue: allocated="+c+", truncating to "+minQueueLength);
0618: new Exception().printStackTrace();
0619: */
0620:
0621: first.reset(minQueueLength);
0622: //first.reset(0);
0623: for (int i = memregs.length - 1; i > 0; i--) {
0624: MemReg mr = memregs[i];
0625: mr.in = mr.out = -1;
0626: }
0627: for (int i = memregs.length - 1; i > 0; i--) {
0628: MemReg mr = memregs[i];
0629: mr.in = mr.out = -1;
0630: }
0631: called = false;
0632: }
0633:
0634: //reverse flush
0635: //may work significantly faster,
0636: //need testing
0637: private final void rflush() {
0638: SearchEntry entry = top;
0639: top = null;
0640: MemReg[] memregs = this .memregs;
0641: int[] counters = this .counters;
0642: while (entry != null) {
0643: SearchEntry next = entry.sub;
0644: SearchEntry.popState(entry, memregs, counters);
0645: entry = next;
0646: }
0647: SearchEntry.popState(defaultEntry, memregs, counters);
0648: }
0649:
0650: /**
0651: */
0652: public String toString() {
0653: return getString(wOffset, wEnd);
0654: }
0655:
0656: public Pattern pattern() {
0657: return re;
0658: }
0659:
0660: public String target() {
0661: return getString(offset, end);
0662: }
0663:
0664: /**
0665: */
0666: public char[] targetChars() {
0667: shared = true;
0668: return data;
0669: }
0670:
0671: /**
0672: */
0673: public int targetStart() {
0674: return offset;
0675: }
0676:
0677: /**
0678: */
0679: public int targetEnd() {
0680: return end;
0681: }
0682:
0683: public char charAt(int i) {
0684: int in = this .wOffset;
0685: int out = this .wEnd;
0686: if (in < 0 || out < in)
0687: throw new IllegalStateException("unassigned");
0688: return data[in + i];
0689: }
0690:
0691: public char charAt(int i, int groupId) {
0692: MemReg mr = bounds(groupId);
0693: if (mr == null)
0694: throw new IllegalStateException("group #" + groupId
0695: + " is not assigned");
0696: int in = mr.in;
0697: if (i < 0 || i > (mr.out - in))
0698: throw new StringIndexOutOfBoundsException("" + i);
0699: return data[in + i];
0700: }
0701:
0702: public final int length() {
0703: return wEnd - wOffset;
0704: }
0705:
0706: /**
0707: */
0708: public final int start() {
0709: return wOffset - offset;
0710: }
0711:
0712: /**
0713: */
0714: public final int end() {
0715: return wEnd - offset;
0716: }
0717:
0718: /**
0719: */
0720: public String prefix() {
0721: return getString(offset, wOffset);
0722: }
0723:
0724: /**
0725: */
0726: public String suffix() {
0727: return getString(wEnd, end);
0728: }
0729:
0730: /**
0731: */
0732: public int groupCount() {
0733: return memregs.length;
0734: }
0735:
0736: /**
0737: */
0738: public String group(int n) {
0739: MemReg mr = bounds(n);
0740: if (mr == null)
0741: return null;
0742: return getString(mr.in, mr.out);
0743: }
0744:
0745: /**
0746: */
0747: public String group(String name) {
0748: Integer id = re.groupId(name);
0749: if (id == null)
0750: throw new IllegalArgumentException("<" + name
0751: + "> isn't defined");
0752: return group(id.intValue());
0753: }
0754:
0755: /**
0756: */
0757: public boolean getGroup(int n, TextBuffer tb) {
0758: MemReg mr = bounds(n);
0759: if (mr == null)
0760: return false;
0761: int in;
0762: tb.append(data, in = mr.in, mr.out - in);
0763: return true;
0764: }
0765:
0766: /**
0767: */
0768: public boolean getGroup(String name, TextBuffer tb) {
0769: Integer id = re.groupId(name);
0770: if (id == null)
0771: throw new IllegalArgumentException("unknown group: \""
0772: + name + "\"");
0773: return getGroup(id.intValue(), tb);
0774: }
0775:
0776: /**
0777: */
0778: public boolean getGroup(int n, StringBuffer sb) {
0779: MemReg mr = bounds(n);
0780: if (mr == null)
0781: return false;
0782: int in;
0783: sb.append(data, in = mr.in, mr.out - in);
0784: return true;
0785: }
0786:
0787: /**
0788: */
0789: public boolean getGroup(String name, StringBuffer sb) {
0790: Integer id = re.groupId(name);
0791: if (id == null)
0792: throw new IllegalArgumentException("unknown group: \""
0793: + name + "\"");
0794: return getGroup(id.intValue(), sb);
0795: }
0796:
0797: /**
0798: */
0799: public String[] groups() {
0800: MemReg[] memregs = this .memregs;
0801: String[] groups = new String[memregs.length];
0802: int in, out;
0803: MemReg mr;
0804: for (int i = 0; i < memregs.length; i++) {
0805: in = (mr = memregs[i]).in;
0806: out = mr.out;
0807: if ((in = mr.in) < 0 || mr.out < in)
0808: continue;
0809: groups[i] = getString(in, out);
0810: }
0811: return groups;
0812: }
0813:
0814: /**
0815: */
0816: public Vector groupv() {
0817: MemReg[] memregs = this .memregs;
0818: Vector v = new Vector();
0819: int in, out;
0820: MemReg mr;
0821: for (int i = 0; i < memregs.length; i++) {
0822: mr = bounds(i);
0823: if (mr == null) {
0824: v.addElement("empty");
0825: continue;
0826: }
0827: String s = getString(mr.in, mr.out);
0828: v.addElement(s);
0829: }
0830: return v;
0831: }
0832:
0833: private final MemReg bounds(int id) {
0834: //System.out.println("Matcher.bounds("+id+"):");
0835: MemReg mr;
0836: if (id >= 0) {
0837: mr = memregs[id];
0838: } else
0839: switch (id) {
0840: case PREFIX:
0841: mr = prefixBounds;
0842: if (mr == null)
0843: prefixBounds = mr = new MemReg(PREFIX);
0844: mr.in = offset;
0845: mr.out = wOffset;
0846: break;
0847: case SUFFIX:
0848: mr = suffixBounds;
0849: if (mr == null)
0850: suffixBounds = mr = new MemReg(SUFFIX);
0851: mr.in = wEnd;
0852: mr.out = end;
0853: break;
0854: case TARGET:
0855: mr = targetBounds;
0856: if (mr == null)
0857: targetBounds = mr = new MemReg(TARGET);
0858: mr.in = offset;
0859: mr.out = end;
0860: break;
0861: default:
0862: throw new IllegalArgumentException(
0863: "illegal group id: "
0864: + id
0865: + "; must either nonnegative int, or MatchResult.PREFIX, or MatchResult.SUFFIX");
0866: }
0867: //System.out.println(" mr=["+mr.in+","+mr.out+"]");
0868: int in;
0869: if ((in = mr.in) < 0 || mr.out < in)
0870: return null;
0871: return mr;
0872: }
0873:
0874: /**
0875: */
0876: public final boolean isCaptured() {
0877: return wOffset >= 0 && wEnd >= wOffset;
0878: }
0879:
0880: /**
0881: */
0882: public final boolean isCaptured(int id) {
0883: return bounds(id) != null;
0884: }
0885:
0886: /**
0887: */
0888: public final boolean isCaptured(String groupName) {
0889: Integer id = re.groupId(groupName);
0890: if (id == null)
0891: throw new IllegalArgumentException("unknown group: \""
0892: + groupName + "\"");
0893: return isCaptured(id.intValue());
0894: }
0895:
0896: /**
0897: */
0898: public final int length(int id) {
0899: MemReg mr = bounds(id);
0900: return mr.out - mr.in;
0901: }
0902:
0903: /**
0904: */
0905: public final int start(int id) {
0906: return bounds(id).in - offset;
0907: }
0908:
0909: /**
0910: */
0911: public final int end(int id) {
0912: return bounds(id).out - offset;
0913: }
0914:
0915: private final boolean search(int anchors) {
0916: called = true;
0917: final int end = this .end;
0918: int offset = this .offset;
0919: char[] data = this .data;
0920: int wOffset = this .wOffset;
0921: int wEnd = this .wEnd;
0922:
0923: MemReg[] memregs = this .memregs;
0924: int[] counters = this .counters;
0925: LAEntry[] lookaheads = this .lookaheads;
0926:
0927: //int memregCount=memregs.length;
0928: //int cntCount=counters.length;
0929: int memregCount = this .memregCount;
0930: int cntCount = this .counterCount;
0931:
0932: SearchEntry defaultEntry = this .defaultEntry;
0933: SearchEntry first = this .first;
0934: SearchEntry top = this .top;
0935: SearchEntry actual = null;
0936: int cnt, regLen;
0937: int i;
0938:
0939: final boolean matchEnd = (anchors & ANCHOR_END) > 0;
0940: final boolean allowIncomplete = (anchors & ACCEPT_INCOMPLETE) > 0;
0941:
0942: Pattern re = this .re;
0943: Term root = re.root;
0944: Term term;
0945: if (top == null) {
0946: if ((anchors & ANCHOR_START) > 0) {
0947: term = re.root0; //raw root
0948: root = startAnchor;
0949: } else if ((anchors & ANCHOR_LASTMATCH) > 0) {
0950: term = re.root0; //raw root
0951: root = lastMatchAnchor;
0952: } else {
0953: term = root; //optimized root
0954: }
0955: i = wOffset;
0956: actual = first;
0957: SearchEntry.popState(defaultEntry, memregs, counters);
0958: } else {
0959: top = (actual = top).sub;
0960: term = actual.term;
0961: i = actual.index;
0962: SearchEntry.popState(actual, memregs, counters);
0963: }
0964: cnt = actual.cnt;
0965: regLen = actual.regLen;
0966:
0967: main: while (wOffset <= end) {
0968: matchHere: for (;;) {
0969: /*
0970: System.out.print("char: "+i+", term: ");
0971: System.out.print(term.toString());
0972:
0973: System.out.print(" // mrs:{");
0974: for(int dbi=0;dbi<memregs.length;dbi++){
0975: System.out.print('[');
0976: System.out.print(memregs[dbi].in);
0977: System.out.print(',');
0978: System.out.print(memregs[dbi].out);
0979: System.out.print(']');
0980: System.out.print(' ');
0981: }
0982: System.out.print("}, crs:{");
0983: for(int dbi=0;dbi<counters.length;dbi++){
0984: System.out.print(counters[dbi]);
0985: if(dbi<counters.length-1)System.out.print(',');
0986: }
0987: System.out.println("}");
0988: */
0989: int memreg, cntreg;
0990: char c;
0991: switch (term.type) {
0992: case Term.FIND: {
0993: int jump = find(data, i + term.distance, end,
0994: term.target); //don't eat the last match
0995: if (jump < 0)
0996: break main; //return false
0997: i += jump;
0998: wOffset = i; //force window to move
0999: if (term.eat) {
1000: if (i == end)
1001: break;
1002: i++;
1003: }
1004: term = term.next;
1005: continue matchHere;
1006: }
1007: case Term.FINDREG: {
1008: MemReg mr = memregs[term.target.memreg];
1009: int sampleOff = mr.in;
1010: int sampleLen = mr.out - sampleOff;
1011: //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
1012: /*@since 1.2*/
1013: if (sampleOff < 0 || sampleLen < 0) {
1014: break;
1015: } else if (sampleLen == 0) {
1016: term = term.next;
1017: continue matchHere;
1018: }
1019: int jump = findReg(data, i + term.distance,
1020: sampleOff, sampleLen, term.target, end); //don't eat the last match
1021: if (jump < 0)
1022: break main; //return false
1023: i += jump;
1024: wOffset = i; //force window to move
1025: if (term.eat) {
1026: i += sampleLen;
1027: if (i > end)
1028: break;
1029: }
1030: term = term.next;
1031: continue matchHere;
1032: }
1033: case Term.VOID:
1034: term = term.next;
1035: continue matchHere;
1036:
1037: case Term.CHAR:
1038: //can only be 1-char-wide
1039: // \/
1040: if (i >= end || data[i] != term.c)
1041: break;
1042: //System.out.println("CHAR: "+data[i]+", i="+i);
1043: i++;
1044: term = term.next;
1045: continue matchHere;
1046:
1047: case Term.ANY_CHAR:
1048: //can only be 1-char-wide
1049: // \/
1050: if (i >= end)
1051: break;
1052: i++;
1053: term = term.next;
1054: continue matchHere;
1055:
1056: case Term.ANY_CHAR_NE:
1057: //can only be 1-char-wide
1058: // \/
1059: if (i >= end || data[i] == '\n')
1060: break;
1061: i++;
1062: term = term.next;
1063: continue matchHere;
1064:
1065: case Term.END:
1066: if (i >= end) { //meets
1067: term = term.next;
1068: continue matchHere;
1069: }
1070: break;
1071:
1072: case Term.END_EOL: //perl's $
1073: if (i >= end) { //meets
1074: term = term.next;
1075: continue matchHere;
1076: } else {
1077: boolean matches = i >= end
1078: | ((i + 1) == end && data[i] == '\n');
1079:
1080: if (matches) {
1081: term = term.next;
1082: continue matchHere;
1083: } else
1084: break;
1085: }
1086:
1087: case Term.LINE_END:
1088: if (i >= end) { //meets
1089: term = term.next;
1090: continue matchHere;
1091: } else {
1092: /*
1093: if(((c=data[i])=='\r' || c=='\n') &&
1094: (c=data[i-1])!='\r' && c!='\n'){
1095: term=term.next;
1096: continue matchHere;
1097: }
1098: */
1099: //5 aug 2001
1100: if (data[i] == '\n') {
1101: term = term.next;
1102: continue matchHere;
1103: }
1104: }
1105: break;
1106:
1107: case Term.START: //Perl's "^"
1108: if (i == offset) { //meets
1109: term = term.next;
1110: continue matchHere;
1111: }
1112: //break;
1113:
1114: //changed on 27-04-2002
1115: //due to a side effect: if ALLOW_INCOMPLETE is enabled,
1116: //the anchorStart moves up to the end and succeeds
1117: //(see comments at the last lines of matchHere, ~line 1830)
1118: //Solution: if there are some entries on the stack ("^a|b$"),
1119: //try them; otherwise it's a final 'no'
1120: //if(top!=null) break;
1121: //else break main;
1122:
1123: //changed on 25-05-2002
1124: //rationale: if the term is startAnchor,
1125: //it's the root term by definition,
1126: //so if it doesn't match, the entire pattern
1127: //couldn't match too;
1128: //otherwise we could have the following problem:
1129: //"c|^a" against "abc" finds only "a"
1130: if (top != null)
1131: break;
1132: if (term != startAnchor)
1133: break;
1134: else
1135: break main;
1136:
1137: case Term.LAST_MATCH_END:
1138: if (i == wEnd || wEnd == -1) { //meets
1139: term = term.next;
1140: continue matchHere;
1141: }
1142: break main; //return false
1143:
1144: case Term.LINE_START:
1145: if (i == offset) { //meets
1146: term = term.next;
1147: continue matchHere;
1148: } else if (i < end) {
1149: /*
1150: if(((c=data[i-1])=='\r' || c=='\n') &&
1151: (c=data[i])!='\r' && c!='\n'){
1152: term=term.next;
1153: continue matchHere;
1154: }
1155: */
1156: //5 aug 2001
1157: //if((c=data[i-1])=='\r' || c=='\n'){ ??
1158: if ((c = data[i - 1]) == '\n') {
1159: term = term.next;
1160: continue matchHere;
1161: }
1162: }
1163: break;
1164:
1165: case Term.BITSET: {
1166: //can only be 1-char-wide
1167: // \/
1168: if (i >= end)
1169: break;
1170: c = data[i];
1171: if (!(c <= 255 && term.bitset[c]) ^ term.inverse)
1172: break;
1173: i++;
1174: term = term.next;
1175: continue matchHere;
1176: }
1177: case Term.BITSET2: {
1178: //can only be 1-char-wide
1179: // \/
1180: if (i >= end)
1181: break;
1182: c = data[i];
1183: boolean[] arr = term.bitset2[c >> 8];
1184: if (arr == null || !arr[c & 255] ^ term.inverse)
1185: break;
1186: i++;
1187: term = term.next;
1188: continue matchHere;
1189: }
1190: case Term.BOUNDARY: {
1191: boolean ch1Meets = false, ch2Meets = false;
1192: boolean[] bitset = term.bitset;
1193: test1: {
1194: int j = i - 1;
1195: //if(j<offset || j>=end) break test1;
1196: if (j < offset)
1197: break test1;
1198: c = data[j];
1199: ch1Meets = (c < 256 && bitset[c]);
1200: }
1201: test2: {
1202: //if(i<offset || i>=end) break test2;
1203: if (i >= end)
1204: break test2;
1205: c = data[i];
1206: ch2Meets = (c < 256 && bitset[c]);
1207: }
1208: if (ch1Meets ^ ch2Meets ^ term.inverse) { //meets
1209: term = term.next;
1210: continue matchHere;
1211: } else
1212: break;
1213: }
1214: case Term.UBOUNDARY: {
1215: boolean ch1Meets = false, ch2Meets = false;
1216: boolean[][] bitset2 = term.bitset2;
1217: test1: {
1218: int j = i - 1;
1219: //if(j<offset || j>=end) break test1;
1220: if (j < offset)
1221: break test1;
1222: c = data[j];
1223: boolean[] bits = bitset2[c >> 8];
1224: ch1Meets = bits != null && bits[c & 0xff];
1225: }
1226: test2: {
1227: //if(i<offset || i>=end) break test2;
1228: if (i >= end)
1229: break test2;
1230: c = data[i];
1231: boolean[] bits = bitset2[c >> 8];
1232: ch2Meets = bits != null && bits[c & 0xff];
1233: }
1234: if (ch1Meets ^ ch2Meets ^ term.inverse) { //is boundary ^ inv
1235: term = term.next;
1236: continue matchHere;
1237: } else
1238: break;
1239: }
1240: case Term.DIRECTION: {
1241: boolean ch1Meets = false, ch2Meets = false;
1242: boolean[] bitset = term.bitset;
1243: boolean inv = term.inverse;
1244: //System.out.println("i="+i+", inv="+inv+", bitset="+CharacterClass.stringValue0(bitset));
1245: int j = i - 1;
1246: //if(j>=offset && j<end){
1247: if (j >= offset) {
1248: c = data[j];
1249: ch1Meets = c < 256 && bitset[c];
1250: //System.out.println(" ch1Meets="+ch1Meets);
1251: }
1252: if (ch1Meets ^ inv)
1253: break;
1254:
1255: //if(i>=offset && i<end){
1256: if (i < end) {
1257: c = data[i];
1258: ch2Meets = c < 256 && bitset[c];
1259: //System.out.println(" ch2Meets="+ch2Meets);
1260: }
1261: if (!ch2Meets ^ inv)
1262: break;
1263:
1264: //System.out.println(" Ok");
1265:
1266: term = term.next;
1267: continue matchHere;
1268: }
1269: case Term.UDIRECTION: {
1270: boolean ch1Meets = false, ch2Meets = false;
1271: boolean[][] bitset2 = term.bitset2;
1272: boolean inv = term.inverse;
1273: int j = i - 1;
1274:
1275: //if(j>=offset && j<end){
1276: if (j >= offset) {
1277: c = data[j];
1278: boolean[] bits = bitset2[c >> 8];
1279: ch1Meets = bits != null && bits[c & 0xff];
1280: }
1281: if (ch1Meets ^ inv)
1282: break;
1283:
1284: //if(i>=offset && i<end){
1285: if (i < end) {
1286: c = data[i];
1287: boolean[] bits = bitset2[c >> 8];
1288: ch2Meets = bits != null && bits[c & 0xff];
1289: }
1290: if (!ch2Meets ^ inv)
1291: break;
1292:
1293: term = term.next;
1294: continue matchHere;
1295: }
1296: case Term.REG: {
1297: MemReg mr = memregs[term.memreg];
1298: int sampleOffset = mr.in;
1299: int sampleOutside = mr.out;
1300: int rLen;
1301: if (sampleOffset < 0
1302: || (rLen = sampleOutside - sampleOffset) < 0) {
1303: break;
1304: } else if (rLen == 0) {
1305: term = term.next;
1306: continue matchHere;
1307: }
1308:
1309: // don't prevent us from reaching the 'end'
1310: if ((i + rLen) > end)
1311: break;
1312:
1313: if (compareRegions(data, sampleOffset, i, rLen, end)) {
1314: i += rLen;
1315: term = term.next;
1316: continue matchHere;
1317: }
1318: break;
1319: }
1320: case Term.REG_I: {
1321: MemReg mr = memregs[term.memreg];
1322: int sampleOffset = mr.in;
1323: int sampleOutside = mr.out;
1324: int rLen;
1325: if (sampleOffset < 0
1326: || (rLen = sampleOutside - sampleOffset) < 0) {
1327: break;
1328: } else if (rLen == 0) {
1329: term = term.next;
1330: continue matchHere;
1331: }
1332:
1333: // don't prevent us from reaching the 'end'
1334: if ((i + rLen) > end)
1335: break;
1336:
1337: if (compareRegionsI(data, sampleOffset, i, rLen,
1338: end)) {
1339: i += rLen;
1340: term = term.next;
1341: continue matchHere;
1342: }
1343: break;
1344: }
1345: case Term.REPEAT_0_INF: {
1346: //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
1347: //i+=(cnt=repeat(data,i,end,term.target));
1348: if ((cnt = repeat(data, i, end, term.target)) <= 0) {
1349: term = term.next;
1350: continue;
1351: }
1352: i += cnt;
1353:
1354: //branch out the backtracker (that is term.failNext, see Term.make*())
1355: actual.cnt = cnt;
1356: actual.term = term.failNext;
1357: actual.index = i;
1358: actual = (top = actual).on;
1359: if (actual == null) {
1360: actual = new SearchEntry();
1361: top.on = actual;
1362: actual.sub = top;
1363: }
1364: term = term.next;
1365: continue;
1366: }
1367: case Term.REPEAT_MIN_INF: {
1368: //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
1369: cnt = repeat(data, i, end, term.target);
1370: if (cnt < term.minCount)
1371: break;
1372: i += cnt;
1373:
1374: //branch out the backtracker (that is term.failNext, see Term.make*())
1375: actual.cnt = cnt;
1376: actual.term = term.failNext;
1377: actual.index = i;
1378: actual = (top = actual).on;
1379: if (actual == null) {
1380: actual = new SearchEntry();
1381: top.on = actual;
1382: actual.sub = top;
1383: }
1384: term = term.next;
1385: continue;
1386: }
1387: case Term.REPEAT_MIN_MAX: {
1388: //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
1389: int out1 = end;
1390: int out2 = i + term.maxCount;
1391: cnt = repeat(data, i, out1 < out2 ? out1 : out2,
1392: term.target);
1393: if (cnt < term.minCount)
1394: break;
1395: i += cnt;
1396:
1397: //branch out the backtracker (that is term.failNext, see Term.make*())
1398: actual.cnt = cnt;
1399: actual.term = term.failNext;
1400: actual.index = i;
1401: actual = (top = actual).on;
1402: if (actual == null) {
1403: actual = new SearchEntry();
1404: top.on = actual;
1405: actual.sub = top;
1406: }
1407: term = term.next;
1408: continue;
1409: }
1410: case Term.REPEAT_REG_MIN_INF: {
1411: MemReg mr = memregs[term.memreg];
1412: int sampleOffset = mr.in;
1413: int sampleOutside = mr.out;
1414: //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg);
1415: //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg);
1416: /*@since 1.2*/
1417: int bitset;
1418: if (sampleOffset < 0
1419: || (bitset = sampleOutside - sampleOffset) < 0) {
1420: break;
1421: } else if (bitset == 0) {
1422: term = term.next;
1423: continue matchHere;
1424: }
1425:
1426: cnt = 0;
1427:
1428: while (compareRegions(data, i, sampleOffset,
1429: bitset, end)) {
1430: cnt++;
1431: i += bitset;
1432: }
1433:
1434: if (cnt < term.minCount)
1435: break;
1436:
1437: actual.cnt = cnt;
1438: actual.term = term.failNext;
1439: actual.index = i;
1440: actual.regLen = bitset;
1441: actual = (top = actual).on;
1442: if (actual == null) {
1443: actual = new SearchEntry();
1444: top.on = actual;
1445: actual.sub = top;
1446: }
1447: term = term.next;
1448: continue;
1449: }
1450: case Term.REPEAT_REG_MIN_MAX: {
1451: MemReg mr = memregs[term.memreg];
1452: int sampleOffset = mr.in;
1453: int sampleOutside = mr.out;
1454: //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg);
1455: //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg);
1456: /*@since 1.2*/
1457: int bitset;
1458: if (sampleOffset < 0
1459: || (bitset = sampleOutside - sampleOffset) < 0) {
1460: break;
1461: } else if (bitset == 0) {
1462: term = term.next;
1463: continue matchHere;
1464: }
1465:
1466: cnt = 0;
1467: int countBack = term.maxCount;
1468: while (countBack > 0
1469: && compareRegions(data, i, sampleOffset,
1470: bitset, end)) {
1471: cnt++;
1472: i += bitset;
1473: countBack--;
1474: }
1475:
1476: if (cnt < term.minCount)
1477: break;
1478:
1479: actual.cnt = cnt;
1480: actual.term = term.failNext;
1481: actual.index = i;
1482: actual.regLen = bitset;
1483: actual = (top = actual).on;
1484: if (actual == null) {
1485: actual = new SearchEntry();
1486: top.on = actual;
1487: actual.sub = top;
1488: }
1489: term = term.next;
1490: continue;
1491: }
1492: case Term.BACKTRACK_0:
1493: //System.out.println("<<");
1494: cnt = actual.cnt;
1495: if (cnt > 0) {
1496: cnt--;
1497: i--;
1498: actual.cnt = cnt;
1499: actual.index = i;
1500: actual.term = term;
1501: actual = (top = actual).on;
1502: if (actual == null) {
1503: actual = new SearchEntry();
1504: top.on = actual;
1505: actual.sub = top;
1506: }
1507: term = term.next;
1508: continue;
1509: } else
1510: break;
1511:
1512: case Term.BACKTRACK_MIN:
1513: //System.out.println("<<");
1514: cnt = actual.cnt;
1515: if (cnt > term.minCount) {
1516: cnt--;
1517: i--;
1518: actual.cnt = cnt;
1519: actual.index = i;
1520: actual.term = term;
1521: actual = (top = actual).on;
1522: if (actual == null) {
1523: actual = new SearchEntry();
1524: top.on = actual;
1525: actual.sub = top;
1526: }
1527: term = term.next;
1528: continue;
1529: } else
1530: break;
1531:
1532: case Term.BACKTRACK_FIND_MIN: {
1533: //System.out.print("<<<[cnt=");
1534: cnt = actual.cnt;
1535: //System.out.print(cnt+", minCnt=");
1536: //System.out.print(term.minCount+", target=");
1537: //System.out.print(term.target+"]");
1538: int minCnt;
1539: if (cnt > (minCnt = term.minCount)) {
1540: int start = i + term.distance;
1541: if (start > end) {
1542: int exceed = start - end;
1543: cnt -= exceed;
1544: if (cnt <= minCnt)
1545: break;
1546: i -= exceed;
1547: start = end;
1548: }
1549: int back = findBack(data, i + term.distance,
1550: cnt - minCnt, term.target);
1551: //System.out.print("[back="+back+"]");
1552: if (back < 0)
1553: break;
1554:
1555: //cnt-=back;
1556: //i-=back;
1557: if ((cnt -= back) <= minCnt) {
1558: i -= back;
1559: if (term.eat)
1560: i++;
1561: term = term.next;
1562: continue;
1563: }
1564: i -= back;
1565:
1566: actual.cnt = cnt;
1567: actual.index = i;
1568:
1569: if (term.eat)
1570: i++;
1571:
1572: actual.term = term;
1573: actual = (top = actual).on;
1574: if (actual == null) {
1575: actual = new SearchEntry();
1576: top.on = actual;
1577: actual.sub = top;
1578: }
1579: term = term.next;
1580: continue;
1581: } else
1582: break;
1583: }
1584:
1585: case Term.BACKTRACK_FINDREG_MIN: {
1586: //System.out.print("<<<[cnt=");
1587: cnt = actual.cnt;
1588: //System.out.print(cnt+", minCnt=");
1589: //System.out.print(term.minCount+", target=");
1590: //System.out.print(term.target);
1591: //System.out.print("reg=<"+memregs[term.target.memreg].in+","+memregs[term.target.memreg].out+">]");
1592: int minCnt;
1593: if (cnt > (minCnt = term.minCount)) {
1594: int start = i + term.distance;
1595: if (start > end) {
1596: int exceed = start - end;
1597: cnt -= exceed;
1598: if (cnt <= minCnt)
1599: break;
1600: i -= exceed;
1601: start = end;
1602: }
1603: MemReg mr = memregs[term.target.memreg];
1604: int sampleOff = mr.in;
1605: int sampleLen = mr.out - sampleOff;
1606: //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
1607: //int back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end);
1608: //if(back<0) break;
1609: /*@since 1.2*/
1610: int back;
1611: if (sampleOff < 0 || sampleLen < 0) {
1612: //the group is not def., as in the case of '(\w+)\1'
1613: //treat as usual BACKTRACK_MIN
1614: cnt--;
1615: i--;
1616: actual.cnt = cnt;
1617: actual.index = i;
1618: actual.term = term;
1619: actual = (top = actual).on;
1620: if (actual == null) {
1621: actual = new SearchEntry();
1622: top.on = actual;
1623: actual.sub = top;
1624: }
1625: term = term.next;
1626: continue;
1627: } else if (sampleLen == 0) {
1628: back = 1;
1629: } else {
1630: back = findBackReg(data, i + term.distance,
1631: sampleOff, sampleLen, cnt - minCnt,
1632: term.target, end);
1633: //System.out.print("[back="+back+"]");
1634: if (back < 0)
1635: break;
1636: }
1637: cnt -= back;
1638: i -= back;
1639: actual.cnt = cnt;
1640: actual.index = i;
1641:
1642: if (term.eat)
1643: i += sampleLen;
1644:
1645: actual.term = term;
1646: actual = (top = actual).on;
1647: if (actual == null) {
1648: actual = new SearchEntry();
1649: top.on = actual;
1650: actual.sub = top;
1651: }
1652: term = term.next;
1653: continue;
1654: } else
1655: break;
1656: }
1657:
1658: case Term.BACKTRACK_REG_MIN:
1659: //System.out.println("<<");
1660: cnt = actual.cnt;
1661: if (cnt > term.minCount) {
1662: regLen = actual.regLen;
1663: cnt--;
1664: i -= regLen;
1665: actual.cnt = cnt;
1666: actual.index = i;
1667: actual.term = term;
1668: //actual.regLen=regLen;
1669: actual = (top = actual).on;
1670: if (actual == null) {
1671: actual = new SearchEntry();
1672: top.on = actual;
1673: actual.sub = top;
1674: }
1675: term = term.next;
1676: continue;
1677: } else
1678: break;
1679:
1680: case Term.GROUP_IN: {
1681: memreg = term.memreg;
1682: //memreg=0 is a regex itself; we don't need to handle it
1683: //because regex bounds already are in wOffset and wEnd
1684: if (memreg > 0) {
1685: //MemReg mr=memregs[memreg];
1686: //saveMemregState((top!=null)? top: defaultEntry,memreg,mr);
1687: //mr.in=i;
1688:
1689: memregs[memreg].tmp = i; //assume
1690: }
1691: term = term.next;
1692: continue;
1693: }
1694: case Term.GROUP_OUT:
1695: memreg = term.memreg;
1696: //see above
1697: if (memreg > 0) {
1698: //if(term.saveState)saveMemregState((top!=null)? top: defaultEntry,memreg,memregs);
1699:
1700: MemReg mr = memregs[memreg];
1701: SearchEntry.saveMemregState((top != null) ? top
1702: : defaultEntry, memreg, mr);
1703: mr.in = mr.tmp; //commit
1704: mr.out = i;
1705: }
1706: term = term.next;
1707: continue;
1708:
1709: case Term.PLOOKBEHIND_IN: {
1710: int tmp = i - term.distance;
1711: if (tmp < offset)
1712: break;
1713: //System.out.println("term="+term+", next="+term.next);
1714: LAEntry le = lookaheads[term.lookaheadId];
1715: le.index = i;
1716: i = tmp;
1717: le.actual = actual;
1718: le.top = top;
1719: term = term.next;
1720: continue;
1721: }
1722: case Term.INDEPENDENT_IN:
1723: case Term.PLOOKAHEAD_IN: {
1724: LAEntry le = lookaheads[term.lookaheadId];
1725: le.index = i;
1726: le.actual = actual;
1727: le.top = top;
1728: term = term.next;
1729: continue;
1730: }
1731: case Term.LOOKBEHIND_CONDITION_OUT:
1732: case Term.LOOKAHEAD_CONDITION_OUT:
1733: case Term.PLOOKAHEAD_OUT:
1734: case Term.PLOOKBEHIND_OUT: {
1735: LAEntry le = lookaheads[term.lookaheadId];
1736: i = le.index;
1737: actual = le.actual;
1738: top = le.top;
1739: term = term.next;
1740: continue;
1741: }
1742: case Term.INDEPENDENT_OUT: {
1743: LAEntry le = lookaheads[term.lookaheadId];
1744: actual = le.actual;
1745: top = le.top;
1746: term = term.next;
1747: continue;
1748: }
1749: case Term.NLOOKBEHIND_IN: {
1750: int tmp = i - term.distance;
1751: if (tmp < offset) {
1752: term = term.failNext;
1753: continue;
1754: }
1755: LAEntry le = lookaheads[term.lookaheadId];
1756: le.actual = actual;
1757: le.top = top;
1758:
1759: actual.term = term.failNext;
1760: actual.index = i;
1761: i = tmp;
1762: actual = (top = actual).on;
1763: if (actual == null) {
1764: actual = new SearchEntry();
1765: top.on = actual;
1766: actual.sub = top;
1767: }
1768: term = term.next;
1769: continue;
1770: }
1771: case Term.NLOOKAHEAD_IN: {
1772: LAEntry le = lookaheads[term.lookaheadId];
1773: le.actual = actual;
1774: le.top = top;
1775:
1776: actual.term = term.failNext;
1777: actual.index = i;
1778: actual = (top = actual).on;
1779: if (actual == null) {
1780: actual = new SearchEntry();
1781: top.on = actual;
1782: actual.sub = top;
1783: }
1784:
1785: term = term.next;
1786: continue;
1787: }
1788: case Term.NLOOKBEHIND_OUT:
1789: case Term.NLOOKAHEAD_OUT: {
1790: LAEntry le = lookaheads[term.lookaheadId];
1791: actual = le.actual;
1792: top = le.top;
1793: break;
1794: }
1795: case Term.LOOKBEHIND_CONDITION_IN: {
1796: int tmp = i - term.distance;
1797: if (tmp < offset) {
1798: term = term.failNext;
1799: continue;
1800: }
1801: LAEntry le = lookaheads[term.lookaheadId];
1802: le.index = i;
1803: le.actual = actual;
1804: le.top = top;
1805:
1806: actual.term = term.failNext;
1807: actual.index = i;
1808: actual = (top = actual).on;
1809: if (actual == null) {
1810: actual = new SearchEntry();
1811: top.on = actual;
1812: actual.sub = top;
1813: }
1814:
1815: i = tmp;
1816:
1817: term = term.next;
1818: continue;
1819: }
1820: case Term.LOOKAHEAD_CONDITION_IN: {
1821: LAEntry le = lookaheads[term.lookaheadId];
1822: le.index = i;
1823: le.actual = actual;
1824: le.top = top;
1825:
1826: actual.term = term.failNext;
1827: actual.index = i;
1828: actual = (top = actual).on;
1829: if (actual == null) {
1830: actual = new SearchEntry();
1831: top.on = actual;
1832: actual.sub = top;
1833: }
1834:
1835: term = term.next;
1836: continue;
1837: }
1838: case Term.MEMREG_CONDITION: {
1839: MemReg mr = memregs[term.memreg];
1840: int sampleOffset = mr.in;
1841: int sampleOutside = mr.out;
1842: if (sampleOffset >= 0 && sampleOutside >= 0
1843: && sampleOutside >= sampleOffset) {
1844: term = term.next;
1845: } else {
1846: term = term.failNext;
1847: }
1848: continue;
1849: }
1850: case Term.BRANCH_STORE_CNT_AUX1:
1851: actual.regLen = regLen;
1852: case Term.BRANCH_STORE_CNT:
1853: actual.cnt = cnt;
1854: case Term.BRANCH:
1855: actual.term = term.failNext;
1856: actual.index = i;
1857: actual = (top = actual).on;
1858: if (actual == null) {
1859: actual = new SearchEntry();
1860: top.on = actual;
1861: actual.sub = top;
1862: }
1863: term = term.next;
1864: continue;
1865:
1866: case Term.SUCCESS:
1867: //System.out.println("success, matchEnd="+matchEnd+", i="+i+", end="+end);
1868: if (!matchEnd || i == end) {
1869: this .wOffset = memregs[0].in = wOffset;
1870: this .wEnd = memregs[0].out = i;
1871: this .top = top;
1872: return true;
1873: } else
1874: break;
1875:
1876: case Term.CNT_SET_0:
1877: cnt = 0;
1878: term = term.next;
1879: continue;
1880:
1881: case Term.CNT_INC:
1882: cnt++;
1883: term = term.next;
1884: continue;
1885:
1886: case Term.CNT_GT_EQ:
1887: if (cnt >= term.maxCount) {
1888: term = term.next;
1889: continue;
1890: } else
1891: break;
1892:
1893: case Term.READ_CNT_LT:
1894: cnt = actual.cnt;
1895: if (cnt < term.maxCount) {
1896: term = term.next;
1897: continue;
1898: } else
1899: break;
1900:
1901: case Term.CRSTORE_CRINC: {
1902: int cntvalue = counters[cntreg = term.cntreg];
1903: SearchEntry.saveCntState((top != null) ? top
1904: : defaultEntry, cntreg, cntvalue);
1905: counters[cntreg] = ++cntvalue;
1906: term = term.next;
1907: continue;
1908: }
1909: case Term.CR_SET_0:
1910: counters[term.cntreg] = 0;
1911:
1912: term = term.next;
1913: continue;
1914:
1915: case Term.CR_LT:
1916: if (counters[term.cntreg] < term.maxCount) {
1917: term = term.next;
1918: continue;
1919: } else
1920: break;
1921:
1922: case Term.CR_GT_EQ:
1923: if (counters[term.cntreg] >= term.maxCount) {
1924: term = term.next;
1925: continue;
1926: } else
1927: break;
1928:
1929: default:
1930: throw new Error("unknown term type: " + term.type);
1931: }
1932:
1933: //if(top==null) break matchHere;
1934: if (allowIncomplete && i == end) {
1935: //an attempt to implement matchesPrefix()
1936: //not sure it's a good way
1937: //27-04-2002: just as expencted,
1938: //the side effect was found (and POSSIBLY fixed);
1939: //see the case Term.START
1940: return true;
1941: }
1942: if (top == null) {
1943: break matchHere;
1944: }
1945:
1946: //pop the stack
1947: top = (actual = top).sub;
1948: term = actual.term;
1949: i = actual.index;
1950: //System.out.println("***POP*** : branch to #"+term.instanceNum+" at "+i);
1951: if (actual.isState) {
1952: SearchEntry.popState(actual, memregs, counters);
1953: }
1954: }
1955:
1956: if (defaultEntry.isState)
1957: SearchEntry.popState(defaultEntry, memregs, counters);
1958:
1959: term = root;
1960: //wOffset++;
1961: //i=wOffset;
1962: i = ++wOffset;
1963: }
1964: this .wOffset = wOffset;
1965: this .top = top;
1966:
1967: return false;
1968: }
1969:
1970: private static final boolean compareRegions(char[] arr, int off1,
1971: int off2, int len, int out) {
1972: //System.out.print("out="+out+", off1="+off1+", off2="+off2+", len="+len+", reg1="+new String(arr,off1,len)+", reg2="+new String(arr,off2,len));
1973: int p1 = off1 + len - 1;
1974: int p2 = off2 + len - 1;
1975: if (p1 >= out || p2 >= out) {
1976: //System.out.println(" : out");
1977: return false;
1978: }
1979: for (int c = len; c > 0; c--, p1--, p2--) {
1980: if (arr[p1] != arr[p2]) {
1981: //System.out.println(" : no");
1982: return false;
1983: }
1984: }
1985: //System.out.println(" : yes");
1986: return true;
1987: }
1988:
1989: private static final boolean compareRegionsI(char[] arr, int off1,
1990: int off2, int len, int out) {
1991: int p1 = off1 + len - 1;
1992: int p2 = off2 + len - 1;
1993: if (p1 >= out || p2 >= out) {
1994: return false;
1995: }
1996: char c1, c2;
1997: for (int c = len; c > 0; c--, p1--, p2--) {
1998: if ((c1 = arr[p1]) != Character.toLowerCase(c2 = arr[p2])
1999: && c1 != Character.toUpperCase(c2)
2000: && c1 != Character.toTitleCase(c2))
2001: return false;
2002: }
2003: return true;
2004: }
2005:
2006: //repeat while matches
2007: private static final int repeat(char[] data, int off, int out,
2008: Term term) {
2009: //System.out.print("off="+off+", out="+out+", term="+term);
2010: switch (term.type) {
2011: case Term.CHAR: {
2012: char c = term.c;
2013: int i = off;
2014: while (i < out) {
2015: if (data[i] != c)
2016: break;
2017: i++;
2018: }
2019: //System.out.println(", returning "+(i-off));
2020: return i - off;
2021: }
2022: case Term.ANY_CHAR: {
2023: return out - off;
2024: }
2025: case Term.ANY_CHAR_NE: {
2026: int i = off;
2027: while (i < out) {
2028: if (data[i] == '\n')
2029: break;
2030: i++;
2031: }
2032: return i - off;
2033: }
2034: case Term.BITSET: {
2035: boolean[] arr = term.bitset;
2036: int i = off;
2037: char c;
2038: if (term.inverse)
2039: while (i < out) {
2040: if ((c = data[i]) <= 255 && arr[c])
2041: break;
2042: else
2043: i++;
2044: }
2045: else
2046: while (i < out) {
2047: if ((c = data[i]) <= 255 && arr[c])
2048: i++;
2049: else
2050: break;
2051: }
2052: return i - off;
2053: }
2054: case Term.BITSET2: {
2055: int i = off;
2056: boolean[][] bitset2 = term.bitset2;
2057: char c;
2058: if (term.inverse)
2059: while (i < out) {
2060: boolean[] arr = bitset2[(c = data[i]) >> 8];
2061: if (arr != null && arr[c & 0xff])
2062: break;
2063: else
2064: i++;
2065: }
2066: else
2067: while (i < out) {
2068: boolean[] arr = bitset2[(c = data[i]) >> 8];
2069: if (arr != null && arr[c & 0xff])
2070: i++;
2071: else
2072: break;
2073: }
2074: return i - off;
2075: }
2076: }
2077: throw new Error("this kind of term can't be quantified:"
2078: + term.type);
2079: }
2080:
2081: //repeat while doesn't match
2082: private static final int find(char[] data, int off, int out,
2083: Term term) {
2084: //System.out.print("off="+off+", out="+out+", term="+term);
2085: if (off >= out)
2086: return -1;
2087: switch (term.type) {
2088: case Term.CHAR: {
2089: char c = term.c;
2090: int i = off;
2091: while (i < out) {
2092: if (data[i] == c)
2093: break;
2094: i++;
2095: }
2096: //System.out.println(", returning "+(i-off));
2097: return i - off;
2098: }
2099: case Term.BITSET: {
2100: boolean[] arr = term.bitset;
2101: int i = off;
2102: char c;
2103: if (!term.inverse)
2104: while (i < out) {
2105: if ((c = data[i]) <= 255 && arr[c])
2106: break;
2107: else
2108: i++;
2109: }
2110: else
2111: while (i < out) {
2112: if ((c = data[i]) <= 255 && arr[c])
2113: i++;
2114: else
2115: break;
2116: }
2117: return i - off;
2118: }
2119: case Term.BITSET2: {
2120: int i = off;
2121: boolean[][] bitset2 = term.bitset2;
2122: char c;
2123: if (!term.inverse)
2124: while (i < out) {
2125: boolean[] arr = bitset2[(c = data[i]) >> 8];
2126: if (arr != null && arr[c & 0xff])
2127: break;
2128: else
2129: i++;
2130: }
2131: else
2132: while (i < out) {
2133: boolean[] arr = bitset2[(c = data[i]) >> 8];
2134: if (arr != null && arr[c & 0xff])
2135: i++;
2136: else
2137: break;
2138: }
2139: return i - off;
2140: }
2141: }
2142: throw new IllegalArgumentException(
2143: "can't seek this kind of term:" + term.type);
2144: }
2145:
2146: private static final int findReg(char[] data, int off, int regOff,
2147: int regLen, Term term, int out) {
2148: //System.out.print("off="+off+", out="+out+", term="+term);
2149: if (off >= out)
2150: return -1;
2151: int i = off;
2152: if (term.type == Term.REG) {
2153: while (i < out) {
2154: if (compareRegions(data, i, regOff, regLen, out))
2155: break;
2156: i++;
2157: }
2158: } else if (term.type == Term.REG_I) {
2159: while (i < out) {
2160: if (compareRegionsI(data, i, regOff, regLen, out))
2161: break;
2162: i++;
2163: }
2164: } else
2165: throw new IllegalArgumentException(
2166: "wrong findReg() target:" + term.type);
2167: return off - i;
2168: }
2169:
2170: private static final int findBack(char[] data, int off,
2171: int maxCount, Term term) {
2172: //System.out.print("off="+off+", maxCount="+maxCount+", term="+term);
2173: switch (term.type) {
2174: case Term.CHAR: {
2175: char c = term.c;
2176: int i = off;
2177: int iMin = off - maxCount;
2178: for (;;) {
2179: if (data[--i] == c)
2180: break;
2181: if (i <= iMin)
2182: return -1;
2183: }
2184: //System.out.println(", returning "+(off-i));
2185: return off - i;
2186: }
2187: case Term.BITSET: {
2188: boolean[] arr = term.bitset;
2189: int i = off;
2190: char c;
2191: int iMin = off - maxCount;
2192: if (!term.inverse)
2193: for (;;) {
2194: if ((c = data[--i]) <= 255 && arr[c])
2195: break;
2196: if (i <= iMin)
2197: return -1;
2198: }
2199: else
2200: for (;;) {
2201: if ((c = data[--i]) > 255 || !arr[c])
2202: break;
2203: if (i <= iMin)
2204: return -1;
2205: }
2206: return off - i;
2207: }
2208: case Term.BITSET2: {
2209: boolean[][] bitset2 = term.bitset2;
2210: int i = off;
2211: char c;
2212: int iMin = off - maxCount;
2213: if (!term.inverse)
2214: for (;;) {
2215: boolean[] arr = bitset2[(c = data[--i]) >> 8];
2216: if (arr != null && arr[c & 0xff])
2217: break;
2218: if (i <= iMin)
2219: return -1;
2220: }
2221: else
2222: for (;;) {
2223: boolean[] arr = bitset2[(c = data[--i]) >> 8];
2224: if (arr == null || arr[c & 0xff])
2225: break;
2226: if (i <= iMin)
2227: return -1;
2228: }
2229: return off - i;
2230: }
2231: }
2232: throw new IllegalArgumentException(
2233: "can't find this kind of term:" + term.type);
2234: }
2235:
2236: private static final int findBackReg(char[] data, int off,
2237: int regOff, int regLen, int maxCount, Term term, int out) {
2238: //assume that the cases when regLen==0 or maxCount==0 are handled by caller
2239: int i = off;
2240: int iMin = off - maxCount;
2241: if (term.type == Term.REG) {
2242: /*@since 1.2*/
2243: char first = data[regOff];
2244: regOff++;
2245: regLen--;
2246: for (;;) {
2247: i--;
2248: if (data[i] == first
2249: && compareRegions(data, i + 1, regOff, regLen,
2250: out))
2251: break;
2252: if (i <= iMin)
2253: return -1;
2254: }
2255: } else if (term.type == Term.REG_I) {
2256: /*@since 1.2*/
2257: char c = data[regOff];
2258: char firstLower = Character.toLowerCase(c);
2259: char firstUpper = Character.toUpperCase(c);
2260: char firstTitle = Character.toTitleCase(c);
2261: regOff++;
2262: regLen--;
2263: for (;;) {
2264: i--;
2265: if (((c = data[i]) == firstLower || c == firstUpper || c == firstTitle)
2266: && compareRegionsI(data, i + 1, regOff, regLen,
2267: out))
2268: break;
2269: if (i <= iMin)
2270: return -1;
2271: }
2272: return off - i;
2273: } else
2274: throw new IllegalArgumentException(
2275: "wrong findBackReg() target type :" + term.type);
2276: return off - i;
2277: }
2278:
2279: public String toString_d() {
2280: StringBuffer s = new StringBuffer();
2281: s.append("counters: ");
2282: s.append(counters == null ? 0 : counters.length);
2283:
2284: s.append("\r\nmemregs: ");
2285: s.append(memregs.length);
2286: for (int i = 0; i < memregs.length; i++)
2287: s.append("\r\n #" + i + ": [" + memregs[i].in + ","
2288: + memregs[i].out + "](\""
2289: + getString(memregs[i].in, memregs[i].out) + "\")");
2290:
2291: s.append("\r\ndata: ");
2292: if (data != null)
2293: s.append(data.length);
2294: else
2295: s.append("[none]");
2296:
2297: s.append("\r\noffset: ");
2298: s.append(offset);
2299:
2300: s.append("\r\nend: ");
2301: s.append(end);
2302:
2303: s.append("\r\nwOffset: ");
2304: s.append(wOffset);
2305:
2306: s.append("\r\nwEnd: ");
2307: s.append(wEnd);
2308:
2309: s.append("\r\nregex: ");
2310: s.append(re);
2311: return s.toString();
2312: }
2313: }
2314:
2315: class SearchEntry {
2316: Term term;
2317: int index;
2318: int cnt;
2319: int regLen;
2320:
2321: boolean isState;
2322:
2323: SearchEntry sub, on;
2324:
2325: private static class MState {
2326: int index, in, out;
2327: MState next, prev;
2328: }
2329:
2330: private static class CState {
2331: int index, value;
2332: CState next, prev;
2333: }
2334:
2335: private MState mHead, mCurrent;
2336: private CState cHead, cCurrent;
2337:
2338: final static void saveMemregState(SearchEntry entry, int memreg,
2339: MemReg mr) {
2340: //System.out.println("saveMemregState("+entry+","+memreg+"):");
2341: entry.isState = true;
2342: MState current = entry.mCurrent;
2343: if (current == null) {
2344: MState head = entry.mHead;
2345: if (head == null)
2346: entry.mHead = entry.mCurrent = current = new MState();
2347: else
2348: current = head;
2349: } else {
2350: MState next = current.next;
2351: if (next == null) {
2352: current.next = next = new MState();
2353: next.prev = current;
2354: }
2355: current = next;
2356: }
2357: current.index = memreg;
2358: current.in = mr.in;
2359: current.out = mr.out;
2360: entry.mCurrent = current;
2361: }
2362:
2363: final static void saveCntState(SearchEntry entry, int cntreg,
2364: int value) {
2365: entry.isState = true;
2366: CState current = entry.cCurrent;
2367: if (current == null) {
2368: CState head = entry.cHead;
2369: if (head == null)
2370: entry.cHead = entry.cCurrent = current = new CState();
2371: else
2372: current = head;
2373: } else {
2374: CState next = current.next;
2375: if (next == null) {
2376: current.next = next = new CState();
2377: next.prev = current;
2378: }
2379: current = next;
2380: }
2381: current.index = cntreg;
2382: current.value = value;
2383: entry.cCurrent = current;
2384: }
2385:
2386: final static void popState(SearchEntry entry, MemReg[] memregs,
2387: int[] counters) {
2388: //System.out.println("popState("+entry+"):");
2389: MState ms = entry.mCurrent;
2390: while (ms != null) {
2391: MemReg mr = memregs[ms.index];
2392: mr.in = ms.in;
2393: mr.out = ms.out;
2394: ms = ms.prev;
2395: }
2396: CState cs = entry.cCurrent;
2397: while (cs != null) {
2398: counters[cs.index] = cs.value;
2399: cs = cs.prev;
2400: }
2401: entry.mCurrent = null;
2402: entry.cCurrent = null;
2403: entry.isState = false;
2404: }
2405:
2406: final void reset(int restQueue) {
2407: term = null;
2408: index = cnt = regLen = 0;
2409:
2410: mCurrent = null;
2411: cCurrent = null;
2412: isState = false;
2413:
2414: SearchEntry on = this .on;
2415: if (on != null) {
2416: if (restQueue > 0)
2417: on.reset(restQueue - 1);
2418: else {
2419: this .on = null;
2420: on.sub = null;
2421: }
2422: }
2423: //sub=on=null;
2424: }
2425: }
2426:
2427: class MemReg {
2428: int index;
2429:
2430: int in = -1, out = -1;
2431: int tmp = -1; //for assuming at GROUP_IN
2432:
2433: MemReg(int index) {
2434: this .index = index;
2435: }
2436:
2437: void reset() {
2438: in = out = -1;
2439: }
2440: }
2441:
2442: class LAEntry {
2443: int index;
2444: SearchEntry top, actual;
2445: }
|