Source Code Cross Referenced for NativeRegExp.java in  » Scripting » rhino » org » mozilla » javascript » regexp » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Scripting » rhino » org.mozilla.javascript.regexp 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
0002:         *
0003:         * ***** BEGIN LICENSE BLOCK *****
0004:         * Version: MPL 1.1/GPL 2.0
0005:         *
0006:         * The contents of this file are subject to the Mozilla Public License Version
0007:         * 1.1 (the "License"); you may not use this file except in compliance with
0008:         * the License. You may obtain a copy of the License at
0009:         * http://www.mozilla.org/MPL/
0010:         *
0011:         * Software distributed under the License is distributed on an "AS IS" basis,
0012:         * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
0013:         * for the specific language governing rights and limitations under the
0014:         * License.
0015:         *
0016:         * The Original Code is Rhino code, released
0017:         * May 6, 1998.
0018:         *
0019:         * The Initial Developer of the Original Code is
0020:         * Netscape Communications Corporation.
0021:         * Portions created by the Initial Developer are Copyright (C) 1997-1999
0022:         * the Initial Developer. All Rights Reserved.
0023:         *
0024:         * Contributor(s):
0025:         *   Norris Boyd
0026:         *   Igor Bukanov
0027:         *   Brendan Eich
0028:         *   Matthias Radestock
0029:         *
0030:         * Alternatively, the contents of this file may be used under the terms of
0031:         * the GNU General Public License Version 2 or later (the "GPL"), in which
0032:         * case the provisions of the GPL are applicable instead of those above. If
0033:         * you wish to allow use of your version of this file only under the terms of
0034:         * the GPL and not to allow others to use your version of this file under the
0035:         * MPL, indicate your decision by deleting the provisions above and replacing
0036:         * them with the notice and other provisions required by the GPL. If you do
0037:         * not delete the provisions above, a recipient may use your version of this
0038:         * file under either the MPL or the GPL.
0039:         *
0040:         * ***** END LICENSE BLOCK ***** */
0041:
0042:        package org.mozilla.javascript.regexp;
0043:
0044:        import java.io.Serializable;
0045:
0046:        import org.mozilla.javascript.Context;
0047:        import org.mozilla.javascript.Function;
0048:        import org.mozilla.javascript.IdFunctionObject;
0049:        import org.mozilla.javascript.IdScriptableObject;
0050:        import org.mozilla.javascript.Kit;
0051:        import org.mozilla.javascript.ScriptRuntime;
0052:        import org.mozilla.javascript.Scriptable;
0053:        import org.mozilla.javascript.ScriptableObject;
0054:        import org.mozilla.javascript.Undefined;
0055:
0056:        /**
0057:         * This class implements the RegExp native object.
0058:         *
0059:         * Revision History:
0060:         * Implementation in C by Brendan Eich
0061:         * Initial port to Java by Norris Boyd from jsregexp.c version 1.36
0062:         * Merged up to version 1.38, which included Unicode support.
0063:         * Merged bug fixes in version 1.39.
0064:         * Merged JSFUN13_BRANCH changes up to 1.32.2.13
0065:         *
0066:         * @author Brendan Eich
0067:         * @author Norris Boyd
0068:         */
0069:
0070:        public class NativeRegExp extends IdScriptableObject implements 
0071:                Function {
0072:            static final long serialVersionUID = 4965263491464903264L;
0073:
0074:            private static final Object REGEXP_TAG = new Object();
0075:
0076:            public static final int JSREG_GLOB = 0x1; // 'g' flag: global
0077:            public static final int JSREG_FOLD = 0x2; // 'i' flag: fold
0078:            public static final int JSREG_MULTILINE = 0x4; // 'm' flag: multiline
0079:
0080:            //type of match to perform
0081:            public static final int TEST = 0;
0082:            public static final int MATCH = 1;
0083:            public static final int PREFIX = 2;
0084:
0085:            private static final boolean debug = false;
0086:
0087:            private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. */
0088:            private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */
0089:            private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */
0090:            private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */
0091:            private static final byte REOP_WBDRY = 4; /* match "" at word boundary */
0092:            private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */
0093:            private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */
0094:            private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */
0095:            private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */
0096:            private static final byte REOP_OPT = 9; /* optional subexpression in kid */
0097:            private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */
0098:            private static final byte REOP_RPAREN = 11; /* right paren bytecode */
0099:            private static final byte REOP_DOT = 12; /* stands for any character */
0100:            //    private static final byte REOP_CCLASS        = 13; /* character class: [a-f] */
0101:            private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */
0102:            private static final byte REOP_NONDIGIT = 15; /* match a non-digit char: [^0-9] */
0103:            private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */
0104:            private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
0105:            private static final byte REOP_SPACE = 18; /* match a whitespace char */
0106:            private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */
0107:            private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */
0108:            private static final byte REOP_FLAT = 21; /* match a flat string */
0109:            private static final byte REOP_FLAT1 = 22; /* match a single char */
0110:            private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */
0111:            //    private static final byte REOP_DOTSTAR       = 24; /* optimize .* to use a single opcode */
0112:            //    private static final byte REOP_ANCHOR        = 25; /* like .* but skips left context to unanchored r.e. */
0113:            //    private static final byte REOP_EOLONLY       = 26; /* $ not preceded by any pattern */
0114:            //    private static final byte REOP_UCFLAT        = 27; /* flat Unicode string; len immediate counts chars */
0115:            private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */
0116:            //    private static final byte REOP_UCCLASS       = 29; /* Unicode character class, vector of chars to match */
0117:            //    private static final byte REOP_NUCCLASS      = 30; /* negated Unicode character class */
0118:            //    private static final byte REOP_BACKREFi      = 31; /* case-independent REOP_BACKREF */
0119:            private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */
0120:            private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */
0121:            //    private static final byte REOP_UCFLATi       = 34; /* case-independent REOP_UCFLAT */
0122:            private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */
0123:            //    private static final byte REOP_ANCHOR1       = 36; /* first-char discriminating REOP_ANCHOR */
0124:            //    private static final byte REOP_NCCLASS       = 37; /* negated 8-bit character class */
0125:            //    private static final byte REOP_DOTSTARMIN    = 38; /* ungreedy version of REOP_DOTSTAR */
0126:            //    private static final byte REOP_LPARENNON     = 39; /* non-capturing version of REOP_LPAREN */
0127:            //    private static final byte REOP_RPARENNON     = 40; /* non-capturing version of REOP_RPAREN */
0128:            private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */
0129:            private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */
0130:            private static final byte REOP_ASSERTTEST = 43; /* sentinel at end of assertion child */
0131:            private static final byte REOP_ASSERTNOTTEST = 44; /* sentinel at end of !assertion child */
0132:            private static final byte REOP_MINIMALSTAR = 45; /* non-greedy version of * */
0133:            private static final byte REOP_MINIMALPLUS = 46; /* non-greedy version of + */
0134:            private static final byte REOP_MINIMALOPT = 47; /* non-greedy version of ? */
0135:            private static final byte REOP_MINIMALQUANT = 48; /* non-greedy version of {} */
0136:            private static final byte REOP_ENDCHILD = 49; /* sentinel at end of quantifier child */
0137:            private static final byte REOP_CLASS = 50; /* character class with index */
0138:            private static final byte REOP_REPEAT = 51; /* directs execution of greedy quantifier */
0139:            private static final byte REOP_MINIMALREPEAT = 52; /* directs execution of non-greedy quantifier */
0140:            private static final byte REOP_END = 53;
0141:
0142:            public static void init(Context cx, Scriptable scope, boolean sealed) {
0143:
0144:                NativeRegExp proto = new NativeRegExp();
0145:                proto.re = (RECompiled) compileRE(cx, "", null, false);
0146:                proto.activatePrototypeMap(MAX_PROTOTYPE_ID);
0147:                proto.setParentScope(scope);
0148:                proto.setPrototype(getObjectPrototype(scope));
0149:
0150:                NativeRegExpCtor ctor = new NativeRegExpCtor();
0151:                // Bug #324006: ECMA-262 15.10.6.1 says "The initial value of
0152:                // RegExp.prototype.constructor is the builtin RegExp constructor." 
0153:                proto.put("constructor", proto, ctor);
0154:
0155:                ScriptRuntime.setFunctionProtoAndParent(ctor, scope);
0156:
0157:                ctor.setImmunePrototypeProperty(proto);
0158:
0159:                if (sealed) {
0160:                    proto.sealObject();
0161:                    ctor.sealObject();
0162:                }
0163:
0164:                defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM);
0165:            }
0166:
0167:            NativeRegExp(Scriptable scope, Object regexpCompiled) {
0168:                this .re = (RECompiled) regexpCompiled;
0169:                this .lastIndex = 0;
0170:                ScriptRuntime.setObjectProtoAndParent(this , scope);
0171:            }
0172:
0173:            public String getClassName() {
0174:                return "RegExp";
0175:            }
0176:
0177:            public Object call(Context cx, Scriptable scope,
0178:                    Scriptable this Obj, Object[] args) {
0179:                return execSub(cx, scope, args, MATCH);
0180:            }
0181:
0182:            public Scriptable construct(Context cx, Scriptable scope,
0183:                    Object[] args) {
0184:                return (Scriptable) execSub(cx, scope, args, MATCH);
0185:            }
0186:
0187:            Scriptable compile(Context cx, Scriptable scope, Object[] args) {
0188:                if (args.length > 0 && args[0] instanceof  NativeRegExp) {
0189:                    if (args.length > 1 && args[1] != Undefined.instance) {
0190:                        // report error
0191:                        throw ScriptRuntime
0192:                                .typeError0("msg.bad.regexp.compile");
0193:                    }
0194:                    NativeRegExp thatObj = (NativeRegExp) args[0];
0195:                    this .re = thatObj.re;
0196:                    this .lastIndex = thatObj.lastIndex;
0197:                    return this ;
0198:                }
0199:                String s = args.length == 0 ? "" : ScriptRuntime
0200:                        .toString(args[0]);
0201:                String global = args.length > 1
0202:                        && args[1] != Undefined.instance ? ScriptRuntime
0203:                        .toString(args[1]) : null;
0204:                this .re = (RECompiled) compileRE(cx, s, global, false);
0205:                this .lastIndex = 0;
0206:                return this ;
0207:            }
0208:
0209:            public String toString() {
0210:                StringBuffer buf = new StringBuffer();
0211:                buf.append('/');
0212:                if (re.source.length != 0) {
0213:                    buf.append(re.source);
0214:                } else {
0215:                    // See bugzilla 226045
0216:                    buf.append("(?:)");
0217:                }
0218:                buf.append('/');
0219:                if ((re.flags & JSREG_GLOB) != 0)
0220:                    buf.append('g');
0221:                if ((re.flags & JSREG_FOLD) != 0)
0222:                    buf.append('i');
0223:                if ((re.flags & JSREG_MULTILINE) != 0)
0224:                    buf.append('m');
0225:                return buf.toString();
0226:            }
0227:
0228:            NativeRegExp() {
0229:            }
0230:
0231:            private static RegExpImpl getImpl(Context cx) {
0232:                return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx);
0233:            }
0234:
0235:            private Object execSub(Context cx, Scriptable scopeObj,
0236:                    Object[] args, int matchType) {
0237:                RegExpImpl reImpl = getImpl(cx);
0238:                String str;
0239:                if (args.length == 0) {
0240:                    str = reImpl.input;
0241:                    if (str == null) {
0242:                        reportError("msg.no.re.input.for", toString());
0243:                    }
0244:                } else {
0245:                    str = ScriptRuntime.toString(args[0]);
0246:                }
0247:                double d = ((re.flags & JSREG_GLOB) != 0) ? lastIndex : 0;
0248:
0249:                Object rval;
0250:                if (d < 0 || str.length() < d) {
0251:                    lastIndex = 0;
0252:                    rval = null;
0253:                } else {
0254:                    int indexp[] = { (int) d };
0255:                    rval = executeRegExp(cx, scopeObj, reImpl, str, indexp,
0256:                            matchType);
0257:                    if ((re.flags & JSREG_GLOB) != 0) {
0258:                        lastIndex = (rval == null || rval == Undefined.instance) ? 0
0259:                                : indexp[0];
0260:                    }
0261:                }
0262:                return rval;
0263:            }
0264:
0265:            static Object compileRE(Context cx, String str, String global,
0266:                    boolean flat) {
0267:                RECompiled regexp = new RECompiled();
0268:                regexp.source = str.toCharArray();
0269:                int length = str.length();
0270:
0271:                int flags = 0;
0272:                if (global != null) {
0273:                    for (int i = 0; i < global.length(); i++) {
0274:                        char c = global.charAt(i);
0275:                        if (c == 'g') {
0276:                            flags |= JSREG_GLOB;
0277:                        } else if (c == 'i') {
0278:                            flags |= JSREG_FOLD;
0279:                        } else if (c == 'm') {
0280:                            flags |= JSREG_MULTILINE;
0281:                        } else {
0282:                            reportError("msg.invalid.re.flag", String
0283:                                    .valueOf(c));
0284:                        }
0285:                    }
0286:                }
0287:                regexp.flags = flags;
0288:
0289:                CompilerState state = new CompilerState(cx, regexp.source,
0290:                        length, flags);
0291:                if (flat && length > 0) {
0292:                    if (debug) {
0293:                        System.out.println("flat = \"" + str + "\"");
0294:                    }
0295:                    state.result = new RENode(REOP_FLAT);
0296:                    state.result.chr = state.cpbegin[0];
0297:                    state.result.length = length;
0298:                    state.result.flatIndex = 0;
0299:                    state.progLength += 5;
0300:                } else if (!parseDisjunction(state))
0301:                    return null;
0302:
0303:                regexp.program = new byte[state.progLength + 1];
0304:                if (state.classCount != 0) {
0305:                    regexp.classList = new RECharSet[state.classCount];
0306:                    regexp.classCount = state.classCount;
0307:                }
0308:                int endPC = emitREBytecode(state, regexp, 0, state.result);
0309:                regexp.program[endPC++] = REOP_END;
0310:
0311:                if (debug) {
0312:                    System.out.println("Prog. length = " + endPC);
0313:                    for (int i = 0; i < endPC; i++) {
0314:                        System.out.print(regexp.program[i]);
0315:                        if (i < (endPC - 1))
0316:                            System.out.print(", ");
0317:                    }
0318:                    System.out.println();
0319:                }
0320:                regexp.parenCount = state.parenCount;
0321:
0322:                // If re starts with literal, init anchorCh accordingly
0323:                switch (regexp.program[0]) {
0324:                case REOP_UCFLAT1:
0325:                case REOP_UCFLAT1i:
0326:                    regexp.anchorCh = (char) getIndex(regexp.program, 1);
0327:                    break;
0328:                case REOP_FLAT1:
0329:                case REOP_FLAT1i:
0330:                    regexp.anchorCh = (char) (regexp.program[1] & 0xFF);
0331:                    break;
0332:                case REOP_FLAT:
0333:                case REOP_FLATi:
0334:                    int k = getIndex(regexp.program, 1);
0335:                    regexp.anchorCh = regexp.source[k];
0336:                    break;
0337:                }
0338:
0339:                if (debug) {
0340:                    if (regexp.anchorCh >= 0) {
0341:                        System.out.println("Anchor ch = '"
0342:                                + (char) regexp.anchorCh + "'");
0343:                    }
0344:                }
0345:                return regexp;
0346:            }
0347:
0348:            static boolean isDigit(char c) {
0349:                return '0' <= c && c <= '9';
0350:            }
0351:
0352:            private static boolean isWord(char c) {
0353:                return Character.isLetter(c) || isDigit(c) || c == '_';
0354:            }
0355:
0356:            private static boolean isLineTerm(char c) {
0357:                return ScriptRuntime.isJSLineTerminator(c);
0358:            }
0359:
0360:            private static boolean isREWhiteSpace(int c) {
0361:                return (c == '\u0020' || c == '\u0009' || c == '\n'
0362:                        || c == '\r' || c == 0x2028 || c == 0x2029
0363:                        || c == '\u000C' || c == '\u000B' || c == '\u00A0' || Character
0364:                        .getType((char) c) == Character.SPACE_SEPARATOR);
0365:            }
0366:
0367:            /*
0368:             *
0369:             * 1. If IgnoreCase is false, return ch.
0370:             * 2. Let u be ch converted to upper case as if by calling
0371:             *    String.prototype.toUpperCase on the one-character string ch.
0372:             * 3. If u does not consist of a single character, return ch.
0373:             * 4. Let cu be u's character.
0374:             * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
0375:             *    code point value is less than decimal 128, then return ch.
0376:             * 6. Return cu.
0377:             */
0378:            private static char upcase(char ch) {
0379:                if (ch < 128) {
0380:                    if ('a' <= ch && ch <= 'z') {
0381:                        return (char) (ch + ('A' - 'a'));
0382:                    }
0383:                    return ch;
0384:                }
0385:                char cu = Character.toUpperCase(ch);
0386:                if ((ch >= 128) && (cu < 128))
0387:                    return ch;
0388:                return cu;
0389:            }
0390:
0391:            private static char downcase(char ch) {
0392:                if (ch < 128) {
0393:                    if ('A' <= ch && ch <= 'Z') {
0394:                        return (char) (ch + ('a' - 'A'));
0395:                    }
0396:                    return ch;
0397:                }
0398:                char cl = Character.toLowerCase(ch);
0399:                if ((ch >= 128) && (cl < 128))
0400:                    return ch;
0401:                return cl;
0402:            }
0403:
0404:            /*
0405:             * Validates and converts hex ascii value.
0406:             */
0407:            private static int toASCIIHexDigit(int c) {
0408:                if (c < '0')
0409:                    return -1;
0410:                if (c <= '9') {
0411:                    return c - '0';
0412:                }
0413:                c |= 0x20;
0414:                if ('a' <= c && c <= 'f') {
0415:                    return c - 'a' + 10;
0416:                }
0417:                return -1;
0418:            }
0419:
0420:            /*
0421:             * Top-down regular expression grammar, based closely on Perl4.
0422:             *
0423:             *  regexp:     altern                  A regular expression is one or more
0424:             *              altern '|' regexp       alternatives separated by vertical bar.
0425:             */
0426:            private static boolean parseDisjunction(CompilerState state) {
0427:                if (!parseAlternative(state))
0428:                    return false;
0429:                char[] source = state.cpbegin;
0430:                int index = state.cp;
0431:                if (index != source.length && source[index] == '|') {
0432:                    RENode altResult;
0433:                    ++state.cp;
0434:                    altResult = new RENode(REOP_ALT);
0435:                    altResult.kid = state.result;
0436:                    if (!parseDisjunction(state))
0437:                        return false;
0438:                    altResult.kid2 = state.result;
0439:                    state.result = altResult;
0440:                    /* ALT, <next>, ..., JUMP, <end> ... JUMP <end> */
0441:                    state.progLength += 9;
0442:                }
0443:                return true;
0444:            }
0445:
0446:            /*
0447:             *  altern:     item                    An alternative is one or more items,
0448:             *              item altern             concatenated together.
0449:             */
0450:            private static boolean parseAlternative(CompilerState state) {
0451:                RENode headTerm = null;
0452:                RENode tailTerm = null;
0453:                char[] source = state.cpbegin;
0454:                while (true) {
0455:                    if (state.cp == state.cpend
0456:                            || source[state.cp] == '|'
0457:                            || (state.parenNesting != 0 && source[state.cp] == ')')) {
0458:                        if (headTerm == null) {
0459:                            state.result = new RENode(REOP_EMPTY);
0460:                        } else
0461:                            state.result = headTerm;
0462:                        return true;
0463:                    }
0464:                    if (!parseTerm(state))
0465:                        return false;
0466:                    if (headTerm == null)
0467:                        headTerm = state.result;
0468:                    else {
0469:                        if (tailTerm == null) {
0470:                            headTerm.next = state.result;
0471:                            tailTerm = state.result;
0472:                            while (tailTerm.next != null)
0473:                                tailTerm = tailTerm.next;
0474:                        } else {
0475:                            tailTerm.next = state.result;
0476:                            tailTerm = tailTerm.next;
0477:                            while (tailTerm.next != null)
0478:                                tailTerm = tailTerm.next;
0479:                        }
0480:                    }
0481:                }
0482:            }
0483:
0484:            /* calculate the total size of the bitmap required for a class expression */
0485:            private static boolean calculateBitmapSize(CompilerState state,
0486:                    RENode target, char[] src, int index, int end) {
0487:                char rangeStart = 0;
0488:                char c;
0489:                int n;
0490:                int nDigits;
0491:                int i;
0492:                int max = 0;
0493:                boolean inRange = false;
0494:
0495:                target.bmsize = 0;
0496:
0497:                if (index == end)
0498:                    return true;
0499:
0500:                if (src[index] == '^')
0501:                    ++index;
0502:
0503:                while (index != end) {
0504:                    int localMax = 0;
0505:                    nDigits = 2;
0506:                    switch (src[index]) {
0507:                    case '\\':
0508:                        ++index;
0509:                        c = src[index++];
0510:                        switch (c) {
0511:                        case 'b':
0512:                            localMax = 0x8;
0513:                            break;
0514:                        case 'f':
0515:                            localMax = 0xC;
0516:                            break;
0517:                        case 'n':
0518:                            localMax = 0xA;
0519:                            break;
0520:                        case 'r':
0521:                            localMax = 0xD;
0522:                            break;
0523:                        case 't':
0524:                            localMax = 0x9;
0525:                            break;
0526:                        case 'v':
0527:                            localMax = 0xB;
0528:                            break;
0529:                        case 'c':
0530:                            if (((index + 1) < end)
0531:                                    && Character.isLetter(src[index + 1]))
0532:                                localMax = (char) (src[index++] & 0x1F);
0533:                            else
0534:                                localMax = '\\';
0535:                            break;
0536:                        case 'u':
0537:                            nDigits += 2;
0538:                            // fall thru...
0539:                        case 'x':
0540:                            n = 0;
0541:                            for (i = 0; (i < nDigits) && (index < end); i++) {
0542:                                c = src[index++];
0543:                                n = Kit.xDigitToInt(c, n);
0544:                                if (n < 0) {
0545:                                    // Back off to accepting the original
0546:                                    // '\' as a literal
0547:                                    index -= (i + 1);
0548:                                    n = '\\';
0549:                                    break;
0550:                                }
0551:                            }
0552:                            localMax = n;
0553:                            break;
0554:                        case 'd':
0555:                            if (inRange) {
0556:                                reportError("msg.bad.range", "");
0557:                                return false;
0558:                            }
0559:                            localMax = '9';
0560:                            break;
0561:                        case 'D':
0562:                        case 's':
0563:                        case 'S':
0564:                        case 'w':
0565:                        case 'W':
0566:                            if (inRange) {
0567:                                reportError("msg.bad.range", "");
0568:                                return false;
0569:                            }
0570:                            target.bmsize = 65535;
0571:                            return true;
0572:                        case '0':
0573:                        case '1':
0574:                        case '2':
0575:                        case '3':
0576:                        case '4':
0577:                        case '5':
0578:                        case '6':
0579:                        case '7':
0580:                            /*
0581:                             *  This is a non-ECMA extension - decimal escapes (in this
0582:                             *  case, octal!) are supposed to be an error inside class
0583:                             *  ranges, but supported here for backwards compatibility.
0584:                             *
0585:                             */
0586:                            n = (c - '0');
0587:                            c = src[index];
0588:                            if ('0' <= c && c <= '7') {
0589:                                index++;
0590:                                n = 8 * n + (c - '0');
0591:                                c = src[index];
0592:                                if ('0' <= c && c <= '7') {
0593:                                    index++;
0594:                                    i = 8 * n + (c - '0');
0595:                                    if (i <= 0377)
0596:                                        n = i;
0597:                                    else
0598:                                        index--;
0599:                                }
0600:                            }
0601:                            localMax = n;
0602:                            break;
0603:
0604:                        default:
0605:                            localMax = c;
0606:                            break;
0607:                        }
0608:                        break;
0609:                    default:
0610:                        localMax = src[index++];
0611:                        break;
0612:                    }
0613:                    if (inRange) {
0614:                        if (rangeStart > localMax) {
0615:                            reportError("msg.bad.range", "");
0616:                            return false;
0617:                        }
0618:                        inRange = false;
0619:                    } else {
0620:                        if (index < (end - 1)) {
0621:                            if (src[index] == '-') {
0622:                                ++index;
0623:                                inRange = true;
0624:                                rangeStart = (char) localMax;
0625:                                continue;
0626:                            }
0627:                        }
0628:                    }
0629:                    if ((state.flags & JSREG_FOLD) != 0) {
0630:                        char cu = upcase((char) localMax);
0631:                        char cd = downcase((char) localMax);
0632:                        localMax = (cu >= cd) ? cu : cd;
0633:                    }
0634:                    if (localMax > max)
0635:                        max = localMax;
0636:                }
0637:                target.bmsize = max;
0638:                return true;
0639:            }
0640:
0641:            /*
0642:             *  item:       assertion               An item is either an assertion or
0643:             *              quantatom               a quantified atom.
0644:             *
0645:             *  assertion:  '^'                     Assertions match beginning of string
0646:             *                                      (or line if the class static property
0647:             *                                      RegExp.multiline is true).
0648:             *              '$'                     End of string (or line if the class
0649:             *                                      static property RegExp.multiline is
0650:             *                                      true).
0651:             *              '\b'                    Word boundary (between \w and \W).
0652:             *              '\B'                    Word non-boundary.
0653:             *
0654:             *  quantatom:  atom                    An unquantified atom.
0655:             *              quantatom '{' n ',' m '}'
0656:             *                                      Atom must occur between n and m times.
0657:             *              quantatom '{' n ',' '}' Atom must occur at least n times.
0658:             *              quantatom '{' n '}'     Atom must occur exactly n times.
0659:             *              quantatom '*'           Zero or more times (same as {0,}).
0660:             *              quantatom '+'           One or more times (same as {1,}).
0661:             *              quantatom '?'           Zero or one time (same as {0,1}).
0662:             *
0663:             *              any of which can be optionally followed by '?' for ungreedy
0664:             *
0665:             *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
0666:             *                                      can be addressed using a backreference,
0667:             *                                      see '\' n below).
0668:             *              '.'                     Matches any char except '\n'.
0669:             *              '[' classlist ']'       A character class.
0670:             *              '[' '^' classlist ']'   A negated character class.
0671:             *              '\f'                    Form Feed.
0672:             *              '\n'                    Newline (Line Feed).
0673:             *              '\r'                    Carriage Return.
0674:             *              '\t'                    Horizontal Tab.
0675:             *              '\v'                    Vertical Tab.
0676:             *              '\d'                    A digit (same as [0-9]).
0677:             *              '\D'                    A non-digit.
0678:             *              '\w'                    A word character, [0-9a-z_A-Z].
0679:             *              '\W'                    A non-word character.
0680:             *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
0681:             *              '\S'                    A non-whitespace character.
0682:             *              '\' n                   A backreference to the nth (n decimal
0683:             *                                      and positive) parenthesized expression.
0684:             *              '\' octal               An octal escape sequence (octal must be
0685:             *                                      two or three digits long, unless it is
0686:             *                                      0 for the null character).
0687:             *              '\x' hex                A hex escape (hex must be two digits).
0688:             *              '\c' ctrl               A control character, ctrl is a letter.
0689:             *              '\' literalatomchar     Any character except one of the above
0690:             *                                      that follow '\' in an atom.
0691:             *              otheratomchar           Any character not first among the other
0692:             *                                      atom right-hand sides.
0693:             */
0694:
0695:            private static void doFlat(CompilerState state, char c) {
0696:                state.result = new RENode(REOP_FLAT);
0697:                state.result.chr = c;
0698:                state.result.length = 1;
0699:                state.result.flatIndex = -1;
0700:                state.progLength += 3;
0701:            }
0702:
0703:            private static int getDecimalValue(char c, CompilerState state,
0704:                    int maxValue, String overflowMessageId) {
0705:                boolean overflow = false;
0706:                int start = state.cp;
0707:                char[] src = state.cpbegin;
0708:                int value = c - '0';
0709:                for (; state.cp != state.cpend; ++state.cp) {
0710:                    c = src[state.cp];
0711:                    if (!isDigit(c)) {
0712:                        break;
0713:                    }
0714:                    if (!overflow) {
0715:                        int digit = c - '0';
0716:                        if (value < (maxValue - digit) / 10) {
0717:                            value = value * 10 + digit;
0718:                        } else {
0719:                            overflow = true;
0720:                            value = maxValue;
0721:                        }
0722:                    }
0723:                }
0724:                if (overflow) {
0725:                    reportError(overflowMessageId, String.valueOf(src, start,
0726:                            state.cp - start));
0727:                }
0728:                return value;
0729:            }
0730:
0731:            private static boolean parseTerm(CompilerState state) {
0732:                char[] src = state.cpbegin;
0733:                char c = src[state.cp++];
0734:                int nDigits = 2;
0735:                int parenBaseCount = state.parenCount;
0736:                int num, tmp;
0737:                RENode term;
0738:                int termStart;
0739:
0740:                switch (c) {
0741:                /* assertions and atoms */
0742:                case '^':
0743:                    state.result = new RENode(REOP_BOL);
0744:                    state.progLength++;
0745:                    return true;
0746:                case '$':
0747:                    state.result = new RENode(REOP_EOL);
0748:                    state.progLength++;
0749:                    return true;
0750:                case '\\':
0751:                    if (state.cp < state.cpend) {
0752:                        c = src[state.cp++];
0753:                        switch (c) {
0754:                        /* assertion escapes */
0755:                        case 'b':
0756:                            state.result = new RENode(REOP_WBDRY);
0757:                            state.progLength++;
0758:                            return true;
0759:                        case 'B':
0760:                            state.result = new RENode(REOP_WNONBDRY);
0761:                            state.progLength++;
0762:                            return true;
0763:                            /* Decimal escape */
0764:                        case '0':
0765:                            /*
0766:                             * Under 'strict' ECMA 3, we interpret \0 as NUL and don't accept octal.
0767:                             * However, (XXX and since Rhino doesn't have a 'strict' mode) we'll just
0768:                             * behave the old way for compatibility reasons.
0769:                             * (see http://bugzilla.mozilla.org/show_bug.cgi?id=141078)
0770:                             *
0771:                             */
0772:                            reportWarning(state.cx, "msg.bad.backref", "");
0773:                            /* octal escape */
0774:                            num = 0;
0775:                            while (state.cp < state.cpend) {
0776:                                c = src[state.cp];
0777:                                if ((c >= '0') && (c <= '7')) {
0778:                                    state.cp++;
0779:                                    tmp = 8 * num + (c - '0');
0780:                                    if (tmp > 0377)
0781:                                        break;
0782:                                    num = tmp;
0783:                                } else
0784:                                    break;
0785:                            }
0786:                            c = (char) (num);
0787:                            doFlat(state, c);
0788:                            break;
0789:                        case '1':
0790:                        case '2':
0791:                        case '3':
0792:                        case '4':
0793:                        case '5':
0794:                        case '6':
0795:                        case '7':
0796:                        case '8':
0797:                        case '9':
0798:                            termStart = state.cp - 1;
0799:                            num = getDecimalValue(c, state, 0xFFFF,
0800:                                    "msg.overlarge.backref");
0801:                            if (num > state.parenCount)
0802:                                reportWarning(state.cx, "msg.bad.backref", "");
0803:                            /*
0804:                             * n > 9 or > count of parentheses,
0805:                             * then treat as octal instead.
0806:                             */
0807:                            if ((num > 9) && (num > state.parenCount)) {
0808:                                state.cp = termStart;
0809:                                num = 0;
0810:                                while (state.cp < state.cpend) {
0811:                                    c = src[state.cp];
0812:                                    if ((c >= '0') && (c <= '7')) {
0813:                                        state.cp++;
0814:                                        tmp = 8 * num + (c - '0');
0815:                                        if (tmp > 0377)
0816:                                            break;
0817:                                        num = tmp;
0818:                                    } else
0819:                                        break;
0820:                                }
0821:                                c = (char) (num);
0822:                                doFlat(state, c);
0823:                                break;
0824:                            }
0825:                            /* otherwise, it's a back-reference */
0826:                            state.result = new RENode(REOP_BACKREF);
0827:                            state.result.parenIndex = num - 1;
0828:                            state.progLength += 3;
0829:                            break;
0830:                        /* Control escape */
0831:                        case 'f':
0832:                            c = 0xC;
0833:                            doFlat(state, c);
0834:                            break;
0835:                        case 'n':
0836:                            c = 0xA;
0837:                            doFlat(state, c);
0838:                            break;
0839:                        case 'r':
0840:                            c = 0xD;
0841:                            doFlat(state, c);
0842:                            break;
0843:                        case 't':
0844:                            c = 0x9;
0845:                            doFlat(state, c);
0846:                            break;
0847:                        case 'v':
0848:                            c = 0xB;
0849:                            doFlat(state, c);
0850:                            break;
0851:                        /* Control letter */
0852:                        case 'c':
0853:                            if (((state.cp + 1) < state.cpend)
0854:                                    && Character.isLetter(src[state.cp + 1]))
0855:                                c = (char) (src[state.cp++] & 0x1F);
0856:                            else {
0857:                                /* back off to accepting the original '\' as a literal */
0858:                                --state.cp;
0859:                                c = '\\';
0860:                            }
0861:                            doFlat(state, c);
0862:                            break;
0863:                        /* UnicodeEscapeSequence */
0864:                        case 'u':
0865:                            nDigits += 2;
0866:                            // fall thru...
0867:                            /* HexEscapeSequence */
0868:                        case 'x': {
0869:                            int n = 0;
0870:                            int i;
0871:                            for (i = 0; (i < nDigits)
0872:                                    && (state.cp < state.cpend); i++) {
0873:                                c = src[state.cp++];
0874:                                n = Kit.xDigitToInt(c, n);
0875:                                if (n < 0) {
0876:                                    // Back off to accepting the original
0877:                                    // 'u' or 'x' as a literal
0878:                                    state.cp -= (i + 2);
0879:                                    n = src[state.cp++];
0880:                                    break;
0881:                                }
0882:                            }
0883:                            c = (char) (n);
0884:                        }
0885:                            doFlat(state, c);
0886:                            break;
0887:                        /* Character class escapes */
0888:                        case 'd':
0889:                            state.result = new RENode(REOP_DIGIT);
0890:                            state.progLength++;
0891:                            break;
0892:                        case 'D':
0893:                            state.result = new RENode(REOP_NONDIGIT);
0894:                            state.progLength++;
0895:                            break;
0896:                        case 's':
0897:                            state.result = new RENode(REOP_SPACE);
0898:                            state.progLength++;
0899:                            break;
0900:                        case 'S':
0901:                            state.result = new RENode(REOP_NONSPACE);
0902:                            state.progLength++;
0903:                            break;
0904:                        case 'w':
0905:                            state.result = new RENode(REOP_ALNUM);
0906:                            state.progLength++;
0907:                            break;
0908:                        case 'W':
0909:                            state.result = new RENode(REOP_NONALNUM);
0910:                            state.progLength++;
0911:                            break;
0912:                        /* IdentityEscape */
0913:                        default:
0914:                            state.result = new RENode(REOP_FLAT);
0915:                            state.result.chr = c;
0916:                            state.result.length = 1;
0917:                            state.result.flatIndex = state.cp - 1;
0918:                            state.progLength += 3;
0919:                            break;
0920:                        }
0921:                        break;
0922:                    } else {
0923:                        /* a trailing '\' is an error */
0924:                        reportError("msg.trail.backslash", "");
0925:                        return false;
0926:                    }
0927:                case '(': {
0928:                    RENode result = null;
0929:                    termStart = state.cp;
0930:                    if (state.cp + 1 < state.cpend
0931:                            && src[state.cp] == '?'
0932:                            && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':')) {
0933:                        state.cp += 2;
0934:                        if (c == '=') {
0935:                            result = new RENode(REOP_ASSERT);
0936:                            /* ASSERT, <next>, ... ASSERTTEST */
0937:                            state.progLength += 4;
0938:                        } else if (c == '!') {
0939:                            result = new RENode(REOP_ASSERT_NOT);
0940:                            /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
0941:                            state.progLength += 4;
0942:                        }
0943:                    } else {
0944:                        result = new RENode(REOP_LPAREN);
0945:                        /* LPAREN, <index>, ... RPAREN, <index> */
0946:                        state.progLength += 6;
0947:                        result.parenIndex = state.parenCount++;
0948:                    }
0949:                    ++state.parenNesting;
0950:                    if (!parseDisjunction(state))
0951:                        return false;
0952:                    if (state.cp == state.cpend || src[state.cp] != ')') {
0953:                        reportError("msg.unterm.paren", "");
0954:                        return false;
0955:                    }
0956:                    ++state.cp;
0957:                    --state.parenNesting;
0958:                    if (result != null) {
0959:                        result.kid = state.result;
0960:                        state.result = result;
0961:                    }
0962:                    break;
0963:                }
0964:                case ')':
0965:                    reportError("msg.re.unmatched.right.paren", "");
0966:                    return false;
0967:                case '[':
0968:                    state.result = new RENode(REOP_CLASS);
0969:                    termStart = state.cp;
0970:                    state.result.startIndex = termStart;
0971:                    while (true) {
0972:                        if (state.cp == state.cpend) {
0973:                            reportError("msg.unterm.class", "");
0974:                            return false;
0975:                        }
0976:                        if (src[state.cp] == '\\')
0977:                            state.cp++;
0978:                        else {
0979:                            if (src[state.cp] == ']') {
0980:                                state.result.kidlen = state.cp - termStart;
0981:                                break;
0982:                            }
0983:                        }
0984:                        state.cp++;
0985:                    }
0986:                    state.result.index = state.classCount++;
0987:                    /*
0988:                     * Call calculateBitmapSize now as we want any errors it finds
0989:                     * to be reported during the parse phase, not at execution.
0990:                     */
0991:                    if (!calculateBitmapSize(state, state.result, src,
0992:                            termStart, state.cp++))
0993:                        return false;
0994:                    state.progLength += 3; /* CLASS, <index> */
0995:                    break;
0996:
0997:                case '.':
0998:                    state.result = new RENode(REOP_DOT);
0999:                    state.progLength++;
1000:                    break;
1001:                case '*':
1002:                case '+':
1003:                case '?':
1004:                    reportError("msg.bad.quant", String
1005:                            .valueOf(src[state.cp - 1]));
1006:                    return false;
1007:                default:
1008:                    state.result = new RENode(REOP_FLAT);
1009:                    state.result.chr = c;
1010:                    state.result.length = 1;
1011:                    state.result.flatIndex = state.cp - 1;
1012:                    state.progLength += 3;
1013:                    break;
1014:                }
1015:
1016:                term = state.result;
1017:                if (state.cp == state.cpend) {
1018:                    return true;
1019:                }
1020:                boolean hasQ = false;
1021:                switch (src[state.cp]) {
1022:                case '+':
1023:                    state.result = new RENode(REOP_QUANT);
1024:                    state.result.min = 1;
1025:                    state.result.max = -1;
1026:                    /* <PLUS>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
1027:                    state.progLength += 8;
1028:                    hasQ = true;
1029:                    break;
1030:                case '*':
1031:                    state.result = new RENode(REOP_QUANT);
1032:                    state.result.min = 0;
1033:                    state.result.max = -1;
1034:                    /* <STAR>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
1035:                    state.progLength += 8;
1036:                    hasQ = true;
1037:                    break;
1038:                case '?':
1039:                    state.result = new RENode(REOP_QUANT);
1040:                    state.result.min = 0;
1041:                    state.result.max = 1;
1042:                    /* <OPT>, <parencount>, <parenindex>, <next> ... <ENDCHILD> */
1043:                    state.progLength += 8;
1044:                    hasQ = true;
1045:                    break;
1046:                case '{': /* balance '}' */
1047:                {
1048:                    int min = 0;
1049:                    int max = -1;
1050:                    int leftCurl = state.cp;
1051:
1052:                    /* For Perl etc. compatibility, if quntifier does not match
1053:                     * \{\d+(,\d*)?\} exactly back off from it
1054:                     * being a quantifier, and chew it up as a literal
1055:                     * atom next time instead.
1056:                     */
1057:
1058:                    c = src[++state.cp];
1059:                    if (isDigit(c)) {
1060:                        ++state.cp;
1061:                        min = getDecimalValue(c, state, 0xFFFF,
1062:                                "msg.overlarge.min");
1063:                        c = src[state.cp];
1064:                        if (c == ',') {
1065:                            c = src[++state.cp];
1066:                            if (isDigit(c)) {
1067:                                ++state.cp;
1068:                                max = getDecimalValue(c, state, 0xFFFF,
1069:                                        "msg.overlarge.max");
1070:                                c = src[state.cp];
1071:                                if (min > max) {
1072:                                    reportError("msg.max.lt.min", String
1073:                                            .valueOf(src[state.cp]));
1074:                                    return false;
1075:                                }
1076:                            }
1077:                        } else {
1078:                            max = min;
1079:                        }
1080:                        /* balance '{' */
1081:                        if (c == '}') {
1082:                            state.result = new RENode(REOP_QUANT);
1083:                            state.result.min = min;
1084:                            state.result.max = max;
1085:                            // QUANT, <min>, <max>, <parencount>,
1086:                            // <parenindex>, <next> ... <ENDCHILD>
1087:                            state.progLength += 12;
1088:                            hasQ = true;
1089:                        }
1090:                    }
1091:                    if (!hasQ) {
1092:                        state.cp = leftCurl;
1093:                    }
1094:                    break;
1095:                }
1096:                }
1097:                if (!hasQ)
1098:                    return true;
1099:
1100:                ++state.cp;
1101:                state.result.kid = term;
1102:                state.result.parenIndex = parenBaseCount;
1103:                state.result.parenCount = state.parenCount - parenBaseCount;
1104:                if ((state.cp < state.cpend) && (src[state.cp] == '?')) {
1105:                    ++state.cp;
1106:                    state.result.greedy = false;
1107:                } else
1108:                    state.result.greedy = true;
1109:                return true;
1110:            }
1111:
1112:            private static void resolveForwardJump(byte[] array, int from,
1113:                    int pc) {
1114:                if (from > pc)
1115:                    throw Kit.codeBug();
1116:                addIndex(array, from, pc - from);
1117:            }
1118:
1119:            private static int getOffset(byte[] array, int pc) {
1120:                return getIndex(array, pc);
1121:            }
1122:
1123:            private static int addIndex(byte[] array, int pc, int index) {
1124:                if (index < 0)
1125:                    throw Kit.codeBug();
1126:                if (index > 0xFFFF)
1127:                    throw Context.reportRuntimeError("Too complex regexp");
1128:                array[pc] = (byte) (index >> 8);
1129:                array[pc + 1] = (byte) (index);
1130:                return pc + 2;
1131:            }
1132:
1133:            private static int getIndex(byte[] array, int pc) {
1134:                return ((array[pc] & 0xFF) << 8) | (array[pc + 1] & 0xFF);
1135:            }
1136:
1137:            private static final int OFFSET_LEN = 2;
1138:            private static final int INDEX_LEN = 2;
1139:
1140:            private static int emitREBytecode(CompilerState state,
1141:                    RECompiled re, int pc, RENode t) {
1142:                RENode nextAlt;
1143:                int nextAltFixup, nextTermFixup;
1144:                byte[] program = re.program;
1145:
1146:                while (t != null) {
1147:                    program[pc++] = t.op;
1148:                    switch (t.op) {
1149:                    case REOP_EMPTY:
1150:                        --pc;
1151:                        break;
1152:                    case REOP_ALT:
1153:                        nextAlt = t.kid2;
1154:                        nextAltFixup = pc; /* address of next alternate */
1155:                        pc += OFFSET_LEN;
1156:                        pc = emitREBytecode(state, re, pc, t.kid);
1157:                        program[pc++] = REOP_JUMP;
1158:                        nextTermFixup = pc; /* address of following term */
1159:                        pc += OFFSET_LEN;
1160:                        resolveForwardJump(program, nextAltFixup, pc);
1161:                        pc = emitREBytecode(state, re, pc, nextAlt);
1162:
1163:                        program[pc++] = REOP_JUMP;
1164:                        nextAltFixup = pc;
1165:                        pc += OFFSET_LEN;
1166:
1167:                        resolveForwardJump(program, nextTermFixup, pc);
1168:                        resolveForwardJump(program, nextAltFixup, pc);
1169:                        break;
1170:                    case REOP_FLAT:
1171:                        /*
1172:                         * Consecutize FLAT's if possible.
1173:                         */
1174:                        if (t.flatIndex != -1) {
1175:                            while ((t.next != null)
1176:                                    && (t.next.op == REOP_FLAT)
1177:                                    && ((t.flatIndex + t.length) == t.next.flatIndex)) {
1178:                                t.length += t.next.length;
1179:                                t.next = t.next.next;
1180:                            }
1181:                        }
1182:                        if ((t.flatIndex != -1) && (t.length > 1)) {
1183:                            if ((state.flags & JSREG_FOLD) != 0)
1184:                                program[pc - 1] = REOP_FLATi;
1185:                            else
1186:                                program[pc - 1] = REOP_FLAT;
1187:                            pc = addIndex(program, pc, t.flatIndex);
1188:                            pc = addIndex(program, pc, t.length);
1189:                        } else {
1190:                            if (t.chr < 256) {
1191:                                if ((state.flags & JSREG_FOLD) != 0)
1192:                                    program[pc - 1] = REOP_FLAT1i;
1193:                                else
1194:                                    program[pc - 1] = REOP_FLAT1;
1195:                                program[pc++] = (byte) (t.chr);
1196:                            } else {
1197:                                if ((state.flags & JSREG_FOLD) != 0)
1198:                                    program[pc - 1] = REOP_UCFLAT1i;
1199:                                else
1200:                                    program[pc - 1] = REOP_UCFLAT1;
1201:                                pc = addIndex(program, pc, t.chr);
1202:                            }
1203:                        }
1204:                        break;
1205:                    case REOP_LPAREN:
1206:                        pc = addIndex(program, pc, t.parenIndex);
1207:                        pc = emitREBytecode(state, re, pc, t.kid);
1208:                        program[pc++] = REOP_RPAREN;
1209:                        pc = addIndex(program, pc, t.parenIndex);
1210:                        break;
1211:                    case REOP_BACKREF:
1212:                        pc = addIndex(program, pc, t.parenIndex);
1213:                        break;
1214:                    case REOP_ASSERT:
1215:                        nextTermFixup = pc;
1216:                        pc += OFFSET_LEN;
1217:                        pc = emitREBytecode(state, re, pc, t.kid);
1218:                        program[pc++] = REOP_ASSERTTEST;
1219:                        resolveForwardJump(program, nextTermFixup, pc);
1220:                        break;
1221:                    case REOP_ASSERT_NOT:
1222:                        nextTermFixup = pc;
1223:                        pc += OFFSET_LEN;
1224:                        pc = emitREBytecode(state, re, pc, t.kid);
1225:                        program[pc++] = REOP_ASSERTNOTTEST;
1226:                        resolveForwardJump(program, nextTermFixup, pc);
1227:                        break;
1228:                    case REOP_QUANT:
1229:                        if ((t.min == 0) && (t.max == -1))
1230:                            program[pc - 1] = (t.greedy) ? REOP_STAR
1231:                                    : REOP_MINIMALSTAR;
1232:                        else if ((t.min == 0) && (t.max == 1))
1233:                            program[pc - 1] = (t.greedy) ? REOP_OPT
1234:                                    : REOP_MINIMALOPT;
1235:                        else if ((t.min == 1) && (t.max == -1))
1236:                            program[pc - 1] = (t.greedy) ? REOP_PLUS
1237:                                    : REOP_MINIMALPLUS;
1238:                        else {
1239:                            if (!t.greedy)
1240:                                program[pc - 1] = REOP_MINIMALQUANT;
1241:                            pc = addIndex(program, pc, t.min);
1242:                            // max can be -1 which addIndex does not accept
1243:                            pc = addIndex(program, pc, t.max + 1);
1244:                        }
1245:                        pc = addIndex(program, pc, t.parenCount);
1246:                        pc = addIndex(program, pc, t.parenIndex);
1247:                        nextTermFixup = pc;
1248:                        pc += OFFSET_LEN;
1249:                        pc = emitREBytecode(state, re, pc, t.kid);
1250:                        program[pc++] = REOP_ENDCHILD;
1251:                        resolveForwardJump(program, nextTermFixup, pc);
1252:                        break;
1253:                    case REOP_CLASS:
1254:                        pc = addIndex(program, pc, t.index);
1255:                        re.classList[t.index] = new RECharSet(t.bmsize,
1256:                                t.startIndex, t.kidlen);
1257:                        break;
1258:                    default:
1259:                        break;
1260:                    }
1261:                    t = t.next;
1262:                }
1263:                return pc;
1264:            }
1265:
1266:            private static void pushProgState(REGlobalData gData, int min,
1267:                    int max, REBackTrackData backTrackLastToSave,
1268:                    int continuation_pc, int continuation_op) {
1269:                gData.stateStackTop = new REProgState(gData.stateStackTop, min,
1270:                        max, gData.cp, backTrackLastToSave, continuation_pc,
1271:                        continuation_op);
1272:            }
1273:
1274:            private static REProgState popProgState(REGlobalData gData) {
1275:                REProgState state = gData.stateStackTop;
1276:                gData.stateStackTop = state.previous;
1277:                return state;
1278:            }
1279:
1280:            private static void pushBackTrackState(REGlobalData gData, byte op,
1281:                    int target) {
1282:                gData.backTrackStackTop = new REBackTrackData(gData, op, target);
1283:            }
1284:
1285:            /*
1286:             *   Consecutive literal characters.
1287:             */
1288:            private static boolean flatNMatcher(REGlobalData gData,
1289:                    int matchChars, int length, char[] chars, int end) {
1290:                if ((gData.cp + length) > end)
1291:                    return false;
1292:                for (int i = 0; i < length; i++) {
1293:                    if (gData.regexp.source[matchChars + i] != chars[gData.cp
1294:                            + i]) {
1295:                        return false;
1296:                    }
1297:                }
1298:                gData.cp += length;
1299:                return true;
1300:            }
1301:
1302:            private static boolean flatNIMatcher(REGlobalData gData,
1303:                    int matchChars, int length, char[] chars, int end) {
1304:                if ((gData.cp + length) > end)
1305:                    return false;
1306:                for (int i = 0; i < length; i++) {
1307:                    if (upcase(gData.regexp.source[matchChars + i]) != upcase(chars[gData.cp
1308:                            + i])) {
1309:                        return false;
1310:                    }
1311:                }
1312:                gData.cp += length;
1313:                return true;
1314:            }
1315:
1316:            /*
1317:            1. Evaluate DecimalEscape to obtain an EscapeValue E.
1318:            2. If E is not a character then go to step 6.
1319:            3. Let ch be E's character.
1320:            4. Let A be a one-element RECharSet containing the character ch.
1321:            5. Call CharacterSetMatcher(A, false) and return its Matcher result.
1322:            6. E must be an integer. Let n be that integer.
1323:            7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
1324:            8. Return an internal Matcher closure that takes two arguments, a State x
1325:               and a Continuation c, and performs the following:
1326:                1. Let cap be x's captures internal array.
1327:                2. Let s be cap[n].
1328:                3. If s is undefined, then call c(x) and return its result.
1329:                4. Let e be x's endIndex.
1330:                5. Let len be s's length.
1331:                6. Let f be e+len.
1332:                7. If f>InputLength, return failure.
1333:                8. If there exists an integer i between 0 (inclusive) and len (exclusive)
1334:                   such that Canonicalize(s[i]) is not the same character as
1335:                   Canonicalize(Input [e+i]), then return failure.
1336:                9. Let y be the State (f, cap).
1337:                10. Call c(y) and return its result.
1338:             */
1339:            private static boolean backrefMatcher(REGlobalData gData,
1340:                    int parenIndex, char[] chars, int end) {
1341:                int len;
1342:                int i;
1343:                int parenContent = gData.parens_index(parenIndex);
1344:                if (parenContent == -1)
1345:                    return true;
1346:
1347:                len = gData.parens_length(parenIndex);
1348:                if ((gData.cp + len) > end)
1349:                    return false;
1350:
1351:                if ((gData.regexp.flags & JSREG_FOLD) != 0) {
1352:                    for (i = 0; i < len; i++) {
1353:                        if (upcase(chars[parenContent + i]) != upcase(chars[gData.cp
1354:                                + i]))
1355:                            return false;
1356:                    }
1357:                } else {
1358:                    for (i = 0; i < len; i++) {
1359:                        if (chars[parenContent + i] != chars[gData.cp + i])
1360:                            return false;
1361:                    }
1362:                }
1363:                gData.cp += len;
1364:                return true;
1365:            }
1366:
1367:            /* Add a single character to the RECharSet */
1368:            private static void addCharacterToCharSet(RECharSet cs, char c) {
1369:                int byteIndex = (c / 8);
1370:                if (c > cs.length)
1371:                    throw new RuntimeException();
1372:                cs.bits[byteIndex] |= 1 << (c & 0x7);
1373:            }
1374:
1375:            /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
1376:            private static void addCharacterRangeToCharSet(RECharSet cs,
1377:                    char c1, char c2) {
1378:                int i;
1379:
1380:                int byteIndex1 = (c1 / 8);
1381:                int byteIndex2 = (c2 / 8);
1382:
1383:                if ((c2 > cs.length) || (c1 > c2))
1384:                    throw new RuntimeException();
1385:
1386:                c1 &= 0x7;
1387:                c2 &= 0x7;
1388:
1389:                if (byteIndex1 == byteIndex2) {
1390:                    cs.bits[byteIndex1] |= ((0xFF) >> (7 - (c2 - c1))) << c1;
1391:                } else {
1392:                    cs.bits[byteIndex1] |= 0xFF << c1;
1393:                    for (i = byteIndex1 + 1; i < byteIndex2; i++)
1394:                        cs.bits[i] = (byte) 0xFF;
1395:                    cs.bits[byteIndex2] |= (0xFF) >> (7 - c2);
1396:                }
1397:            }
1398:
1399:            /* Compile the source of the class into a RECharSet */
1400:            private static void processCharSet(REGlobalData gData,
1401:                    RECharSet charSet) {
1402:                synchronized (charSet) {
1403:                    if (!charSet.converted) {
1404:                        processCharSetImpl(gData, charSet);
1405:                        charSet.converted = true;
1406:                    }
1407:                }
1408:            }
1409:
1410:            private static void processCharSetImpl(REGlobalData gData,
1411:                    RECharSet charSet) {
1412:                int src = charSet.startIndex;
1413:                int end = src + charSet.strlength;
1414:
1415:                char rangeStart = 0, this Ch;
1416:                int byteLength;
1417:                char c;
1418:                int n;
1419:                int nDigits;
1420:                int i;
1421:                boolean inRange = false;
1422:
1423:                charSet.sense = true;
1424:                byteLength = (charSet.length / 8) + 1;
1425:                charSet.bits = new byte[byteLength];
1426:
1427:                if (src == end)
1428:                    return;
1429:
1430:                if (gData.regexp.source[src] == '^') {
1431:                    charSet.sense = false;
1432:                    ++src;
1433:                }
1434:
1435:                while (src != end) {
1436:                    nDigits = 2;
1437:                    switch (gData.regexp.source[src]) {
1438:                    case '\\':
1439:                        ++src;
1440:                        c = gData.regexp.source[src++];
1441:                        switch (c) {
1442:                        case 'b':
1443:                            this Ch = 0x8;
1444:                            break;
1445:                        case 'f':
1446:                            this Ch = 0xC;
1447:                            break;
1448:                        case 'n':
1449:                            this Ch = 0xA;
1450:                            break;
1451:                        case 'r':
1452:                            this Ch = 0xD;
1453:                            break;
1454:                        case 't':
1455:                            this Ch = 0x9;
1456:                            break;
1457:                        case 'v':
1458:                            this Ch = 0xB;
1459:                            break;
1460:                        case 'c':
1461:                            if (((src + 1) < end)
1462:                                    && isWord(gData.regexp.source[src + 1]))
1463:                                this Ch = (char) (gData.regexp.source[src++] & 0x1F);
1464:                            else {
1465:                                --src;
1466:                                this Ch = '\\';
1467:                            }
1468:                            break;
1469:                        case 'u':
1470:                            nDigits += 2;
1471:                            // fall thru
1472:                        case 'x':
1473:                            n = 0;
1474:                            for (i = 0; (i < nDigits) && (src < end); i++) {
1475:                                c = gData.regexp.source[src++];
1476:                                int digit = toASCIIHexDigit(c);
1477:                                if (digit < 0) {
1478:                                    /* back off to accepting the original '\'
1479:                                     * as a literal
1480:                                     */
1481:                                    src -= (i + 1);
1482:                                    n = '\\';
1483:                                    break;
1484:                                }
1485:                                n = (n << 4) | digit;
1486:                            }
1487:                            this Ch = (char) (n);
1488:                            break;
1489:                        case '0':
1490:                        case '1':
1491:                        case '2':
1492:                        case '3':
1493:                        case '4':
1494:                        case '5':
1495:                        case '6':
1496:                        case '7':
1497:                            /*
1498:                             *  This is a non-ECMA extension - decimal escapes (in this
1499:                             *  case, octal!) are supposed to be an error inside class
1500:                             *  ranges, but supported here for backwards compatibility.
1501:                             *
1502:                             */
1503:                            n = (c - '0');
1504:                            c = gData.regexp.source[src];
1505:                            if ('0' <= c && c <= '7') {
1506:                                src++;
1507:                                n = 8 * n + (c - '0');
1508:                                c = gData.regexp.source[src];
1509:                                if ('0' <= c && c <= '7') {
1510:                                    src++;
1511:                                    i = 8 * n + (c - '0');
1512:                                    if (i <= 0377)
1513:                                        n = i;
1514:                                    else
1515:                                        src--;
1516:                                }
1517:                            }
1518:                            this Ch = (char) (n);
1519:                            break;
1520:
1521:                        case 'd':
1522:                            addCharacterRangeToCharSet(charSet, '0', '9');
1523:                            continue; /* don't need range processing */
1524:                        case 'D':
1525:                            addCharacterRangeToCharSet(charSet, (char) 0,
1526:                                    (char) ('0' - 1));
1527:                            addCharacterRangeToCharSet(charSet,
1528:                                    (char) ('9' + 1), (char) (charSet.length));
1529:                            continue;
1530:                        case 's':
1531:                            for (i = charSet.length; i >= 0; i--)
1532:                                if (isREWhiteSpace(i))
1533:                                    addCharacterToCharSet(charSet, (char) (i));
1534:                            continue;
1535:                        case 'S':
1536:                            for (i = charSet.length; i >= 0; i--)
1537:                                if (!isREWhiteSpace(i))
1538:                                    addCharacterToCharSet(charSet, (char) (i));
1539:                            continue;
1540:                        case 'w':
1541:                            for (i = charSet.length; i >= 0; i--)
1542:                                if (isWord((char) i))
1543:                                    addCharacterToCharSet(charSet, (char) (i));
1544:                            continue;
1545:                        case 'W':
1546:                            for (i = charSet.length; i >= 0; i--)
1547:                                if (!isWord((char) i))
1548:                                    addCharacterToCharSet(charSet, (char) (i));
1549:                            continue;
1550:                        default:
1551:                            this Ch = c;
1552:                            break;
1553:
1554:                        }
1555:                        break;
1556:
1557:                    default:
1558:                        this Ch = gData.regexp.source[src++];
1559:                        break;
1560:
1561:                    }
1562:                    if (inRange) {
1563:                        if ((gData.regexp.flags & JSREG_FOLD) != 0) {
1564:                            addCharacterRangeToCharSet(charSet,
1565:                                    upcase(rangeStart), upcase(this Ch));
1566:                            addCharacterRangeToCharSet(charSet,
1567:                                    downcase(rangeStart), downcase(this Ch));
1568:                        } else {
1569:                            addCharacterRangeToCharSet(charSet, rangeStart,
1570:                                    this Ch);
1571:                        }
1572:                        inRange = false;
1573:                    } else {
1574:                        if ((gData.regexp.flags & JSREG_FOLD) != 0) {
1575:                            addCharacterToCharSet(charSet, upcase(this Ch));
1576:                            addCharacterToCharSet(charSet, downcase(this Ch));
1577:                        } else {
1578:                            addCharacterToCharSet(charSet, this Ch);
1579:                        }
1580:                        if (src < (end - 1)) {
1581:                            if (gData.regexp.source[src] == '-') {
1582:                                ++src;
1583:                                inRange = true;
1584:                                rangeStart = this Ch;
1585:                            }
1586:                        }
1587:                    }
1588:                }
1589:            }
1590:
1591:            /*
1592:             *   Initialize the character set if it this is the first call.
1593:             *   Test the bit - if the ^ flag was specified, non-inclusion is a success
1594:             */
1595:            private static boolean classMatcher(REGlobalData gData,
1596:                    RECharSet charSet, char ch) {
1597:                if (!charSet.converted) {
1598:                    processCharSet(gData, charSet);
1599:                }
1600:
1601:                int byteIndex = ch / 8;
1602:                if (charSet.sense) {
1603:                    if ((charSet.length == 0)
1604:                            || ((ch > charSet.length) || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0)))
1605:                        return false;
1606:                } else {
1607:                    if (!((charSet.length == 0) || ((ch > charSet.length) || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0))))
1608:                        return false;
1609:                }
1610:                return true;
1611:            }
1612:
1613:            private static boolean executeREBytecode(REGlobalData gData,
1614:                    char[] chars, int end) {
1615:                int pc = 0;
1616:                byte program[] = gData.regexp.program;
1617:                int currentContinuation_op;
1618:                int currentContinuation_pc;
1619:                boolean result = false;
1620:
1621:                currentContinuation_pc = 0;
1622:                currentContinuation_op = REOP_END;
1623:                if (debug) {
1624:                    System.out.println("Input = \"" + new String(chars)
1625:                            + "\", start at " + gData.cp);
1626:                }
1627:                int op = program[pc++];
1628:                for (;;) {
1629:                    if (debug) {
1630:                        System.out.println("Testing at " + gData.cp + ", op = "
1631:                                + op);
1632:                    }
1633:                    switch (op) {
1634:                    case REOP_EMPTY:
1635:                        result = true;
1636:                        break;
1637:                    case REOP_BOL:
1638:                        if (gData.cp != 0) {
1639:                            if (gData.multiline
1640:                                    || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
1641:                                if (!isLineTerm(chars[gData.cp - 1])) {
1642:                                    result = false;
1643:                                    break;
1644:                                }
1645:                            } else {
1646:                                result = false;
1647:                                break;
1648:                            }
1649:                        }
1650:                        result = true;
1651:                        break;
1652:                    case REOP_EOL:
1653:                        if (gData.cp != end) {
1654:                            if (gData.multiline
1655:                                    || ((gData.regexp.flags & JSREG_MULTILINE) != 0)) {
1656:                                if (!isLineTerm(chars[gData.cp])) {
1657:                                    result = false;
1658:                                    break;
1659:                                }
1660:                            } else {
1661:                                result = false;
1662:                                break;
1663:                            }
1664:                        }
1665:                        result = true;
1666:                        break;
1667:                    case REOP_WBDRY:
1668:                        result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1])) ^ !((gData.cp < end) && isWord(chars[gData.cp])));
1669:                        break;
1670:                    case REOP_WNONBDRY:
1671:                        result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1])) ^ ((gData.cp < end) && isWord(chars[gData.cp])));
1672:                        break;
1673:                    case REOP_DOT:
1674:                        result = (gData.cp != end && !isLineTerm(chars[gData.cp]));
1675:                        if (result) {
1676:                            gData.cp++;
1677:                        }
1678:                        break;
1679:                    case REOP_DIGIT:
1680:                        result = (gData.cp != end && isDigit(chars[gData.cp]));
1681:                        if (result) {
1682:                            gData.cp++;
1683:                        }
1684:                        break;
1685:                    case REOP_NONDIGIT:
1686:                        result = (gData.cp != end && !isDigit(chars[gData.cp]));
1687:                        if (result) {
1688:                            gData.cp++;
1689:                        }
1690:                        break;
1691:                    case REOP_SPACE:
1692:                        result = (gData.cp != end && isREWhiteSpace(chars[gData.cp]));
1693:                        if (result) {
1694:                            gData.cp++;
1695:                        }
1696:                        break;
1697:                    case REOP_NONSPACE:
1698:                        result = (gData.cp != end && !isREWhiteSpace(chars[gData.cp]));
1699:                        if (result) {
1700:                            gData.cp++;
1701:                        }
1702:                        break;
1703:                    case REOP_ALNUM:
1704:                        result = (gData.cp != end && isWord(chars[gData.cp]));
1705:                        if (result) {
1706:                            gData.cp++;
1707:                        }
1708:                        break;
1709:                    case REOP_NONALNUM:
1710:                        result = (gData.cp != end && !isWord(chars[gData.cp]));
1711:                        if (result) {
1712:                            gData.cp++;
1713:                        }
1714:                        break;
1715:                    case REOP_FLAT: {
1716:                        int offset = getIndex(program, pc);
1717:                        pc += INDEX_LEN;
1718:                        int length = getIndex(program, pc);
1719:                        pc += INDEX_LEN;
1720:                        result = flatNMatcher(gData, offset, length, chars, end);
1721:                    }
1722:                        break;
1723:                    case REOP_FLATi: {
1724:                        int offset = getIndex(program, pc);
1725:                        pc += INDEX_LEN;
1726:                        int length = getIndex(program, pc);
1727:                        pc += INDEX_LEN;
1728:                        result = flatNIMatcher(gData, offset, length, chars,
1729:                                end);
1730:                    }
1731:                        break;
1732:                    case REOP_FLAT1: {
1733:                        char matchCh = (char) (program[pc++] & 0xFF);
1734:                        result = (gData.cp != end && chars[gData.cp] == matchCh);
1735:                        if (result) {
1736:                            gData.cp++;
1737:                        }
1738:                    }
1739:                        break;
1740:                    case REOP_FLAT1i: {
1741:                        char matchCh = (char) (program[pc++] & 0xFF);
1742:                        result = (gData.cp != end && upcase(chars[gData.cp]) == upcase(matchCh));
1743:                        if (result) {
1744:                            gData.cp++;
1745:                        }
1746:                    }
1747:                        break;
1748:                    case REOP_UCFLAT1: {
1749:                        char matchCh = (char) getIndex(program, pc);
1750:                        pc += INDEX_LEN;
1751:                        result = (gData.cp != end && chars[gData.cp] == matchCh);
1752:                        if (result) {
1753:                            gData.cp++;
1754:                        }
1755:                    }
1756:                        break;
1757:                    case REOP_UCFLAT1i: {
1758:                        char matchCh = (char) getIndex(program, pc);
1759:                        pc += INDEX_LEN;
1760:                        result = (gData.cp != end && upcase(chars[gData.cp]) == upcase(matchCh));
1761:                        if (result) {
1762:                            gData.cp++;
1763:                        }
1764:                    }
1765:                        break;
1766:                    case REOP_ALT: {
1767:                        int nextpc;
1768:                        byte nextop;
1769:                        pushProgState(gData, 0, 0, null,
1770:                                currentContinuation_pc, currentContinuation_op);
1771:                        nextpc = pc + getOffset(program, pc);
1772:                        nextop = program[nextpc++];
1773:                        pushBackTrackState(gData, nextop, nextpc);
1774:                        pc += INDEX_LEN;
1775:                        op = program[pc++];
1776:                    }
1777:                        continue;
1778:
1779:                    case REOP_JUMP: {
1780:                        int offset;
1781:                        REProgState state = popProgState(gData);
1782:                        currentContinuation_pc = state.continuation_pc;
1783:                        currentContinuation_op = state.continuation_op;
1784:                        offset = getOffset(program, pc);
1785:                        pc += offset;
1786:                        op = program[pc++];
1787:                    }
1788:                        continue;
1789:
1790:                    case REOP_LPAREN: {
1791:                        int parenIndex = getIndex(program, pc);
1792:                        pc += INDEX_LEN;
1793:                        gData.set_parens(parenIndex, gData.cp, 0);
1794:                        op = program[pc++];
1795:                    }
1796:                        continue;
1797:                    case REOP_RPAREN: {
1798:                        int cap_index;
1799:                        int parenIndex = getIndex(program, pc);
1800:                        pc += INDEX_LEN;
1801:                        cap_index = gData.parens_index(parenIndex);
1802:                        gData.set_parens(parenIndex, cap_index, gData.cp
1803:                                - cap_index);
1804:                        if (parenIndex > gData.lastParen)
1805:                            gData.lastParen = parenIndex;
1806:                        op = program[pc++];
1807:                    }
1808:                        continue;
1809:                    case REOP_BACKREF: {
1810:                        int parenIndex = getIndex(program, pc);
1811:                        pc += INDEX_LEN;
1812:                        result = backrefMatcher(gData, parenIndex, chars, end);
1813:                    }
1814:                        break;
1815:
1816:                    case REOP_CLASS: {
1817:                        int index = getIndex(program, pc);
1818:                        pc += INDEX_LEN;
1819:                        if (gData.cp != end) {
1820:                            if (classMatcher(gData,
1821:                                    gData.regexp.classList[index],
1822:                                    chars[gData.cp])) {
1823:                                gData.cp++;
1824:                                result = true;
1825:                                break;
1826:                            }
1827:                        }
1828:                        result = false;
1829:                    }
1830:                        break;
1831:
1832:                    case REOP_ASSERT:
1833:                    case REOP_ASSERT_NOT: {
1834:                        byte testOp;
1835:                        pushProgState(gData, 0, 0, gData.backTrackStackTop,
1836:                                currentContinuation_pc, currentContinuation_op);
1837:                        if (op == REOP_ASSERT) {
1838:                            testOp = REOP_ASSERTTEST;
1839:                        } else {
1840:                            testOp = REOP_ASSERTNOTTEST;
1841:                        }
1842:                        pushBackTrackState(gData, testOp, pc
1843:                                + getOffset(program, pc));
1844:                        pc += INDEX_LEN;
1845:                        op = program[pc++];
1846:                    }
1847:                        continue;
1848:
1849:                    case REOP_ASSERTTEST:
1850:                    case REOP_ASSERTNOTTEST: {
1851:                        REProgState state = popProgState(gData);
1852:                        gData.cp = state.index;
1853:                        gData.backTrackStackTop = state.backTrack;
1854:                        currentContinuation_pc = state.continuation_pc;
1855:                        currentContinuation_op = state.continuation_op;
1856:                        if (result) {
1857:                            if (op == REOP_ASSERTTEST) {
1858:                                result = true;
1859:                            } else {
1860:                                result = false;
1861:                            }
1862:                        } else {
1863:                            if (op == REOP_ASSERTTEST) {
1864:                                // Do nothing
1865:                            } else {
1866:                                result = true;
1867:                            }
1868:                        }
1869:                    }
1870:                        break;
1871:
1872:                    case REOP_STAR:
1873:                    case REOP_PLUS:
1874:                    case REOP_OPT:
1875:                    case REOP_QUANT:
1876:                    case REOP_MINIMALSTAR:
1877:                    case REOP_MINIMALPLUS:
1878:                    case REOP_MINIMALOPT:
1879:                    case REOP_MINIMALQUANT: {
1880:                        int min, max;
1881:                        boolean greedy = false;
1882:                        switch (op) {
1883:                        case REOP_STAR:
1884:                            greedy = true;
1885:                            // fallthrough
1886:                        case REOP_MINIMALSTAR:
1887:                            min = 0;
1888:                            max = -1;
1889:                            break;
1890:                        case REOP_PLUS:
1891:                            greedy = true;
1892:                            // fallthrough
1893:                        case REOP_MINIMALPLUS:
1894:                            min = 1;
1895:                            max = -1;
1896:                            break;
1897:                        case REOP_OPT:
1898:                            greedy = true;
1899:                            // fallthrough
1900:                        case REOP_MINIMALOPT:
1901:                            min = 0;
1902:                            max = 1;
1903:                            break;
1904:                        case REOP_QUANT:
1905:                            greedy = true;
1906:                            // fallthrough
1907:                        case REOP_MINIMALQUANT:
1908:                            min = getOffset(program, pc);
1909:                            pc += INDEX_LEN;
1910:                            // See comments in emitREBytecode for " - 1" reason
1911:                            max = getOffset(program, pc) - 1;
1912:                            pc += INDEX_LEN;
1913:                            break;
1914:                        default:
1915:                            throw Kit.codeBug();
1916:                        }
1917:                        pushProgState(gData, min, max, null,
1918:                                currentContinuation_pc, currentContinuation_op);
1919:                        if (greedy) {
1920:                            currentContinuation_op = REOP_REPEAT;
1921:                            currentContinuation_pc = pc;
1922:                            pushBackTrackState(gData, REOP_REPEAT, pc);
1923:                            /* Step over <parencount>, <parenindex> & <next> */
1924:                            pc += 3 * INDEX_LEN;
1925:                            op = program[pc++];
1926:                        } else {
1927:                            if (min != 0) {
1928:                                currentContinuation_op = REOP_MINIMALREPEAT;
1929:                                currentContinuation_pc = pc;
1930:                                /* <parencount> <parenindex> & <next> */
1931:                                pc += 3 * INDEX_LEN;
1932:                                op = program[pc++];
1933:                            } else {
1934:                                pushBackTrackState(gData, REOP_MINIMALREPEAT,
1935:                                        pc);
1936:                                popProgState(gData);
1937:                                pc += 2 * INDEX_LEN; // <parencount> & <parenindex>
1938:                                pc = pc + getOffset(program, pc);
1939:                                op = program[pc++];
1940:                            }
1941:                        }
1942:                    }
1943:                        continue;
1944:
1945:                    case REOP_ENDCHILD:
1946:                        // Use the current continuation.
1947:                        pc = currentContinuation_pc;
1948:                        op = currentContinuation_op;
1949:                        continue;
1950:
1951:                    case REOP_REPEAT: {
1952:                        REProgState state = popProgState(gData);
1953:                        if (!result) {
1954:                            //
1955:                            // There's been a failure, see if we have enough
1956:                            // children.
1957:                            //
1958:                            if (state.min == 0)
1959:                                result = true;
1960:                            currentContinuation_pc = state.continuation_pc;
1961:                            currentContinuation_op = state.continuation_op;
1962:                            pc += 2 * INDEX_LEN; /* <parencount> & <parenindex> */
1963:                            pc = pc + getOffset(program, pc);
1964:                            break;
1965:                        } else {
1966:                            if (state.min == 0 && gData.cp == state.index) {
1967:                                // matched an empty string, that'll get us nowhere
1968:                                result = false;
1969:                                currentContinuation_pc = state.continuation_pc;
1970:                                currentContinuation_op = state.continuation_op;
1971:                                pc += 2 * INDEX_LEN;
1972:                                pc = pc + getOffset(program, pc);
1973:                                break;
1974:                            }
1975:                            int new_min = state.min, new_max = state.max;
1976:                            if (new_min != 0)
1977:                                new_min--;
1978:                            if (new_max != -1)
1979:                                new_max--;
1980:                            if (new_max == 0) {
1981:                                result = true;
1982:                                currentContinuation_pc = state.continuation_pc;
1983:                                currentContinuation_op = state.continuation_op;
1984:                                pc += 2 * INDEX_LEN;
1985:                                pc = pc + getOffset(program, pc);
1986:                                break;
1987:                            }
1988:                            pushProgState(gData, new_min, new_max, null,
1989:                                    state.continuation_pc,
1990:                                    state.continuation_op);
1991:                            currentContinuation_op = REOP_REPEAT;
1992:                            currentContinuation_pc = pc;
1993:                            pushBackTrackState(gData, REOP_REPEAT, pc);
1994:                            int parenCount = getIndex(program, pc);
1995:                            pc += INDEX_LEN;
1996:                            int parenIndex = getIndex(program, pc);
1997:                            pc += 2 * INDEX_LEN;
1998:                            op = program[pc++];
1999:                            for (int k = 0; k < parenCount; k++) {
2000:                                gData.set_parens(parenIndex + k, -1, 0);
2001:                            }
2002:                        }
2003:                    }
2004:                        continue;
2005:
2006:                    case REOP_MINIMALREPEAT: {
2007:                        REProgState state = popProgState(gData);
2008:                        if (!result) {
2009:                            //
2010:                            // Non-greedy failure - try to consume another child.
2011:                            //
2012:                            if (state.max == -1 || state.max > 0) {
2013:                                pushProgState(gData, state.min, state.max,
2014:                                        null, state.continuation_pc,
2015:                                        state.continuation_op);
2016:                                currentContinuation_op = REOP_MINIMALREPEAT;
2017:                                currentContinuation_pc = pc;
2018:                                int parenCount = getIndex(program, pc);
2019:                                pc += INDEX_LEN;
2020:                                int parenIndex = getIndex(program, pc);
2021:                                pc += 2 * INDEX_LEN;
2022:                                for (int k = 0; k < parenCount; k++) {
2023:                                    gData.set_parens(parenIndex + k, -1, 0);
2024:                                }
2025:                                op = program[pc++];
2026:                                continue;
2027:                            } else {
2028:                                // Don't need to adjust pc since we're going to pop.
2029:                                currentContinuation_pc = state.continuation_pc;
2030:                                currentContinuation_op = state.continuation_op;
2031:                                break;
2032:                            }
2033:                        } else {
2034:                            if (state.min == 0 && gData.cp == state.index) {
2035:                                // Matched an empty string, that'll get us nowhere.
2036:                                result = false;
2037:                                currentContinuation_pc = state.continuation_pc;
2038:                                currentContinuation_op = state.continuation_op;
2039:                                break;
2040:                            }
2041:                            int new_min = state.min, new_max = state.max;
2042:                            if (new_min != 0)
2043:                                new_min--;
2044:                            if (new_max != -1)
2045:                                new_max--;
2046:                            pushProgState(gData, new_min, new_max, null,
2047:                                    state.continuation_pc,
2048:                                    state.continuation_op);
2049:                            if (new_min != 0) {
2050:                                currentContinuation_op = REOP_MINIMALREPEAT;
2051:                                currentContinuation_pc = pc;
2052:                                int parenCount = getIndex(program, pc);
2053:                                pc += INDEX_LEN;
2054:                                int parenIndex = getIndex(program, pc);
2055:                                pc += 2 * INDEX_LEN;
2056:                                for (int k = 0; k < parenCount; k++) {
2057:                                    gData.set_parens(parenIndex + k, -1, 0);
2058:                                }
2059:                                op = program[pc++];
2060:                            } else {
2061:                                currentContinuation_pc = state.continuation_pc;
2062:                                currentContinuation_op = state.continuation_op;
2063:                                pushBackTrackState(gData, REOP_MINIMALREPEAT,
2064:                                        pc);
2065:                                popProgState(gData);
2066:                                pc += 2 * INDEX_LEN;
2067:                                pc = pc + getOffset(program, pc);
2068:                                op = program[pc++];
2069:                            }
2070:                            continue;
2071:                        }
2072:                    }
2073:
2074:                    case REOP_END:
2075:                        return true;
2076:
2077:                    default:
2078:                        throw Kit.codeBug();
2079:
2080:                    }
2081:                    /*
2082:                     *  If the match failed and there's a backtrack option, take it.
2083:                     *  Otherwise this is a complete and utter failure.
2084:                     */
2085:                    if (!result) {
2086:                        REBackTrackData backTrackData = gData.backTrackStackTop;
2087:                        if (backTrackData != null) {
2088:                            gData.backTrackStackTop = backTrackData.previous;
2089:
2090:                            gData.lastParen = backTrackData.lastParen;
2091:
2092:                            // XXX: If backTrackData will no longer be used, then
2093:                            // there is no need to clone backTrackData.parens
2094:                            if (backTrackData.parens != null) {
2095:                                gData.parens = backTrackData.parens.clone();
2096:                            }
2097:
2098:                            gData.cp = backTrackData.cp;
2099:
2100:                            gData.stateStackTop = backTrackData.stateStackTop;
2101:
2102:                            currentContinuation_op = gData.stateStackTop.continuation_op;
2103:                            currentContinuation_pc = gData.stateStackTop.continuation_pc;
2104:                            pc = backTrackData.continuation_pc;
2105:                            op = backTrackData.continuation_op;
2106:                            continue;
2107:                        } else
2108:                            return false;
2109:                    }
2110:
2111:                    op = program[pc++];
2112:                }
2113:
2114:            }
2115:
2116:            private static boolean matchRegExp(REGlobalData gData,
2117:                    RECompiled re, char[] chars, int start, int end,
2118:                    boolean multiline) {
2119:                if (re.parenCount != 0) {
2120:                    gData.parens = new long[re.parenCount];
2121:                } else {
2122:                    gData.parens = null;
2123:                }
2124:
2125:                gData.backTrackStackTop = null;
2126:
2127:                gData.stateStackTop = null;
2128:
2129:                gData.multiline = multiline;
2130:                gData.regexp = re;
2131:                gData.lastParen = 0;
2132:
2133:                int anchorCh = gData.regexp.anchorCh;
2134:                //
2135:                // have to include the position beyond the last character
2136:                //  in order to detect end-of-input/line condition
2137:                //
2138:                for (int i = start; i <= end; ++i) {
2139:                    //
2140:                    // If the first node is a literal match, step the index into
2141:                    // the string until that match is made, or fail if it can't be
2142:                    // found at all.
2143:                    //
2144:                    if (anchorCh >= 0) {
2145:                        for (;;) {
2146:                            if (i == end) {
2147:                                return false;
2148:                            }
2149:                            char matchCh = chars[i];
2150:                            if (matchCh == anchorCh
2151:                                    || ((gData.regexp.flags & JSREG_FOLD) != 0 && upcase(matchCh) == upcase((char) anchorCh))) {
2152:                                break;
2153:                            }
2154:                            ++i;
2155:                        }
2156:                    }
2157:                    gData.cp = i;
2158:                    for (int j = 0; j < re.parenCount; j++) {
2159:                        gData.set_parens(j, -1, 0);
2160:                    }
2161:                    boolean result = executeREBytecode(gData, chars, end);
2162:
2163:                    gData.backTrackStackTop = null;
2164:                    gData.stateStackTop = null;
2165:                    if (result) {
2166:                        gData.skipped = i - start;
2167:                        return true;
2168:                    }
2169:                }
2170:                return false;
2171:            }
2172:
2173:            /*
2174:             * indexp is assumed to be an array of length 1
2175:             */
2176:            Object executeRegExp(Context cx, Scriptable scopeObj,
2177:                    RegExpImpl res, String str, int indexp[], int matchType) {
2178:                REGlobalData gData = new REGlobalData();
2179:
2180:                int start = indexp[0];
2181:                char[] charArray = str.toCharArray();
2182:                int end = charArray.length;
2183:                if (start > end)
2184:                    start = end;
2185:                //
2186:                // Call the recursive matcher to do the real work.
2187:                //
2188:                boolean matches = matchRegExp(gData, re, charArray, start, end,
2189:                        res.multiline);
2190:                if (!matches) {
2191:                    if (matchType != PREFIX)
2192:                        return null;
2193:                    return Undefined.instance;
2194:                }
2195:                int index = gData.cp;
2196:                int i = index;
2197:                indexp[0] = i;
2198:                int matchlen = i - (start + gData.skipped);
2199:                int ep = index;
2200:                index -= matchlen;
2201:                Object result;
2202:                Scriptable obj;
2203:
2204:                if (matchType == TEST) {
2205:                    /*
2206:                     * Testing for a match and updating cx.regExpImpl: don't allocate
2207:                     * an array object, do return true.
2208:                     */
2209:                    result = Boolean.TRUE;
2210:                    obj = null;
2211:                } else {
2212:                    /*
2213:                     * The array returned on match has element 0 bound to the matched
2214:                     * string, elements 1 through re.parenCount bound to the paren
2215:                     * matches, an index property telling the length of the left context,
2216:                     * and an input property referring to the input string.
2217:                     */
2218:                    Scriptable scope = getTopLevelScope(scopeObj);
2219:                    result = ScriptRuntime.newObject(cx, scope, "Array", null);
2220:                    obj = (Scriptable) result;
2221:
2222:                    String matchstr = new String(charArray, index, matchlen);
2223:                    obj.put(0, obj, matchstr);
2224:                }
2225:
2226:                if (re.parenCount == 0) {
2227:                    res.parens = null;
2228:                    res.lastParen = SubString.emptySubString;
2229:                } else {
2230:                    SubString parsub = null;
2231:                    int num;
2232:                    res.parens = new SubString[re.parenCount];
2233:                    for (num = 0; num < re.parenCount; num++) {
2234:                        int cap_index = gData.parens_index(num);
2235:                        String parstr;
2236:                        if (cap_index != -1) {
2237:                            int cap_length = gData.parens_length(num);
2238:                            parsub = new SubString(charArray, cap_index,
2239:                                    cap_length);
2240:                            res.parens[num] = parsub;
2241:                            if (matchType == TEST)
2242:                                continue;
2243:                            parstr = parsub.toString();
2244:                            obj.put(num + 1, obj, parstr);
2245:                        } else {
2246:                            if (matchType != TEST)
2247:                                obj.put(num + 1, obj, Undefined.instance);
2248:                        }
2249:                    }
2250:                    res.lastParen = parsub;
2251:                }
2252:
2253:                if (!(matchType == TEST)) {
2254:                    /*
2255:                     * Define the index and input properties last for better for/in loop
2256:                     * order (so they come after the elements).
2257:                     */
2258:                    obj.put("index", obj, new Integer(start + gData.skipped));
2259:                    obj.put("input", obj, str);
2260:                }
2261:
2262:                if (res.lastMatch == null) {
2263:                    res.lastMatch = new SubString();
2264:                    res.leftContext = new SubString();
2265:                    res.rightContext = new SubString();
2266:                }
2267:                res.lastMatch.charArray = charArray;
2268:                res.lastMatch.index = index;
2269:                res.lastMatch.length = matchlen;
2270:
2271:                res.leftContext.charArray = charArray;
2272:                if (cx.getLanguageVersion() == Context.VERSION_1_2) {
2273:                    /*
2274:                     * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
2275:                     * in scalar contexts, and unintentionally for the string.match "list"
2276:                     * psuedo-context.  On "hi there bye", the following would result:
2277:                     *
2278:                     * Language     while(/ /g){print("$`");}   s/ /$`/g
2279:                     * perl4.036    "hi", "there"               "hihitherehi therebye"
2280:                     * perl5        "hi", "hi there"            "hihitherehi therebye"
2281:                     * js1.2        "hi", "there"               "hihitheretherebye"
2282:                     *
2283:                     * Insofar as JS1.2 always defined $` as "left context from the last
2284:                     * match" for global regexps, it was more consistent than perl4.
2285:                     */
2286:                    res.leftContext.index = start;
2287:                    res.leftContext.length = gData.skipped;
2288:                } else {
2289:                    /*
2290:                     * For JS1.3 and ECMAv2, emulate Perl5 exactly:
2291:                     *
2292:                     * js1.3        "hi", "hi there"            "hihitherehi therebye"
2293:                     */
2294:                    res.leftContext.index = 0;
2295:                    res.leftContext.length = start + gData.skipped;
2296:                }
2297:
2298:                res.rightContext.charArray = charArray;
2299:                res.rightContext.index = ep;
2300:                res.rightContext.length = end - ep;
2301:
2302:                return result;
2303:            }
2304:
2305:            int getFlags() {
2306:                return re.flags;
2307:            }
2308:
2309:            private static void reportWarning(Context cx, String messageId,
2310:                    String arg) {
2311:                if (cx.hasFeature(Context.FEATURE_STRICT_MODE)) {
2312:                    String msg = ScriptRuntime.getMessage1(messageId, arg);
2313:                    Context.reportWarning(msg);
2314:                }
2315:            }
2316:
2317:            private static void reportError(String messageId, String arg) {
2318:                String msg = ScriptRuntime.getMessage1(messageId, arg);
2319:                throw ScriptRuntime.constructError("SyntaxError", msg);
2320:            }
2321:
2322:            // #string_id_map#
2323:
2324:            private static final int Id_lastIndex = 1, Id_source = 2,
2325:                    Id_global = 3, Id_ignoreCase = 4, Id_multiline = 5,
2326:
2327:                    MAX_INSTANCE_ID = 5;
2328:
2329:            protected int getMaxInstanceId() {
2330:                return MAX_INSTANCE_ID;
2331:            }
2332:
2333:            protected int findInstanceIdInfo(String s) {
2334:                int id;
2335:                // #generated# Last update: 2007-05-09 08:16:24 EDT
2336:                L0: {
2337:                    id = 0;
2338:                    String X = null;
2339:                    int c;
2340:                    int s_length = s.length();
2341:                    if (s_length == 6) {
2342:                        c = s.charAt(0);
2343:                        if (c == 'g') {
2344:                            X = "global";
2345:                            id = Id_global;
2346:                        } else if (c == 's') {
2347:                            X = "source";
2348:                            id = Id_source;
2349:                        }
2350:                    } else if (s_length == 9) {
2351:                        c = s.charAt(0);
2352:                        if (c == 'l') {
2353:                            X = "lastIndex";
2354:                            id = Id_lastIndex;
2355:                        } else if (c == 'm') {
2356:                            X = "multiline";
2357:                            id = Id_multiline;
2358:                        }
2359:                    } else if (s_length == 10) {
2360:                        X = "ignoreCase";
2361:                        id = Id_ignoreCase;
2362:                    }
2363:                    if (X != null && X != s && !X.equals(s))
2364:                        id = 0;
2365:                    break L0;
2366:                }
2367:                // #/generated#
2368:                // #/string_id_map#
2369:
2370:                if (id == 0)
2371:                    return super .findInstanceIdInfo(s);
2372:
2373:                int attr;
2374:                switch (id) {
2375:                case Id_lastIndex:
2376:                    attr = PERMANENT | DONTENUM;
2377:                    break;
2378:                case Id_source:
2379:                case Id_global:
2380:                case Id_ignoreCase:
2381:                case Id_multiline:
2382:                    attr = PERMANENT | READONLY | DONTENUM;
2383:                    break;
2384:                default:
2385:                    throw new IllegalStateException();
2386:                }
2387:                return instanceIdInfo(attr, id);
2388:            }
2389:
2390:            protected String getInstanceIdName(int id) {
2391:                switch (id) {
2392:                case Id_lastIndex:
2393:                    return "lastIndex";
2394:                case Id_source:
2395:                    return "source";
2396:                case Id_global:
2397:                    return "global";
2398:                case Id_ignoreCase:
2399:                    return "ignoreCase";
2400:                case Id_multiline:
2401:                    return "multiline";
2402:                }
2403:                return super .getInstanceIdName(id);
2404:            }
2405:
2406:            protected Object getInstanceIdValue(int id) {
2407:                switch (id) {
2408:                case Id_lastIndex:
2409:                    return ScriptRuntime.wrapNumber(lastIndex);
2410:                case Id_source:
2411:                    return new String(re.source);
2412:                case Id_global:
2413:                    return ScriptRuntime
2414:                            .wrapBoolean((re.flags & JSREG_GLOB) != 0);
2415:                case Id_ignoreCase:
2416:                    return ScriptRuntime
2417:                            .wrapBoolean((re.flags & JSREG_FOLD) != 0);
2418:                case Id_multiline:
2419:                    return ScriptRuntime
2420:                            .wrapBoolean((re.flags & JSREG_MULTILINE) != 0);
2421:                }
2422:                return super .getInstanceIdValue(id);
2423:            }
2424:
2425:            protected void setInstanceIdValue(int id, Object value) {
2426:                if (id == Id_lastIndex) {
2427:                    lastIndex = ScriptRuntime.toNumber(value);
2428:                    return;
2429:                }
2430:                super .setInstanceIdValue(id, value);
2431:            }
2432:
2433:            protected void initPrototypeId(int id) {
2434:                String s;
2435:                int arity;
2436:                switch (id) {
2437:                case Id_compile:
2438:                    arity = 1;
2439:                    s = "compile";
2440:                    break;
2441:                case Id_toString:
2442:                    arity = 0;
2443:                    s = "toString";
2444:                    break;
2445:                case Id_toSource:
2446:                    arity = 0;
2447:                    s = "toSource";
2448:                    break;
2449:                case Id_exec:
2450:                    arity = 1;
2451:                    s = "exec";
2452:                    break;
2453:                case Id_test:
2454:                    arity = 1;
2455:                    s = "test";
2456:                    break;
2457:                case Id_prefix:
2458:                    arity = 1;
2459:                    s = "prefix";
2460:                    break;
2461:                default:
2462:                    throw new IllegalArgumentException(String.valueOf(id));
2463:                }
2464:                initPrototypeMethod(REGEXP_TAG, id, s, arity);
2465:            }
2466:
2467:            public Object execIdCall(IdFunctionObject f, Context cx,
2468:                    Scriptable scope, Scriptable this Obj, Object[] args) {
2469:                if (!f.hasTag(REGEXP_TAG)) {
2470:                    return super .execIdCall(f, cx, scope, this Obj, args);
2471:                }
2472:                int id = f.methodId();
2473:                switch (id) {
2474:                case Id_compile:
2475:                    return realThis(this Obj, f).compile(cx, scope, args);
2476:
2477:                case Id_toString:
2478:                case Id_toSource:
2479:                    return realThis(this Obj, f).toString();
2480:
2481:                case Id_exec:
2482:                    return realThis(this Obj, f).execSub(cx, scope, args, MATCH);
2483:
2484:                case Id_test: {
2485:                    Object x = realThis(this Obj, f).execSub(cx, scope, args,
2486:                            TEST);
2487:                    return Boolean.TRUE.equals(x) ? Boolean.TRUE
2488:                            : Boolean.FALSE;
2489:                }
2490:
2491:                case Id_prefix:
2492:                    return realThis(this Obj, f)
2493:                            .execSub(cx, scope, args, PREFIX);
2494:                }
2495:                throw new IllegalArgumentException(String.valueOf(id));
2496:            }
2497:
2498:            private static NativeRegExp realThis(Scriptable this Obj,
2499:                    IdFunctionObject f) {
2500:                if (!(this Obj instanceof  NativeRegExp))
2501:                    throw incompatibleCallError(f);
2502:                return (NativeRegExp) this Obj;
2503:            }
2504:
2505:            // #string_id_map#
2506:            protected int findPrototypeId(String s) {
2507:                int id;
2508:                // #generated# Last update: 2007-05-09 08:16:24 EDT
2509:                L0: {
2510:                    id = 0;
2511:                    String X = null;
2512:                    int c;
2513:                    L: switch (s.length()) {
2514:                    case 4:
2515:                        c = s.charAt(0);
2516:                        if (c == 'e') {
2517:                            X = "exec";
2518:                            id = Id_exec;
2519:                        } else if (c == 't') {
2520:                            X = "test";
2521:                            id = Id_test;
2522:                        }
2523:                        break L;
2524:                    case 6:
2525:                        X = "prefix";
2526:                        id = Id_prefix;
2527:                        break L;
2528:                    case 7:
2529:                        X = "compile";
2530:                        id = Id_compile;
2531:                        break L;
2532:                    case 8:
2533:                        c = s.charAt(3);
2534:                        if (c == 'o') {
2535:                            X = "toSource";
2536:                            id = Id_toSource;
2537:                        } else if (c == 't') {
2538:                            X = "toString";
2539:                            id = Id_toString;
2540:                        }
2541:                        break L;
2542:                    }
2543:                    if (X != null && X != s && !X.equals(s))
2544:                        id = 0;
2545:                    break L0;
2546:                }
2547:                // #/generated#
2548:                return id;
2549:            }
2550:
2551:            private static final int Id_compile = 1, Id_toString = 2,
2552:                    Id_toSource = 3, Id_exec = 4, Id_test = 5, Id_prefix = 6,
2553:
2554:                    MAX_PROTOTYPE_ID = 6;
2555:
2556:            // #/string_id_map#
2557:
2558:            private RECompiled re;
2559:            double lastIndex; /* index after last match, for //g iterator */
2560:
2561:        } // class NativeRegExp
2562:
2563:        class RECompiled implements  Serializable {
2564:            static final long serialVersionUID = -6144956577595844213L;
2565:
2566:            char[] source; /* locked source string, sans // */
2567:            int parenCount; /* number of parenthesized submatches */
2568:            int flags; /* flags  */
2569:            byte[] program; /* regular expression bytecode */
2570:            int classCount; /* count [...] bitmaps */
2571:            RECharSet[] classList; /* list of [...] bitmaps */
2572:            int anchorCh = -1; /* if >= 0, then re starts with this literal char */
2573:        }
2574:
2575:        class RENode {
2576:
2577:            RENode(byte op) {
2578:                this .op = op;
2579:            }
2580:
2581:            byte op; /* r.e. op bytecode */
2582:            RENode next; /* next in concatenation order */
2583:            RENode kid; /* first operand */
2584:
2585:            RENode kid2; /* second operand */
2586:            int num; /* could be a number */
2587:            int parenIndex; /* or a parenthesis index */
2588:
2589:            /* or a range */
2590:            int min;
2591:            int max;
2592:            int parenCount;
2593:            boolean greedy;
2594:
2595:            /* or a character class */
2596:            int startIndex;
2597:            int kidlen; /* length of string at kid, in chars */
2598:            int bmsize; /* bitmap size, based on max char code */
2599:            int index; /* index into class list */
2600:
2601:            /* or a literal sequence */
2602:            char chr; /* of one character */
2603:            int length; /* or many (via the index) */
2604:            int flatIndex; /* which is -1 if not sourced */
2605:
2606:        }
2607:
2608:        class CompilerState {
2609:
2610:            CompilerState(Context cx, char[] source, int length, int flags) {
2611:                this .cx = cx;
2612:                this .cpbegin = source;
2613:                this .cp = 0;
2614:                this .cpend = length;
2615:                this .flags = flags;
2616:                this .parenCount = 0;
2617:                this .classCount = 0;
2618:                this .progLength = 0;
2619:            }
2620:
2621:            Context cx;
2622:            char cpbegin[];
2623:            int cpend;
2624:            int cp;
2625:            int flags;
2626:            int parenCount;
2627:            int parenNesting;
2628:            int classCount; /* number of [] encountered */
2629:            int progLength; /* estimated bytecode length */
2630:            RENode result;
2631:        }
2632:
2633:        class REProgState {
2634:            REProgState(REProgState previous, int min, int max, int index,
2635:                    REBackTrackData backTrack, int continuation_pc,
2636:                    int continuation_op) {
2637:                this .previous = previous;
2638:                this .min = min;
2639:                this .max = max;
2640:                this .index = index;
2641:                this .continuation_op = continuation_op;
2642:                this .continuation_pc = continuation_pc;
2643:                this .backTrack = backTrack;
2644:            }
2645:
2646:            REProgState previous; // previous state in stack
2647:
2648:            int min; /* current quantifier min */
2649:            int max; /* current quantifier max */
2650:            int index; /* progress in text */
2651:            int continuation_op;
2652:            int continuation_pc;
2653:            REBackTrackData backTrack; // used by ASSERT_  to recover state
2654:        }
2655:
2656:        class REBackTrackData {
2657:
2658:            REBackTrackData(REGlobalData gData, int op, int pc) {
2659:                previous = gData.backTrackStackTop;
2660:                continuation_op = op;
2661:                continuation_pc = pc;
2662:                lastParen = gData.lastParen;
2663:                if (gData.parens != null) {
2664:                    parens = gData.parens.clone();
2665:                }
2666:                cp = gData.cp;
2667:                stateStackTop = gData.stateStackTop;
2668:            }
2669:
2670:            REBackTrackData previous;
2671:
2672:            int continuation_op; /* where to backtrack to */
2673:            int continuation_pc;
2674:            int lastParen;
2675:            long[] parens; /* parenthesis captures */
2676:            int cp; /* char buffer index */
2677:            REProgState stateStackTop; /* state of op that backtracked */
2678:        }
2679:
2680:        class REGlobalData {
2681:            boolean multiline;
2682:            RECompiled regexp; /* the RE in execution */
2683:            int lastParen; /* highest paren set so far */
2684:            int skipped; /* chars skipped anchoring this r.e. */
2685:
2686:            int cp; /* char buffer index */
2687:            long[] parens; /* parens captures */
2688:
2689:            REProgState stateStackTop; /* stack of state of current ancestors */
2690:
2691:            REBackTrackData backTrackStackTop; /* last matched-so-far position */
2692:
2693:            /**
2694:             * Get start of parenthesis capture contents, -1 for empty.
2695:             */
2696:            int parens_index(int i) {
2697:                return (int) (parens[i]);
2698:            }
2699:
2700:            /**
2701:             * Get length of parenthesis capture contents.
2702:             */
2703:            int parens_length(int i) {
2704:                return (int) (parens[i] >>> 32);
2705:            }
2706:
2707:            void set_parens(int i, int index, int length) {
2708:                parens[i] = (index & 0xffffffffL) | ((long) length << 32);
2709:            }
2710:
2711:        }
2712:
2713:        /*
2714:         * This struct holds a bitmap representation of a class from a regexp.
2715:         * There's a list of these referenced by the classList field in the NativeRegExp
2716:         * struct below. The initial state has startIndex set to the offset in the
2717:         * original regexp source of the beginning of the class contents. The first
2718:         * use of the class converts the source representation into a bitmap.
2719:         *
2720:         */
2721:        final class RECharSet implements  Serializable {
2722:            static final long serialVersionUID = 7931787979395898394L;
2723:
2724:            RECharSet(int length, int startIndex, int strlength) {
2725:                this .length = length;
2726:                this .startIndex = startIndex;
2727:                this .strlength = strlength;
2728:            }
2729:
2730:            int length;
2731:            int startIndex;
2732:            int strlength;
2733:
2734:            volatile transient boolean converted;
2735:            volatile transient boolean sense;
2736:            volatile transient byte[] bits;
2737:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.