001: /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
002: *
003: * ***** BEGIN LICENSE BLOCK *****
004: * Version: MPL 1.1/GPL 2.0
005: *
006: * The contents of this file are subject to the Mozilla Public License Version
007: * 1.1 (the "License"); you may not use this file except in compliance with
008: * the License. You may obtain a copy of the License at
009: * http://www.mozilla.org/MPL/
010: *
011: * Software distributed under the License is distributed on an "AS IS" basis,
012: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
013: * for the specific language governing rights and limitations under the
014: * License.
015: *
016: * The Original Code is Rhino code, released
017: * May 6, 1998.
018: *
019: * The Initial Developer of the Original Code is
020: * Netscape Communications Corporation.
021: * Portions created by the Initial Developer are Copyright (C) 1997-1999
022: * the Initial Developer. All Rights Reserved.
023: *
024: * Contributor(s):
025: *
026: * Alternatively, the contents of this file may be used under the terms of
027: * the GNU General Public License Version 2 or later (the "GPL"), in which
028: * case the provisions of the GPL are applicable instead of those above. If
029: * you wish to allow use of your version of this file only under the terms of
030: * the GPL and not to allow others to use your version of this file under the
031: * MPL, indicate your decision by deleting the provisions above and replacing
032: * them with the notice and other provisions required by the GPL. If you do
033: * not delete the provisions above, a recipient may use your version of this
034: * file under either the MPL or the GPL.
035: *
036: * ***** END LICENSE BLOCK ***** */
037:
038: package org.mozilla.javascript.regexp;
039:
040: import org.mozilla.javascript.*;
041:
042: /**
043: *
044: */
045: public class RegExpImpl implements RegExpProxy {
046:
047: public boolean isRegExp(Scriptable obj) {
048: return obj instanceof NativeRegExp;
049: }
050:
051: public Object compileRegExp(Context cx, String source, String flags) {
052: return NativeRegExp.compileRE(cx, source, flags, false);
053: }
054:
055: public Scriptable wrapRegExp(Context cx, Scriptable scope,
056: Object compiled) {
057: return new NativeRegExp(scope, compiled);
058: }
059:
060: public Object action(Context cx, Scriptable scope,
061: Scriptable this Obj, Object[] args, int actionType) {
062: GlobData data = new GlobData();
063: data.mode = actionType;
064:
065: switch (actionType) {
066: case RA_MATCH: {
067: Object rval;
068: data.optarg = 1;
069: rval = matchOrReplace(cx, scope, this Obj, args, this , data,
070: false);
071: return data.arrayobj == null ? rval : data.arrayobj;
072: }
073:
074: case RA_SEARCH:
075: data.optarg = 1;
076: return matchOrReplace(cx, scope, this Obj, args, this , data,
077: false);
078:
079: case RA_REPLACE: {
080: Object arg1 = args.length < 2 ? Undefined.instance
081: : args[1];
082: String repstr = null;
083: Function lambda = null;
084: if (arg1 instanceof Function) {
085: lambda = (Function) arg1;
086: } else {
087: repstr = ScriptRuntime.toString(arg1);
088: }
089:
090: data.optarg = 2;
091: data.lambda = lambda;
092: data.repstr = repstr;
093: data.dollar = repstr == null ? -1 : repstr.indexOf('$');
094: data.charBuf = null;
095: data.leftIndex = 0;
096: Object val = matchOrReplace(cx, scope, this Obj, args, this ,
097: data, true);
098: SubString rc = this .rightContext;
099:
100: if (data.charBuf == null) {
101: if (data.global || val == null
102: || !val.equals(Boolean.TRUE)) {
103: /* Didn't match even once. */
104: return data.str;
105: }
106: SubString lc = this .leftContext;
107: replace_glob(data, cx, scope, this , lc.index, lc.length);
108: }
109: data.charBuf.append(rc.charArray, rc.index, rc.length);
110: return data.charBuf.toString();
111: }
112:
113: default:
114: throw Kit.codeBug();
115: }
116: }
117:
118: /**
119: * Analog of C match_or_replace.
120: */
121: private static Object matchOrReplace(Context cx, Scriptable scope,
122: Scriptable this Obj, Object[] args, RegExpImpl reImpl,
123: GlobData data, boolean forceFlat) {
124: NativeRegExp re;
125:
126: String str = ScriptRuntime.toString(this Obj);
127: data.str = str;
128: Scriptable topScope = ScriptableObject.getTopLevelScope(scope);
129:
130: if (args.length == 0) {
131: Object compiled = NativeRegExp.compileRE(cx, "", "", false);
132: re = new NativeRegExp(topScope, compiled);
133: } else if (args[0] instanceof NativeRegExp) {
134: re = (NativeRegExp) args[0];
135: } else {
136: String src = ScriptRuntime.toString(args[0]);
137: String opt;
138: if (data.optarg < args.length) {
139: args[0] = src;
140: opt = ScriptRuntime.toString(args[data.optarg]);
141: } else {
142: opt = null;
143: }
144: Object compiled = NativeRegExp.compileRE(cx, src, opt,
145: forceFlat);
146: re = new NativeRegExp(topScope, compiled);
147: }
148: data.regexp = re;
149:
150: data.global = (re.getFlags() & NativeRegExp.JSREG_GLOB) != 0;
151: int[] indexp = { 0 };
152: Object result = null;
153: if (data.mode == RA_SEARCH) {
154: result = re.executeRegExp(cx, scope, reImpl, str, indexp,
155: NativeRegExp.TEST);
156: if (result != null && result.equals(Boolean.TRUE))
157: result = new Integer(reImpl.leftContext.length);
158: else
159: result = new Integer(-1);
160: } else if (data.global) {
161: re.lastIndex = 0;
162: for (int count = 0; indexp[0] <= str.length(); count++) {
163: result = re.executeRegExp(cx, scope, reImpl, str,
164: indexp, NativeRegExp.TEST);
165: if (result == null || !result.equals(Boolean.TRUE))
166: break;
167: if (data.mode == RA_MATCH) {
168: match_glob(data, cx, scope, count, reImpl);
169: } else {
170: if (data.mode != RA_REPLACE)
171: Kit.codeBug();
172: SubString lastMatch = reImpl.lastMatch;
173: int leftIndex = data.leftIndex;
174: int leftlen = lastMatch.index - leftIndex;
175: data.leftIndex = lastMatch.index + lastMatch.length;
176: replace_glob(data, cx, scope, reImpl, leftIndex,
177: leftlen);
178: }
179: if (reImpl.lastMatch.length == 0) {
180: if (indexp[0] == str.length())
181: break;
182: indexp[0]++;
183: }
184: }
185: } else {
186: result = re.executeRegExp(cx, scope, reImpl, str, indexp,
187: ((data.mode == RA_REPLACE) ? NativeRegExp.TEST
188: : NativeRegExp.MATCH));
189: }
190:
191: return result;
192: }
193:
194: public int find_split(Context cx, Scriptable scope, String target,
195: String separator, Scriptable reObj, int[] ip,
196: int[] matchlen, boolean[] matched, String[][] parensp) {
197: int i = ip[0];
198: int length = target.length();
199: int result;
200:
201: int version = cx.getLanguageVersion();
202: NativeRegExp re = (NativeRegExp) reObj;
203: again: while (true) { // imitating C label
204: /* JS1.2 deviated from Perl by never matching at end of string. */
205: int ipsave = ip[0]; // reuse ip to save object creation
206: ip[0] = i;
207: Object ret = re.executeRegExp(cx, scope, this , target, ip,
208: NativeRegExp.TEST);
209: if (ret != Boolean.TRUE) {
210: // Mismatch: ensure our caller advances i past end of string.
211: ip[0] = ipsave;
212: matchlen[0] = 1;
213: matched[0] = false;
214: return length;
215: }
216: i = ip[0];
217: ip[0] = ipsave;
218: matched[0] = true;
219:
220: SubString sep = this .lastMatch;
221: matchlen[0] = sep.length;
222: if (matchlen[0] == 0) {
223: /*
224: * Empty string match: never split on an empty
225: * match at the start of a find_split cycle. Same
226: * rule as for an empty global match in
227: * match_or_replace.
228: */
229: if (i == ip[0]) {
230: /*
231: * "Bump-along" to avoid sticking at an empty
232: * match, but don't bump past end of string --
233: * our caller must do that by adding
234: * sep->length to our return value.
235: */
236: if (i == length) {
237: if (version == Context.VERSION_1_2) {
238: matchlen[0] = 1;
239: result = i;
240: } else
241: result = -1;
242: break;
243: }
244: i++;
245: continue again; // imitating C goto
246: }
247: }
248: // PR_ASSERT((size_t)i >= sep->length);
249: result = i - matchlen[0];
250: break;
251: }
252: int size = (parens == null) ? 0 : parens.length;
253: parensp[0] = new String[size];
254: for (int num = 0; num < size; num++) {
255: SubString parsub = getParenSubString(num);
256: parensp[0][num] = parsub.toString();
257: }
258: return result;
259: }
260:
261: /**
262: * Analog of REGEXP_PAREN_SUBSTRING in C jsregexp.h.
263: * Assumes zero-based; i.e., for $3, i==2
264: */
265: SubString getParenSubString(int i) {
266: if (parens != null && i < parens.length) {
267: SubString parsub = parens[i];
268: if (parsub != null) {
269: return parsub;
270: }
271: }
272: return SubString.emptySubString;
273: }
274:
275: /*
276: * Analog of match_glob() in jsstr.c
277: */
278: private static void match_glob(GlobData mdata, Context cx,
279: Scriptable scope, int count, RegExpImpl reImpl) {
280: if (mdata.arrayobj == null) {
281: Scriptable s = ScriptableObject.getTopLevelScope(scope);
282: mdata.arrayobj = ScriptRuntime.newObject(cx, s, "Array",
283: null);
284: }
285: SubString matchsub = reImpl.lastMatch;
286: String matchstr = matchsub.toString();
287: mdata.arrayobj.put(count, mdata.arrayobj, matchstr);
288: }
289:
290: /*
291: * Analog of replace_glob() in jsstr.c
292: */
293: private static void replace_glob(GlobData rdata, Context cx,
294: Scriptable scope, RegExpImpl reImpl, int leftIndex,
295: int leftlen) {
296: int replen;
297: String lambdaStr;
298: if (rdata.lambda != null) {
299: // invoke lambda function with args lastMatch, $1, $2, ... $n,
300: // leftContext.length, whole string.
301: SubString[] parens = reImpl.parens;
302: int parenCount = (parens == null) ? 0 : parens.length;
303: Object[] args = new Object[parenCount + 3];
304: args[0] = reImpl.lastMatch.toString();
305: for (int i = 0; i < parenCount; i++) {
306: SubString sub = parens[i];
307: if (sub != null) {
308: args[i + 1] = sub.toString();
309: } else {
310: args[i + 1] = Undefined.instance;
311: }
312: }
313: args[parenCount + 1] = new Integer(
314: reImpl.leftContext.length);
315: args[parenCount + 2] = rdata.str;
316: // This is a hack to prevent expose of reImpl data to
317: // JS function which can run new regexps modifing
318: // regexp that are used later by the engine.
319: // TODO: redesign is necessary
320: if (reImpl != ScriptRuntime.getRegExpProxy(cx))
321: Kit.codeBug();
322: RegExpImpl re2 = new RegExpImpl();
323: re2.multiline = reImpl.multiline;
324: re2.input = reImpl.input;
325: ScriptRuntime.setRegExpProxy(cx, re2);
326: try {
327: Scriptable parent = ScriptableObject
328: .getTopLevelScope(scope);
329: Object result = rdata.lambda.call(cx, parent, parent,
330: args);
331: lambdaStr = ScriptRuntime.toString(result);
332: } finally {
333: ScriptRuntime.setRegExpProxy(cx, reImpl);
334: }
335: replen = lambdaStr.length();
336: } else {
337: lambdaStr = null;
338: replen = rdata.repstr.length();
339: if (rdata.dollar >= 0) {
340: int[] skip = new int[1];
341: int dp = rdata.dollar;
342: do {
343: SubString sub = interpretDollar(cx, reImpl,
344: rdata.repstr, dp, skip);
345: if (sub != null) {
346: replen += sub.length - skip[0];
347: dp += skip[0];
348: } else {
349: ++dp;
350: }
351: dp = rdata.repstr.indexOf('$', dp);
352: } while (dp >= 0);
353: }
354: }
355:
356: int growth = leftlen + replen + reImpl.rightContext.length;
357: StringBuffer charBuf = rdata.charBuf;
358: if (charBuf == null) {
359: charBuf = new StringBuffer(growth);
360: rdata.charBuf = charBuf;
361: } else {
362: charBuf.ensureCapacity(rdata.charBuf.length() + growth);
363: }
364:
365: charBuf
366: .append(reImpl.leftContext.charArray, leftIndex,
367: leftlen);
368: if (rdata.lambda != null) {
369: charBuf.append(lambdaStr);
370: } else {
371: do_replace(rdata, cx, reImpl);
372: }
373: }
374:
375: private static SubString interpretDollar(Context cx,
376: RegExpImpl res, String da, int dp, int[] skip) {
377: char dc;
378: int num, tmp;
379:
380: if (da.charAt(dp) != '$')
381: Kit.codeBug();
382:
383: /* Allow a real backslash (literal "\\") to escape "$1" etc. */
384: int version = cx.getLanguageVersion();
385: if (version != Context.VERSION_DEFAULT
386: && version <= Context.VERSION_1_4) {
387: if (dp > 0 && da.charAt(dp - 1) == '\\')
388: return null;
389: }
390: int daL = da.length();
391: if (dp + 1 >= daL)
392: return null;
393: /* Interpret all Perl match-induced dollar variables. */
394: dc = da.charAt(dp + 1);
395: if (NativeRegExp.isDigit(dc)) {
396: int cp;
397: if (version != Context.VERSION_DEFAULT
398: && version <= Context.VERSION_1_4) {
399: if (dc == '0')
400: return null;
401: /* Check for overflow to avoid gobbling arbitrary decimal digits. */
402: num = 0;
403: cp = dp;
404: while (++cp < daL
405: && NativeRegExp.isDigit(dc = da.charAt(cp))) {
406: tmp = 10 * num + (dc - '0');
407: if (tmp < num)
408: break;
409: num = tmp;
410: }
411: } else { /* ECMA 3, 1-9 or 01-99 */
412: int parenCount = (res.parens == null) ? 0
413: : res.parens.length;
414: num = dc - '0';
415: if (num > parenCount)
416: return null;
417: cp = dp + 2;
418: if ((dp + 2) < daL) {
419: dc = da.charAt(dp + 2);
420: if (NativeRegExp.isDigit(dc)) {
421: tmp = 10 * num + (dc - '0');
422: if (tmp <= parenCount) {
423: cp++;
424: num = tmp;
425: }
426: }
427: }
428: if (num == 0)
429: return null; /* $0 or $00 is not valid */
430: }
431: /* Adjust num from 1 $n-origin to 0 array-index-origin. */
432: num--;
433: skip[0] = cp - dp;
434: return res.getParenSubString(num);
435: }
436:
437: skip[0] = 2;
438: switch (dc) {
439: case '$':
440: return new SubString("$");
441: case '&':
442: return res.lastMatch;
443: case '+':
444: return res.lastParen;
445: case '`':
446: if (version == Context.VERSION_1_2) {
447: /*
448: * JS1.2 imitated the Perl4 bug where left context at each step
449: * in an iterative use of a global regexp started from last match,
450: * not from the start of the target string. But Perl4 does start
451: * $` at the beginning of the target string when it is used in a
452: * substitution, so we emulate that special case here.
453: */
454: res.leftContext.index = 0;
455: res.leftContext.length = res.lastMatch.index;
456: }
457: return res.leftContext;
458: case '\'':
459: return res.rightContext;
460: }
461: return null;
462: }
463:
464: /**
465: * Analog of do_replace in jsstr.c
466: */
467: private static void do_replace(GlobData rdata, Context cx,
468: RegExpImpl regExpImpl) {
469: StringBuffer charBuf = rdata.charBuf;
470: int cp = 0;
471: String da = rdata.repstr;
472: int dp = rdata.dollar;
473: if (dp != -1) {
474: int[] skip = new int[1];
475: do {
476: int len = dp - cp;
477: charBuf.append(da.substring(cp, dp));
478: cp = dp;
479: SubString sub = interpretDollar(cx, regExpImpl, da, dp,
480: skip);
481: if (sub != null) {
482: len = sub.length;
483: if (len > 0) {
484: charBuf.append(sub.charArray, sub.index, len);
485: }
486: cp += skip[0];
487: dp += skip[0];
488: } else {
489: ++dp;
490: }
491: dp = da.indexOf('$', dp);
492: } while (dp >= 0);
493: }
494: int daL = da.length();
495: if (daL > cp) {
496: charBuf.append(da.substring(cp, daL));
497: }
498: }
499:
500: String input; /* input string to match (perl $_, GC root) */
501: boolean multiline; /* whether input contains newlines (perl $*) */
502: SubString[] parens; /* Vector of SubString; last set of parens
503: matched (perl $1, $2) */
504: SubString lastMatch; /* last string matched (perl $&) */
505: SubString lastParen; /* last paren matched (perl $+) */
506: SubString leftContext; /* input to left of last match (perl $`) */
507: SubString rightContext; /* input to right of last match (perl $') */
508: }
509:
510: final class GlobData {
511: int mode; /* input: return index, match object, or void */
512: int optarg; /* input: index of optional flags argument */
513: boolean global; /* output: whether regexp was global */
514: String str; /* output: 'this' parameter object as string */
515: NativeRegExp regexp;/* output: regexp parameter object private data */
516:
517: // match-specific data
518:
519: Scriptable arrayobj;
520:
521: // replace-specific data
522:
523: Function lambda; /* replacement function object or null */
524: String repstr; /* replacement string */
525: int dollar = -1; /* -1 or index of first $ in repstr */
526: StringBuffer charBuf; /* result characters, null initially */
527: int leftIndex; /* leftContext index, always 0 for JS1.2 */
528: }
|