001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit.javascript.regexp;
039:
040: import java.util.regex.Matcher;
041: import java.util.regex.Pattern;
042: import java.util.regex.PatternSyntaxException;
043:
044: import org.apache.commons.lang.StringUtils;
045: import org.apache.commons.logging.Log;
046: import org.apache.commons.logging.LogFactory;
047: import org.mozilla.javascript.Context;
048: import org.mozilla.javascript.RegExpProxy;
049: import org.mozilla.javascript.ScriptRuntime;
050: import org.mozilla.javascript.Scriptable;
051: import org.mozilla.javascript.regexp.NativeRegExp;
052: import org.mozilla.javascript.regexp.RegExpImpl;
053:
054: /**
055: * Begins customization of JavaScript RegExp base on JDK regular expression support.
056: *
057: * @version $Revision: 2154 $
058: * @author Marc Guillemot
059: * @author Ahmed Ashour
060: */
061: public class HtmlUnitRegExpProxy extends RegExpImpl {
062: private final RegExpProxy wrapped_;
063:
064: /**
065: * Wraps a proxy to enhance it
066: * @param wrapped the original proxy
067: */
068: public HtmlUnitRegExpProxy(final RegExpProxy wrapped) {
069: wrapped_ = wrapped;
070: }
071:
072: /**
073: * Use wrapped proxy except for replacement with string arg where it uses Java regular expression
074: * {@inheritDoc}
075: */
076: public Object action(final Context cx, final Scriptable scope,
077: final Scriptable this Obj, final Object[] args,
078: final int actionType) {
079:
080: // in a first time just improve replacement with a String (not a function)
081: if (RA_REPLACE == actionType && args.length == 2
082: && (args[1] instanceof String)) {
083: final String this String = Context.toString(this Obj);
084: final String replacement = ((String) args[1]).replaceAll(
085: "\\\\", "\\\\\\\\");
086: final Object arg0 = args[0];
087: if (arg0 instanceof String) {
088: // arg0 should not be interpreted as a RegExp
089: return StringUtils.replaceOnce(this String,
090: (String) arg0, replacement);
091: } else if (arg0 instanceof NativeRegExp) {
092: try {
093: final NativeRegExp regexp = (NativeRegExp) arg0;
094: final String str = arg0.toString();
095: final String regex = readNativeRegExpPattern(regexp);
096: final String flagsStr = StringUtils
097: .substringAfterLast(str, "/");
098: final int flags = jsFlagsToPatternFlags(flagsStr);
099: final Pattern pattern = Pattern.compile(regex,
100: flags);
101: final Matcher matcher = pattern.matcher(this String);
102: if (flagsStr.indexOf('g') != -1) {
103: return matcher.replaceAll(replacement);
104: } else {
105: return matcher.replaceFirst(replacement);
106: }
107: } catch (final PatternSyntaxException e) {
108: getLog().warn(e);
109: }
110: }
111: } else if (RA_MATCH == actionType) {
112: if (args.length == 0) {
113: return null;
114: }
115: final Object arg0 = args[0];
116: final String this String = Context.toString(this Obj);
117: final String regex;
118: final int flags;
119: if (arg0 instanceof NativeRegExp) {
120: regex = readNativeRegExpPattern((NativeRegExp) arg0);
121: flags = readNativeRegExpFlags((NativeRegExp) arg0);
122: } else {
123: regex = Context.toString(arg0);
124: flags = 0;
125: }
126: final Pattern pattern = Pattern.compile(regex, flags);
127: final Matcher matcher = pattern.matcher(this String);
128: if (!matcher.find()) {
129: return null;
130: }
131: final Object[] groups = new Object[matcher.groupCount() + 1];
132: for (int i = 0; i <= matcher.groupCount(); ++i) {
133: groups[i] = matcher.group(i);
134: if (groups[i] == null) {
135: groups[i] = Context.getUndefinedValue();
136: }
137: }
138: final Scriptable response = cx.newArray(scope, groups);
139: // the additional properties (cf ECMA script reference 15.10.6.2 13)
140: response.put("index", response, new Integer(matcher
141: .start(0)));
142: response.put("input", response, this String);
143: return response;
144: }
145:
146: return wrappedAction(cx, scope, this Obj, args, actionType);
147: }
148:
149: private int readNativeRegExpFlags(final NativeRegExp nativeRegExp) {
150: final String str = nativeRegExp.toString(); // the form is /regex/flags
151: return jsFlagsToPatternFlags(str);
152: }
153:
154: private String readNativeRegExpPattern(
155: final NativeRegExp nativeRegExp) {
156: final String str = nativeRegExp.toString(); // the form is /regex/flags
157: return StringUtils.substringBeforeLast(str.substring(1), "/")
158: .replaceAll("\\[\\^\\\\\\d\\]", ".");
159: }
160:
161: /**
162: * Calls action on the wrapped RegExp proxy.
163: */
164: private Object wrappedAction(final Context cx,
165: final Scriptable scope, final Scriptable this Obj,
166: final Object[] args, final int actionType) {
167:
168: // take care to set the context's RegExp proxy to the original one as this is checked
169: // (cf org.mozilla.javascript.regexp.RegExpImp:334)
170: try {
171: ScriptRuntime.setRegExpProxy(cx, wrapped_);
172: return wrapped_
173: .action(cx, scope, this Obj, args, actionType);
174: } finally {
175: ScriptRuntime.setRegExpProxy(cx, this );
176: }
177: }
178:
179: /**
180: * Convert JavaScript RegExp flags "img" to Java Pattern flags
181: * @param flagsStr the flags (a combination of i, m and g)
182: * @return the Java Pattern flags
183: */
184: private int jsFlagsToPatternFlags(final String flagsStr) {
185: int flags = 0;
186: if (flagsStr.indexOf('i') != -1) {
187: flags |= Pattern.CASE_INSENSITIVE;
188: }
189: if (flagsStr.indexOf('m') != -1) {
190: flags |= Pattern.MULTILINE;
191: }
192: return flags;
193: }
194:
195: /**
196: * {@inheritDoc}
197: */
198: public Object compileRegExp(final Context cx, final String source,
199: final String flags) {
200: return wrapped_.compileRegExp(cx, source, flags);
201: }
202:
203: /**
204: * {@inheritDoc}
205: */
206: public int find_split(final Context cx, final Scriptable scope,
207: final String target, final String separator,
208: final Scriptable re, final int[] ip, final int[] matchlen,
209: final boolean[] matched, final String[][] parensp) {
210: return wrapped_.find_split(cx, scope, target, separator, re,
211: ip, matchlen, matched, parensp);
212: }
213:
214: /**
215: * {@inheritDoc}
216: */
217: public boolean isRegExp(final Scriptable obj) {
218: return wrapped_.isRegExp(obj);
219: }
220:
221: /**
222: * {@inheritDoc}
223: */
224: public Scriptable wrapRegExp(final Context cx,
225: final Scriptable scope, final Object compiled) {
226: return wrapped_.wrapRegExp(cx, scope, compiled);
227: }
228:
229: /**
230: * Return the log object for this object.
231: * @return The log object for this object.
232: */
233: protected final Log getLog() {
234: return LogFactory.getLog(getClass());
235: }
236:
237: }
|