001: package net.sf.saxon.functions;
002:
003: import net.sf.saxon.expr.Expression;
004: import net.sf.saxon.expr.StaticContext;
005: import net.sf.saxon.expr.XPathContext;
006: import net.sf.saxon.om.Item;
007: import net.sf.saxon.trans.DynamicError;
008: import net.sf.saxon.trans.XPathException;
009: import net.sf.saxon.trans.StaticError;
010: import net.sf.saxon.type.RegexTranslator;
011: import net.sf.saxon.value.AtomicValue;
012: import net.sf.saxon.value.StringValue;
013: import net.sf.saxon.value.Value;
014:
015: import java.util.regex.Pattern;
016: import java.util.regex.PatternSyntaxException;
017: import java.util.regex.Matcher;
018:
019: /**
020: * This class implements the replace() function for replacing
021: * substrings that match a regular expression
022: */
023:
024: public class Replace extends SystemFunction {
025:
026: private Pattern regexp;
027:
028: /**
029: * Simplify and validate.
030: * This is a pure function so it can be simplified in advance if the arguments are known
031: */
032:
033: public Expression simplify(StaticContext env) throws XPathException {
034: Expression e = simplifyArguments(env);
035:
036: // compile the regular expression once if possible
037: if (!(e instanceof Value)) {
038: try {
039: regexp = Matches.tryToCompile(argument, 1, 3);
040: } catch (StaticError err) {
041: err.setLocator(this );
042: throw err;
043: }
044:
045: // check that it's not a pattern that matches ""
046: if (regexp != null && regexp.matcher("").matches()) {
047: DynamicError err = new DynamicError(
048: "The regular expression in replace() must not be one that matches a zero-length string");
049: err.setErrorCode("FORX0003");
050: err.setLocator(this );
051: throw err;
052: }
053: }
054:
055: return e;
056: }
057:
058: /**
059: * Evaluate the function in a string context
060: */
061:
062: public Item evaluateItem(XPathContext c) throws XPathException {
063:
064: AtomicValue arg0 = (AtomicValue) argument[0].evaluateItem(c);
065: if (arg0 == null) {
066: arg0 = StringValue.EMPTY_STRING;
067: }
068:
069: AtomicValue arg2 = (AtomicValue) argument[2].evaluateItem(c);
070: CharSequence replacement = arg2.getStringValueCS();
071: checkReplacement(replacement, c);
072:
073: Pattern re = regexp;
074: if (re == null) {
075:
076: AtomicValue arg1 = (AtomicValue) argument[1]
077: .evaluateItem(c);
078:
079: CharSequence flags;
080:
081: if (argument.length == 3) {
082: flags = "";
083: } else {
084: AtomicValue arg3 = (AtomicValue) argument[3]
085: .evaluateItem(c);
086: flags = arg3.getStringValueCS();
087: }
088:
089: try {
090: String javaRegex = RegexTranslator.translate(arg1
091: .getStringValueCS(), true);
092: re = Pattern
093: .compile(javaRegex, Matches.setFlags(flags));
094: } catch (RegexTranslator.RegexSyntaxException err) {
095: DynamicError de = new DynamicError(err);
096: de.setErrorCode("FORX0002");
097: de.setXPathContext(c);
098: de.setLocator(this );
099: throw de;
100: } catch (PatternSyntaxException err) {
101: DynamicError de = new DynamicError(err);
102: de.setErrorCode("FORX0002");
103: de.setXPathContext(c);
104: de.setLocator(this );
105: throw de;
106: }
107:
108: // check that it's not a pattern that matches ""
109: if (re.matcher("").matches()) {
110: dynamicError(
111: "The regular expression in replace() must not be one that matches a zero-length string",
112: "FORX0003", c);
113: }
114: }
115: String input = arg0.getStringValue();
116: Matcher matcher = re.matcher(input);
117: try {
118: String res = matcher.replaceAll(replacement.toString());
119: return StringValue.makeStringValue(res);
120: } catch (IndexOutOfBoundsException e) {
121: // this occurs if the replacement string references a group $n and there are less than n
122: // capturing subexpressions in the regex. In this case we're supposed to replace $n by an
123: // empty string. We do this by modifying the replacement string.
124: int gps = matcher.groupCount();
125: if (gps >= 9) {
126: // don't know what's gone wrong here
127: throw e;
128: }
129: String r = replacement.toString();
130: // remove occurrences of $n from the replacement string, if n is greater than the number of groups
131: String f = "\\$[" + (gps + 1) + "-9]";
132: String rep = Pattern.compile(f).matcher(r).replaceAll("");
133: String res = matcher.replaceAll(rep);
134: return StringValue.makeStringValue(res);
135: }
136:
137: }
138:
139: /**
140: * Check the contents of the replacement string
141: */
142:
143: private void checkReplacement(CharSequence rep, XPathContext context)
144: throws XPathException {
145: for (int i = 0; i < rep.length(); i++) {
146: char c = rep.charAt(i);
147: if (c == '$') {
148: if (i + 1 < rep.length()) {
149: char next = rep.charAt(++i);
150: if (next < '0' || next > '9') {
151: dynamicError(
152: "Invalid replacement string in replace(): $ sign must be followed by digit 0-9",
153: "FORX0004", context);
154: }
155: } else {
156: dynamicError(
157: "Invalid replacement string in replace(): $ sign at end of string",
158: "FORX0004", context);
159: }
160: } else if (c == '\\') {
161: if (i + 1 < rep.length()) {
162: char next = rep.charAt(++i);
163: if (next != '\\' && next != '$') {
164: dynamicError(
165: "Invalid replacement string in replace(): \\ character must be followed by \\ or $",
166: "FORX0004", context);
167: }
168: } else {
169: dynamicError(
170: "Invalid replacement string in replace(): \\ character at end of string",
171: "FORX0004", context);
172: }
173: }
174: }
175: }
176:
177: }
178:
179: //
180: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
181: // you may not use this file except in compliance with the License. You may obtain a copy of the
182: // License at http://www.mozilla.org/MPL/
183: //
184: // Software distributed under the License is distributed on an "AS IS" basis,
185: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
186: // See the License for the specific language governing rights and limitations under the License.
187: //
188: // The Original Code is: all this file.
189: //
190: // The Initial Developer of the Original Code is Michael H. Kay
191: //
192: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
193: //
194: // Contributor(s): none.
195: //
|