001: package net.sf.saxon.functions;
002:
003: import net.sf.saxon.expr.Expression;
004: import net.sf.saxon.expr.StaticContext;
005: import net.sf.saxon.expr.XPathContext;
006: import net.sf.saxon.om.Item;
007: import net.sf.saxon.trans.DynamicError;
008: import net.sf.saxon.trans.StaticError;
009: import net.sf.saxon.trans.XPathException;
010: import net.sf.saxon.type.RegexTranslator;
011: import net.sf.saxon.value.AtomicValue;
012: import net.sf.saxon.value.BooleanValue;
013: import net.sf.saxon.value.StringValue;
014: import net.sf.saxon.value.Value;
015:
016: import java.util.regex.Pattern;
017: import java.util.regex.PatternSyntaxException;
018:
019: /**
020: * This class implements the matches() function for regular expression matching
021: */
022:
023: public class Matches extends SystemFunction {
024:
025: private Pattern regexp;
026:
027: /**
028: * Simplify and validate.
029: * This is a pure function so it can be simplified in advance if the arguments are known
030: * @return the simplified expression
031: * @throws net.sf.saxon.trans.StaticError if any error is found (e.g. invalid regular expression)
032: */
033:
034: public Expression simplify(StaticContext env) throws XPathException {
035: Expression e = simplifyArguments(env);
036:
037: // compile the regular expression once if possible
038: if (!(e instanceof Value)) {
039: try {
040: regexp = Matches.tryToCompile(argument, 1, 2);
041: } catch (StaticError err) {
042: err.setLocator(this );
043: throw err;
044: }
045: }
046:
047: return e;
048: }
049:
050: /**
051: * Set the Java flags from the supplied XPath flags.
052: * @param inFlags the flags as a string, e.g. "im"
053: * @return the flags as a bit-significant integer
054: * @throws net.sf.saxon.trans.StaticError if the supplied value is invalid
055: */
056:
057: public static int setFlags(CharSequence inFlags) throws StaticError {
058: int flags = Pattern.UNIX_LINES;
059: for (int i = 0; i < inFlags.length(); i++) {
060: char c = inFlags.charAt(i);
061: switch (c) {
062: case 'm':
063: flags |= Pattern.MULTILINE;
064: break;
065: case 'i':
066: flags |= Pattern.CASE_INSENSITIVE;
067: flags |= Pattern.UNICODE_CASE;
068: break;
069: case 's':
070: flags |= Pattern.DOTALL;
071: break;
072: case 'x':
073: flags |= Pattern.COMMENTS; // note, this enables comments as well as whitespace
074: break;
075: default:
076: StaticError err = new StaticError("Invalid character '"
077: + c + "' in regular expression flags");
078: err.setErrorCode("FORX0001");
079: throw err;
080: }
081: }
082: return flags;
083: }
084:
085: /**
086: * Try to precompile the arguments to the function. This method is shared by
087: * the implementations of the three XPath functions matches(), replace(), and
088: * tokenize().
089: * @param args the supplied arguments to the function, as an array
090: * @param patternArg the position of the argument containing the regular expression
091: * @param flagsArg the position of the argument containing the flags
092: * @return the compiled regular expression, or null indicating that the information
093: * is not available statically so it cannot be precompiled
094: * @throws net.sf.saxon.trans.StaticError if any failure occurs, in particular, if the regular
095: * expression is invalid
096: */
097:
098: protected static Pattern tryToCompile(Expression[] args,
099: int patternArg, int flagsArg) throws StaticError {
100: if (patternArg > args.length - 1) {
101: // too few arguments were supplied; the error will be reported in due course
102: return null;
103: }
104: CharSequence flagstr = null;
105: if (args.length - 1 < flagsArg) {
106: flagstr = "";
107: } else if (args[flagsArg] instanceof StringValue) {
108: flagstr = ((StringValue) args[flagsArg]).getStringValueCS();
109: }
110:
111: if (args[patternArg] instanceof StringValue && flagstr != null) {
112: int flags = Matches.setFlags(flagstr);
113:
114: try {
115: String javaRegex = RegexTranslator.translate(
116: ((StringValue) args[patternArg])
117: .getStringValueCS(), true);
118: return Pattern.compile(javaRegex, flags);
119: } catch (RegexTranslator.RegexSyntaxException err) {
120: StaticError e2 = new StaticError(err.getMessage());
121: e2.setErrorCode("FORX0002");
122: throw e2;
123: } catch (PatternSyntaxException err) {
124: StaticError e2 = new StaticError(err.getMessage());
125: e2.setErrorCode("FORX0002");
126: throw e2;
127: }
128: } else {
129: return null;
130: }
131: }
132:
133: /**
134: * Evaluate the matches() function to give a Boolean value.
135: * @param c The dynamic evaluation context
136: * @return the result as a BooleanValue, or null to indicate the empty sequence
137: * @throws XPathException on an error
138: */
139:
140: public Item evaluateItem(XPathContext c) throws XPathException {
141: AtomicValue sv0 = (AtomicValue) argument[0].evaluateItem(c);
142: if (sv0 == null) {
143: sv0 = StringValue.EMPTY_STRING;
144: }
145: ;
146:
147: Pattern re = regexp;
148: if (re == null) {
149:
150: AtomicValue pat = (AtomicValue) argument[1].evaluateItem(c);
151: if (pat == null)
152: return null;
153:
154: CharSequence flags;
155: if (argument.length == 2) {
156: flags = "";
157: } else {
158: AtomicValue sv2 = (AtomicValue) argument[2]
159: .evaluateItem(c);
160: if (sv2 == null)
161: return null;
162: flags = sv2.getStringValueCS();
163: }
164:
165: try {
166: String javaRegex = RegexTranslator.translate(pat
167: .getStringValueCS(), true);
168: re = Pattern.compile(javaRegex, setFlags(flags));
169: } catch (RegexTranslator.RegexSyntaxException err) {
170: DynamicError de = new DynamicError(err);
171: de.setErrorCode("FORX0002");
172: de.setXPathContext(c);
173: throw de;
174: } catch (PatternSyntaxException err) {
175: DynamicError de = new DynamicError(err);
176: de.setErrorCode("FORX0002");
177: de.setXPathContext(c);
178: throw de;
179: } catch (StaticError serr) {
180: dynamicError(serr.getMessage(), serr
181: .getErrorCodeLocalPart(), c);
182: }
183: }
184: return BooleanValue.get(re.matcher(sv0.getStringValueCS())
185: .find());
186: }
187:
188: /**
189: * Temporary test rig, used to submit bug report to Sun
190: */
191: // public static void main(String[] args) throws Exception {
192: //
193: // matches("\u212a", "K");
194: // matches("\u212a", "[A-Z]");
195: // matches("\u212a", "I|J|K|L");
196: // matches("\u212a", "[IJKL]");
197: // matches("\u212a", "k");
198: // matches("\u212a", "[a-z]");
199: // matches("\u212a", "i|j|k|l");
200: // matches("\u212a", "[ijkl]");
201: // }
202: //
203: // private static void matches(String in, String pattern) {
204: // System.err.println("Java version " + System.getProperty("java.version"));
205: // int flags = Pattern.UNIX_LINES;
206: // flags |= Pattern.CASE_INSENSITIVE;
207: // flags |= Pattern.UNICODE_CASE;
208: // Pattern p = Pattern.compile(pattern, flags);
209: // boolean b = p.matcher(in).find();
210: // System.err.println("Pattern " + pattern + ": " + (b ? " match" : "no match"));
211: // }
212: // Results of this test with JDK 1.5.0_05:
213: //
214: // Pattern K: match
215: // Java version 1.5.0_05
216: // Pattern [A-Z]: no match
217: // Java version 1.5.0_05
218: // Pattern I|J|K|L: match
219: // Java version 1.5.0_05
220: // Pattern [IJKL]: no match
221: // Java version 1.5.0_05
222: // Pattern k: match
223: // Java version 1.5.0_05
224: // Pattern [a-z]: match
225: // Java version 1.5.0_05
226: // Pattern i|j|k|l: match
227: // Java version 1.5.0_05
228: // Pattern [ijkl]: no match
229: }
230:
231: //
232: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
233: // you may not use this file except in compliance with the License. You may obtain a copy of the
234: // License at http://www.mozilla.org/MPL/
235: //
236: // Software distributed under the License is distributed on an "AS IS" basis,
237: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
238: // See the License for the specific language governing rights and limitations under the License.
239: //
240: // The Original Code is: all this file.
241: //
242: // The Initial Developer of the Original Code is Michael H. Kay
243: //
244: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
245: //
246: // Contributor(s): none.
247: //
|