001: package net.sf.saxon.functions;
002:
003: import net.sf.saxon.expr.XPathContext;
004: import net.sf.saxon.om.Item;
005: import net.sf.saxon.sort.CodepointCollator;
006: import net.sf.saxon.trans.XPathException;
007: import net.sf.saxon.value.AtomicValue;
008: import net.sf.saxon.value.BooleanValue;
009: import net.sf.saxon.value.StringValue;
010:
011: import java.text.CollationElementIterator;
012: import java.text.RuleBasedCollator;
013: import java.util.Comparator;
014:
015: /**
016: * This class implements the contains(), starts-with(), ends-with(),
017: * substring-before(), and substring-after() functions
018: */
019:
020: public class Contains extends CollatingFunction {
021:
022: public static final int CONTAINS = 0;
023: public static final int STARTSWITH = 1;
024: public static final int ENDSWITH = 2;
025: public static final int AFTER = 3;
026: public static final int BEFORE = 4;
027:
028: /**
029: * Evaluate the function
030: */
031:
032: public Item evaluateItem(XPathContext context)
033: throws XPathException {
034:
035: Comparator collator = getCollator(2, context);
036:
037: AtomicValue arg0 = (AtomicValue) argument[0]
038: .evaluateItem(context);
039: if (arg0 == null) {
040: arg0 = StringValue.EMPTY_STRING;
041: }
042:
043: AtomicValue arg1 = (AtomicValue) argument[1]
044: .evaluateItem(context);
045: if (arg1 == null) {
046: arg1 = StringValue.EMPTY_STRING;
047: }
048: ;
049:
050: String s0 = arg0.getStringValue();
051: String s1 = arg1.getStringValue();
052:
053: if (collator instanceof CodepointCollator) {
054: // Using unicode code-point matching: use the Java string-matching routines directly
055: switch (operation) {
056: case CONTAINS:
057: return BooleanValue.get(s0.indexOf(s1) >= 0);
058: case STARTSWITH:
059: return BooleanValue.get(s0.startsWith(s1));
060: case ENDSWITH:
061: return BooleanValue.get(s0.endsWith(s1));
062: case AFTER:
063: int i = s0.indexOf(s1);
064: if (i < 0)
065: return StringValue.EMPTY_STRING;
066: return StringValue.makeStringValue(s0.substring(i
067: + s1.length()));
068: case BEFORE:
069: int j = s0.indexOf(s1);
070: if (j < 0)
071: return StringValue.EMPTY_STRING;
072: return StringValue.makeStringValue(s0.substring(0, j));
073: default:
074: throw new UnsupportedOperationException(
075: "Unknown operation " + operation);
076: }
077: } else {
078:
079: if (!(collator instanceof RuleBasedCollator)) {
080: dynamicError("The collation for "
081: + getDisplayName(context.getNamePool())
082: + " must be a RuleBaseCollator", "FOCH0004",
083: context);
084: return null;
085: }
086: RuleBasedCollator rbc = (RuleBasedCollator) collator;
087: CollationElementIterator iter0 = rbc
088: .getCollationElementIterator(s0);
089: CollationElementIterator iter1 = rbc
090: .getCollationElementIterator(s1);
091:
092: switch (operation) {
093: case STARTSWITH:
094: return BooleanValue.get(collationStartsWith(iter0,
095: iter1));
096: case CONTAINS:
097: case ENDSWITH:
098: return BooleanValue.get(collationContains(iter0, iter1,
099: null));
100: case AFTER:
101: int[] ia = new int[2];
102: boolean ba = collationContains(iter0, iter1, ia);
103: if (ba) {
104: return StringValue.makeStringValue(s0
105: .substring(ia[1]));
106: } else {
107: return StringValue.EMPTY_STRING;
108: }
109: case BEFORE:
110: int[] ib = new int[2];
111: boolean bb = collationContains(iter0, iter1, ib);
112: if (bb) {
113: return StringValue.makeStringValue(s0.substring(0,
114: ib[0]));
115: } else {
116: return StringValue.EMPTY_STRING;
117: }
118: default:
119: throw new UnsupportedOperationException(
120: "Unknown operation " + operation);
121: }
122: }
123: }
124:
125: /**
126: * Determine whether one string starts with another, under the terms of a given
127: * collating sequence.
128: * @param s0 iterator over the collation elements of the containing string
129: * @param s1 iterator over the collation elements of the contained string
130: */
131:
132: private boolean collationStartsWith(CollationElementIterator s0,
133: CollationElementIterator s1) {
134: while (true) {
135: int e1 = s1.next();
136: if (e1 == -1) {
137: return true;
138: }
139: int e0 = s0.next();
140: if (e0 != e1) {
141: return false;
142: }
143: }
144: }
145:
146: /**
147: * Determine whether one string contains another, under the terms of a given
148: * collating sequence. If operation=ENDSWITH, the match must be at the end of the first
149: * string.
150: * @param s0 iterator over the collation elements of the containing string
151: * @param s1 iterator over the collation elements of the contained string
152: * @param offsets may be null, but if it is supplied, it must be an array of two
153: * integers which, if the function returns true, will contain the start position of the
154: * first matching substring, and the offset of the first character after the first
155: * matching substring. This is not available for operation=endswith
156: * @return true if the first string contains the second
157: */
158:
159: private boolean collationContains(CollationElementIterator s0,
160: CollationElementIterator s1, int[] offsets) {
161: int e1 = s1.next();
162: if (e1 == -1) {
163: return true;
164: }
165: int e0 = -1;
166: while (true) {
167: // scan the first string to find a matching character
168: while (e0 != e1) {
169: e0 = s0.next();
170: if (e0 == -1) {
171: // hit the end, no match
172: return false;
173: }
174: }
175: // matched first character, note the position of the possible match
176: int start = s0.getOffset();
177: if (collationStartsWith(s0, s1)) {
178: if (operation == ENDSWITH) {
179: if (s0.next() == -1) {
180: // the match is at the end
181: return true;
182: }
183: // else ignore this match and keep looking
184: } else {
185: if (offsets != null) {
186: offsets[0] = start - 1;
187: offsets[1] = s0.getOffset();
188: }
189: return true;
190: }
191: }
192: // reset the position and try again
193: s0.setOffset(start);
194:
195: // workaround for a difference between JDK 1.4.0 and JDK 1.4.1
196: if (s0.getOffset() != start) {
197: // JDK 1.4.0 takes this path
198: s0.next();
199: }
200: s1.reset();
201: e0 = -1;
202: e1 = s1.next();
203: // loop round to try again
204: }
205: }
206:
207: }
208:
209: //
210: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
211: // you may not use this file except in compliance with the License. You may obtain a copy of the
212: // License at http://www.mozilla.org/MPL/
213: //
214: // Software distributed under the License is distributed on an "AS IS" basis,
215: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
216: // See the License for the specific language governing rights and limitations under the License.
217: //
218: // The Original Code is: all this file.
219: //
220: // The Initial Developer of the Original Code is Michael H. Kay.
221: //
222: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
223: //
224: // Contributor(s): none.
225: //
|