001: package net.sf.saxon.functions;
002:
003: import net.sf.saxon.expr.Token;
004: import net.sf.saxon.expr.XPathContext;
005: import net.sf.saxon.om.Item;
006: import net.sf.saxon.trans.XPathException;
007: import net.sf.saxon.value.AtomicValue;
008: import net.sf.saxon.value.IntegerValue;
009: import net.sf.saxon.value.NumericValue;
010: import net.sf.saxon.value.StringValue;
011:
012: /**
013: * This class implements the XPath substring() function
014: */
015:
016: public class Substring extends SystemFunction {
017:
018: /**
019: * Evaluate the function
020: */
021:
022: public Item evaluateItem(XPathContext context)
023: throws XPathException {
024:
025: AtomicValue sv = (AtomicValue) argument[0]
026: .evaluateItem(context);
027: if (sv == null) {
028: sv = StringValue.EMPTY_STRING;
029: }
030: CharSequence s = sv.getStringValueCS();
031:
032: AtomicValue a1 = (AtomicValue) argument[1]
033: .evaluateItem(context);
034: NumericValue a = (NumericValue) a1.getPrimitiveValue();
035:
036: if (argument.length == 2) {
037: return StringValue.makeStringValue(substring(s, a));
038: } else {
039: AtomicValue b2 = (AtomicValue) argument[2]
040: .evaluateItem(context);
041: NumericValue b = (NumericValue) b2.getPrimitiveValue();
042: return StringValue.makeStringValue(substring(s, a, b,
043: context));
044: }
045: }
046:
047: /**
048: * Implement substring function with two arguments.
049: */
050:
051: private static CharSequence substring(CharSequence s,
052: NumericValue start) {
053: int slength = s.length();
054:
055: long lstart;
056: if (start instanceof IntegerValue) {
057: lstart = ((IntegerValue) start).longValue();
058: } else {
059: NumericValue rstart = start.round();
060: // We need to be careful to handle cases such as plus/minus infinity
061: if (rstart.compareTo(IntegerValue.ZERO) <= 0) {
062: return s;
063: } else if (rstart.compareTo(new IntegerValue(slength)) > 0) {
064: // this works even where the string contains surrogate pairs,
065: // because the Java length is always >= the XPath length
066: return "";
067: } else {
068: try {
069: lstart = rstart.longValue();
070: } catch (XPathException err) {
071: // this shouldn't happen unless the string length exceeds the bounds
072: // of a long
073: throw new AssertionError(
074: "string length out of permissible range");
075: }
076: }
077: }
078:
079: int pos = 1;
080: int cpos = 0;
081: while (cpos < slength) {
082: if (pos >= lstart) {
083: return s.subSequence(cpos, s.length());
084: }
085:
086: int ch = (int) s.charAt(cpos++);
087: if (ch < 55296 || ch > 56319)
088: pos++; // don't count high surrogates, i.e. D800 to DBFF
089: }
090: return "";
091: }
092:
093: /**
094: * Implement substring function with three arguments.
095: */
096:
097: private static CharSequence substring(CharSequence s,
098: NumericValue start, NumericValue len, XPathContext context) {
099: int slength = s.length();
100:
101: long lstart;
102: if (start instanceof IntegerValue) {
103: lstart = ((IntegerValue) start).longValue();
104: } else {
105: start = start.round();
106: // We need to be careful to handle cases such as plus/minus infinity and NaN
107: if (start.compareTo(IntegerValue.ZERO) <= 0) {
108: lstart = 0;
109: } else if (start.compareTo(new IntegerValue(slength)) > 0) {
110: // this works even where the string contains surrogate pairs,
111: // because the Java length is always >= the XPath length
112: return "";
113: } else if (start.isNaN()) {
114: return "";
115: } else {
116: try {
117: lstart = start.longValue();
118: } catch (XPathException err) {
119: // this shouldn't happen unless the string length exceeds the bounds
120: // of a long
121: throw new AssertionError(
122: "string length out of permissible range");
123: }
124: }
125: }
126:
127: NumericValue end;
128: try {
129: end = start.arithmetic(Token.PLUS, len.round(), context);
130: } catch (XPathException e) {
131: throw new AssertionError(
132: "Unexpected arithmetic failure in substring");
133: }
134: long lend;
135: if (end instanceof IntegerValue) {
136: lend = ((IntegerValue) end).longValue();
137: } else {
138: // We need to be careful to handle cases such as plus/minus infinity and NaN
139: if (end.compareTo(IntegerValue.ZERO) <= 0) {
140: return "";
141: } else if (end.isNaN()) {
142: return "";
143: } else if (end.compareTo(new IntegerValue(slength)) > 0) {
144: // this works even where the string contains surrogate pairs,
145: // because the Java length is always >= the XPath length
146: lend = slength + 1;
147: } else {
148: try {
149: lend = end.ceiling().longValue();
150: } catch (XPathException err) {
151: // this shouldn't happen unless the string length exceeds the bounds
152: // of a long
153: throw new AssertionError(
154: "string length out of permissible range");
155: }
156: }
157: }
158:
159: int jstart = -1;
160: int jend = -1;
161: int pos = 1;
162: int cpos = 0;
163: while (cpos < slength) {
164: if (pos >= lstart) {
165: if (pos < lend) {
166: if (jstart < 0) {
167: jstart = cpos;
168: }
169: } else {
170: jend = cpos;
171: break;
172: }
173: }
174:
175: int ch = (int) s.charAt(cpos++);
176: if (ch < 55296 || ch > 56319)
177: pos++; // don't count high surrogates, i.e. D800 to DBFF
178: }
179: if (jstart < 0 || jstart == jend) {
180: return "";
181: } else if (jend < 0) {
182: return s.subSequence(jstart, s.length());
183: } else {
184: return s.subSequence(jstart, jend);
185: }
186: }
187: }
188:
189: //
190: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
191: // you may not use this file except in compliance with the License. You may obtain a copy of the
192: // License at http://www.mozilla.org/MPL/
193: //
194: // Software distributed under the License is distributed on an "AS IS" basis,
195: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
196: // See the License for the specific language governing rights and limitations under the License.
197: //
198: // The Original Code is: all this file.
199: //
200: // The Initial Developer of the Original Code is Michael H. Kay.
201: //
202: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
203: //
204: // Contributor(s): none.
205: //
|