001: package net.sf.saxon.value;
002:
003: import net.sf.saxon.Err;
004: import net.sf.saxon.expr.XPathContext;
005: import net.sf.saxon.om.*;
006: import net.sf.saxon.trans.DynamicError;
007: import net.sf.saxon.trans.XPathException;
008: import net.sf.saxon.type.*;
009:
010: /**
011: * An atomic value of type xs:string
012: */
013:
014: public class StringValue extends AtomicValue {
015:
/** Shared instance wrapping the zero-length string; makeStringValue() returns it for null or empty input. */
public static final StringValue EMPTY_STRING = new StringValue("");
/** Shared instance wrapping a string that consists of a single space character. */
public static final StringValue SINGLE_SPACE = new StringValue(" ");

// We hold the value as a CharSequence (it may be a StringBuffer rather than a String).
// The first time it is converted to a String (see getStringValue()), that String replaces
// the original CharSequence in this field.

protected CharSequence value; // may be zero-length; the constructors in this class never leave it null
protected int length = -1; // the length in XML characters (not necessarily the same as the Java length); -1 means "not yet computed"
024:
/**
 * No-argument constructor reserved for subtypes. The wrapped value starts
 * out as the zero-length string.
 */
protected StringValue() {
    this.value = "";
}
032:
/**
 * Wrap a character sequence as a string value. Any kind of CharSequence may be
 * supplied (typically a String, but a StringBuffer is equally acceptable); the
 * sequence is stored as-is without copying, so the caller must guarantee that it
 * is never modified afterwards.
 * @param value the character content. Null is taken as equivalent to "".
 */
public StringValue(CharSequence value) {
    if (value == null) {
        this.value = "";
    } else {
        this.value = value;
    }
}
043:
044: /**
045: * Factory method. Unlike the constructor, this avoids creating a new StringValue in the case
046: * of a zero-length string (and potentially other strings, in future)
047: * @param value the String value. Null is taken as equivalent to "".
048: * @return the corresponding StringValue
049: */
050:
051: public static StringValue makeStringValue(CharSequence value) {
052: if (value == null || value.length() == 0) {
053: return StringValue.EMPTY_STRING;
054: } else {
055: return new StringValue(value);
056: }
057: }
058:
059: /**
060: * Get the string value as a String
061: */
062:
063: public final String getStringValue() {
064: return (String) (value = value.toString());
065: }
066:
067: /**
068: * Get the value of the item as a CharSequence. This is in some cases more efficient than
069: * the version of the method that returns a String.
070: */
071:
072: public final CharSequence getStringValueCS() {
073: return value;
074: }
075:
076: /**
077: * Convert a value to another primitive data type, with control over how validation is
078: * handled.
079: * @param requiredType type code of the required atomic type
080: * @param validate true if validation is required. If set to false, the caller guarantees that
081: * the value is valid for the target data type, and that further validation is therefore not required.
082: * Note that a validation failure may be reported even if validation was not requested.
083: * @param context
084: * @return the result of the conversion, if successful. If unsuccessful, the value returned
085: * will be a ValidationErrorValue. The caller must check for this condition. No exception is thrown, instead
086: * the exception will be encapsulated within the ErrorValue.
087: */
088:
089: public AtomicValue convertPrimitive(BuiltInAtomicType requiredType,
090: boolean validate, XPathContext context) {
091: int req = requiredType.getFingerprint();
092: if (req == Type.STRING || req == Type.ANY_ATOMIC
093: || req == Type.ITEM) {
094: return this ;
095: }
096: return convertStringToBuiltInType(value, requiredType,
097: (validate ? context.getConfiguration().getNameChecker()
098: : null));
099: }
100:
101: /**
102: * Convert a string value to another built-in data type, with control over how validation is
103: * handled.
104: * @param value the value to be converted
105: * @param requiredType the required atomic type
106: * @param checker if validation is required, a NameChecker. If set to null, the caller guarantees that
107: * the value is valid for the target data type, and that further validation is therefore not required.
108: * Note that a validation failure may be reported even if validation was not requested.
109: * @return the result of the conversion, if successful. If unsuccessful, the value returned
110: * will be a {@link ValidationErrorValue}. The caller must check for this condition. No exception is thrown, instead
111: * the exception will be encapsulated within the ValidationErrorValue.
112: */
113:
114: public static AtomicValue convertStringToBuiltInType(
115: CharSequence value, BuiltInAtomicType requiredType,
116: NameChecker checker) {
117: try {
118: switch (requiredType.getFingerprint()) {
119: case Type.BOOLEAN: {
120: return BooleanValue.fromString(value);
121: }
122: case Type.NUMBER:
123: case Type.DOUBLE:
124: return new DoubleValue(value);
125:
126: case Type.INTEGER:
127: return IntegerValue.stringToInteger(value);
128:
129: case Type.UNSIGNED_LONG:
130: case Type.UNSIGNED_INT:
131: case Type.UNSIGNED_SHORT:
132: case Type.UNSIGNED_BYTE:
133: if (checker != null) {
134: for (int c = 0; c < value.length(); c++) {
135: if (value.charAt(c) == '+') {
136: ValidationException err = new ValidationException(
137: "An unsigned number must not contain a plus sign");
138: return new ValidationErrorValue(err);
139: }
140: }
141: }
142: // fall through
143: case Type.NON_POSITIVE_INTEGER:
144: case Type.NEGATIVE_INTEGER:
145: case Type.LONG:
146: case Type.INT:
147: case Type.SHORT:
148: case Type.BYTE:
149: case Type.NON_NEGATIVE_INTEGER:
150: case Type.POSITIVE_INTEGER:
151: AtomicValue iv = IntegerValue.stringToInteger(value);
152: if (iv instanceof ValidationErrorValue) {
153: // indicates that the conversion failed
154: return iv;
155: }
156: ValidationException err;
157: if (iv instanceof IntegerValue) {
158: err = ((IntegerValue) iv).convertToSubtype(
159: requiredType, checker != null);
160: } else {
161: err = ((BigIntegerValue) iv).convertToSubType(
162: requiredType, checker != null);
163: }
164: return (err == null ? iv
165: : new ValidationErrorValue(err));
166: case Type.DECIMAL:
167: return DecimalValue.makeDecimalValue(value,
168: checker != null);
169: case Type.FLOAT:
170: return new FloatValue(value);
171: case Type.DATE:
172: return new DateValue(value);
173: case Type.DATE_TIME:
174: return new DateTimeValue(value);
175: case Type.TIME:
176: return new TimeValue(value);
177: case Type.G_YEAR:
178: return new GYearValue(value);
179: case Type.G_YEAR_MONTH:
180: return new GYearMonthValue(value);
181: case Type.G_MONTH:
182: return new GMonthValue(value);
183: case Type.G_MONTH_DAY:
184: return new GMonthDayValue(value);
185: case Type.G_DAY:
186: return new GDayValue(value);
187: case Type.DURATION:
188: return new DurationValue(value);
189: case Type.YEAR_MONTH_DURATION:
190: return new MonthDurationValue(value);
191: case Type.DAY_TIME_DURATION:
192: return new SecondsDurationValue(value);
193: case Type.UNTYPED_ATOMIC:
194: case Type.ANY_SIMPLE_TYPE:
195: return new UntypedAtomicValue(value);
196: case Type.STRING:
197: case Type.ANY_ATOMIC:
198: case Type.ITEM:
199: return makeStringValue(value);
200: case Type.NORMALIZED_STRING:
201: case Type.TOKEN:
202: case Type.LANGUAGE:
203: case Type.NAME:
204: case Type.NCNAME:
205: case Type.ID:
206: case Type.IDREF:
207: case Type.ENTITY:
208: case Type.NMTOKEN:
209: return RestrictedStringValue.makeRestrictedString(
210: value, requiredType.getFingerprint(), checker);
211: case Type.ANY_URI:
212: if (AnyURIValue.isValidURI(value)) {
213: return new AnyURIValue(value);
214: } else {
215: throw new ValidationException("Invalid URI: "
216: + value.toString());
217: }
218: case Type.HEX_BINARY:
219: return new HexBinaryValue(value);
220: case Type.BASE64_BINARY:
221: return new Base64BinaryValue(value);
222: default:
223: ValidationException ve = new ValidationException(
224: "Cannot convert string to type "
225: + Err.wrap(requiredType
226: .getDisplayName()));
227: ve.setErrorCode("XPTY0004");
228: ve.setIsTypeError(true);
229: throw ve;
230: }
231: } catch (ValidationException err) {
232: if (err.getErrorCodeLocalPart() == null) {
233: err.setErrorCode("FORG0001");
234: }
235: return new ValidationErrorValue(err);
236: } catch (XPathException err) {
237: if (err.getErrorCodeLocalPart() == null) {
238: err.setErrorCode("FORG0001");
239: }
240: ValidationException ve = new ValidationException(err
241: .getMessage());
242: if (err.getErrorCodeLocalPart() == null) {
243: ve.setErrorCode("FORG0001");
244: } else {
245: ve.setErrorCode(err.getErrorCodeLocalPart());
246: }
247: return new ValidationErrorValue(ve);
248: }
249: }
250:
251: /**
252: * Convert the value to a given type. The result of the conversion will be
253: * an atomic value of the required type. This method works where the target
254: * type is a built-in atomic type and also where it is a user-defined atomic
255: * type.
256: *
257: * @param targetType the type to which the value is to be converted
258: * @param checker a NameChecker if validation is required, null if the caller already knows that the
259: * value is valid
260: * @return the value after conversion if successful; or a {@link ValidationErrorValue} if conversion failed. The
261: * caller must check for this condition. Validation may fail even if validation was not requested.
262: */
263:
264: public static AtomicValue convertStringToAtomicType(
265: CharSequence value, AtomicType targetType,
266: NameChecker checker) {
267: if (targetType instanceof BuiltInAtomicType) {
268: return convertStringToBuiltInType(value,
269: (BuiltInAtomicType) targetType, checker);
270: } else {
271: AtomicValue v = convertStringToBuiltInType(value,
272: (BuiltInAtomicType) targetType
273: .getPrimitiveItemType(), checker);
274: if (v instanceof ValidationErrorValue) {
275: // conversion has failed
276: return v;
277: }
278: return targetType.makeDerivedValue(v, value,
279: checker != null);
280: }
281: }
282:
283: /**
284: * Return the type of the expression
285: * @return Type.STRING (always)
286: * @param th
287: */
288:
289: public ItemType getItemType(TypeHierarchy th) {
290: return Type.STRING_TYPE;
291: }
292:
293: /**
294: * Get the length of this string, as defined in XPath. This is not the same as the Java length,
295: * as a Unicode surrogate pair counts as a single character
296: */
297:
298: public int getStringLength() {
299: // memo function; only compute it the first time
300: if (length == -1) {
301: length = getStringLength(value);
302: }
303: return length;
304: }
305:
306: /**
307: * Get the length of a string, as defined in XPath. This is not the same as the Java length,
308: * as a Unicode surrogate pair counts as a single character.
309: * @param s The string whose length is required
310: */
311:
312: public static int getStringLength(CharSequence s) {
313: int n = 0;
314: for (int i = 0; i < s.length(); i++) {
315: int c = (int) s.charAt(i);
316: if (c < 55296 || c > 56319)
317: n++; // don't count high surrogates, i.e. D800 to DBFF
318: }
319: return n;
320: }
321:
322: /**
323: * Iterate over a string, returning a sequence of integers representing the Unicode code-point values
324: */
325:
326: public SequenceIterator iterateCharacters() {
327: return new CharacterIterator();
328: }
329:
330: /**
331: * Expand a string containing surrogate pairs into an array of 32-bit characters
332: */
333:
334: public static int[] expand(CharSequence s) {
335: int[] array = new int[getStringLength(s)];
336: int o = 0;
337: for (int i = 0; i < s.length(); i++) {
338: int charval;
339: int c = s.charAt(i);
340: if (c >= 55296 && c <= 56319) {
341: // we'll trust the data to be sound
342: charval = ((c - 55296) * 1024)
343: + ((int) s.charAt(i + 1) - 56320) + 65536;
344: i++;
345: } else {
346: charval = c;
347: }
348: array[o++] = charval;
349: }
350: return array;
351: }
352:
353: /**
354: * Contract an array of integers containing Unicode codepoints into a Java string
355: */
356:
357: public static CharSequence contract(int[] codes, int used) {
358: FastStringBuffer sb = new FastStringBuffer(codes.length);
359: for (int i = 0; i < used; i++) {
360: if (codes[i] < 65536) {
361: sb.append((char) codes[i]);
362: } else { // output a surrogate pair
363: sb.append(XMLChar.highSurrogate(codes[i]));
364: sb.append(XMLChar.lowSurrogate(codes[i]));
365: }
366: }
367: return sb;
368: }
369:
370: /**
371: * Determine if two StringValues are equal, according to XML Schema rules. (This method
372: * is not used for XPath comparisons, which are always under the control of a collation.)
373: * @throws ClassCastException if the values are not comparable
374: */
375:
376: public boolean equals(Object other) {
377: // For XML Schema purposes a String is never equal to a URI
378: if (other instanceof AnyURIValue) {
379: throw new ClassCastException(
380: "Cannot compare string to anyURI");
381: }
382: // Force a ClassCastException if the other value isn't a string or derived from string
383: StringValue otherVal = (StringValue) ((AtomicValue) other)
384: .getPrimitiveValue();
385: // cannot use equals() directly on two unlike CharSequences
386: return getStringValue().equals(otherVal.getStringValue());
387: }
388:
389: public int hashCode() {
390: return getStringValue().hashCode();
391: }
392:
393: /**
394: * Compare two values for equality. This supports identity constraints in XML Schema,
395: * which allow list-valued elements and attributes to participate in key and uniqueness constraints.
396: * This method returns false if any error occurs during the comparison, or if any of the items
397: * in either sequence is a node rather than an atomic value. The default implementation of
398: * schemaEquals() is the same as equals(), but subclasses can override this.
399: */
400:
401: public boolean schemaEquals(Value obj) {
402: if (obj instanceof AtomicValue) {
403: obj = ((AtomicValue) obj).getPrimitiveValue();
404: }
405: if (obj instanceof StringValue) {
406: return value.toString().equals(
407: ((StringValue) obj).value.toString());
408: } else {
409: return false;
410: }
411: }
412:
413: public boolean effectiveBooleanValue(XPathContext context)
414: throws XPathException {
415: return value.length() > 0;
416: }
417:
418: /**
419: * Convert to Java object (for passing to external functions)
420: */
421:
422: public Object convertToJava(Class target, XPathContext context)
423: throws XPathException {
424: if (target == Object.class) {
425: return value;
426: } else if (target.isAssignableFrom(StringValue.class)) {
427: return this ;
428: } else if (target == String.class
429: || target == CharSequence.class) {
430: return getStringValue();
431: } else if (target == boolean.class) {
432: BooleanValue bval = (BooleanValue) convert(Type.BOOLEAN,
433: context);
434: return Boolean.valueOf(bval.getBooleanValue());
435: } else if (target == Boolean.class) {
436: BooleanValue bval = (BooleanValue) convert(Type.BOOLEAN,
437: context);
438: return Boolean.valueOf(bval.getBooleanValue());
439: } else if (target == double.class) {
440: DoubleValue dval = (DoubleValue) convert(Type.DOUBLE,
441: context);
442: return new Double(dval.getDoubleValue());
443: } else if (target == Double.class) {
444: DoubleValue dval = (DoubleValue) convert(Type.DOUBLE,
445: context);
446: return new Double(dval.getDoubleValue());
447: } else if (target == float.class) {
448: DoubleValue dval = (DoubleValue) convert(Type.DOUBLE,
449: context);
450: return new Float(dval.getDoubleValue());
451: } else if (target == Float.class) {
452: DoubleValue dval = (DoubleValue) convert(Type.DOUBLE,
453: context);
454: return new Float(dval.getDoubleValue());
455: } else if (target == long.class) {
456: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
457: context);
458: return new Long(dval.longValue());
459: } else if (target == Long.class) {
460: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
461: context);
462: return new Long(dval.longValue());
463: } else if (target == int.class) {
464: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
465: context);
466: return new Integer((int) dval.longValue());
467: } else if (target == Integer.class) {
468: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
469: context);
470: return new Integer((int) dval.longValue());
471: } else if (target == short.class) {
472: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
473: context);
474: return new Short((short) dval.longValue());
475: } else if (target == Short.class) {
476: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
477: context);
478: return new Short((short) dval.longValue());
479: } else if (target == byte.class) {
480: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
481: context);
482: return new Byte((byte) dval.longValue());
483: } else if (target == Byte.class) {
484: IntegerValue dval = (IntegerValue) convert(Type.INTEGER,
485: context);
486: return new Byte((byte) dval.longValue());
487: } else if (target == char.class || target == Character.class) {
488: if (value.length() == 1) {
489: return new Character(value.charAt(0));
490: } else {
491: DynamicError de = new DynamicError(
492: "Cannot convert string to Java char unless length is 1");
493: de.setXPathContext(context);
494: de.setErrorCode("SAXON:0000");
495: throw de;
496: }
497: } else {
498: Object o = super .convertToJava(target, context);
499: if (o == null) {
500: DynamicError err = new DynamicError(
501: "Conversion of string to " + target.getName()
502: + " is not supported");
503: err.setXPathContext(context);
504: err.setErrorCode("SAXON:0000");
505: throw err;
506: }
507: return o;
508: }
509: }
510:
511: public String toString() {
512: return "\"" + value + '\"';
513: }
514:
515: /**
516: * CharacterIterator is used to iterate over the characters in a string,
517: * returning them as integers representing the Unicode code-point.
518: */
519:
520: public final class CharacterIterator implements SequenceIterator {
521:
522: int inpos = 0; // 0-based index of the current Java char
523: int outpos = 0; // 1-based value of position() function
524: int current = -1; // Unicode codepoint most recently returned
525:
526: /**
527: * Create an iterator over a string
528: */
529:
530: public CharacterIterator() {
531: }
532:
533: public Item next() {
534: if (inpos < value.length()) {
535: int c = value.charAt(inpos++);
536: if (c >= 55296 && c <= 56319) {
537: // we'll trust the data to be sound
538: current = ((c - 55296) * 1024)
539: + ((int) value.charAt(inpos++) - 56320)
540: + 65536;
541: } else {
542: current = c;
543: }
544: outpos++;
545: return new IntegerValue(current);
546: } else {
547: outpos = -1;
548: return null;
549: }
550: }
551:
552: public Item current() {
553: if (outpos < 1) {
554: return null;
555: }
556: return new IntegerValue(current);
557: }
558:
559: public int position() {
560: return outpos;
561: }
562:
563: public SequenceIterator getAnother() {
564: return new CharacterIterator();
565: }
566:
567: /**
568: * Get properties of this iterator, as a bit-significant integer.
569: *
570: * @return the properties of this iterator. This will be some combination of
571: * properties such as {@link GROUNDED} and {@link LAST_POSITION_FINDER}. It is always
572: * acceptable to return the value zero, indicating that there are no known special properties.
573: */
574:
575: public int getProperties() {
576: return 0;
577: }
578: }
579:
580: }
581:
582: //
583: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
584: // you may not use this file except in compliance with the License. You may obtain a copy of the
585: // License at http://www.mozilla.org/MPL/
586: //
587: // Software distributed under the License is distributed on an "AS IS" basis,
588: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
589: // See the License for the specific language governing rights and limitations under the License.
590: //
591: // The Original Code is: all this file.
592: //
593: // The Initial Developer of the Original Code is Michael H. Kay.
594: //
595: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
596: //
597: // Contributor(s): none.
598: //
|