001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.xerces.impl.dv.xs;
019:
020: import org.apache.xerces.impl.dv.InvalidDatatypeValueException;
021: import org.apache.xerces.util.URI;
022: import org.apache.xerces.impl.dv.ValidationContext;
023:
024: /**
025: * Represent the schema type "anyURI"
026: *
027: * @xerces.internal
028: *
029: * @author Neeraj Bajaj, Sun Microsystems, inc.
030: * @author Sandy Gao, IBM
031: *
032: * @version $Id: AnyURIDV.java 446745 2006-09-15 21:43:58Z mrglavas $
033: */
034: public class AnyURIDV extends TypeValidator {
035:
036: private static final URI BASE_URI;
037: static {
038: URI uri = null;
039: try {
040: uri = new URI("abc://def.ghi.jkl");
041: } catch (URI.MalformedURIException ex) {
042: }
043: BASE_URI = uri;
044: }
045:
046: public short getAllowedFacets() {
047: return (XSSimpleTypeDecl.FACET_LENGTH
048: | XSSimpleTypeDecl.FACET_MINLENGTH
049: | XSSimpleTypeDecl.FACET_MAXLENGTH
050: | XSSimpleTypeDecl.FACET_PATTERN
051: | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE);
052: }
053:
054: // before we return string we have to make sure it is correct URI as per spec.
055: // for some types (string and derived), they just return the string itself
056: public Object getActualValue(String content,
057: ValidationContext context)
058: throws InvalidDatatypeValueException {
059: // check 3.2.17.c0 must: URI (rfc 2396/2723)
060: try {
061: if (content.length() != 0) {
062: // encode special characters using XLink 5.4 algorithm
063: final String encoded = encode(content);
064: // Support for relative URLs
065: // According to Java 1.1: URLs may also be specified with a
066: // String and the URL object that it is related to.
067: new URI(BASE_URI, encoded);
068: }
069: } catch (URI.MalformedURIException ex) {
070: throw new InvalidDatatypeValueException(
071: "cvc-datatype-valid.1.2.1", new Object[] { content,
072: "anyURI" });
073: }
074:
075: // REVISIT: do we need to return the new URI object?
076: return content;
077: }
078:
079: // which ASCII characters need to be escaped
080: private static boolean gNeedEscaping[] = new boolean[128];
081: // the first hex character if a character needs to be escaped
082: private static char gAfterEscaping1[] = new char[128];
083: // the second hex character if a character needs to be escaped
084: private static char gAfterEscaping2[] = new char[128];
085: private static char[] gHexChs = { '0', '1', '2', '3', '4', '5',
086: '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
087: // initialize the above 3 arrays
088: static {
089: for (int i = 0; i <= 0x1f; i++) {
090: gNeedEscaping[i] = true;
091: gAfterEscaping1[i] = gHexChs[i >> 4];
092: gAfterEscaping2[i] = gHexChs[i & 0xf];
093: }
094: gNeedEscaping[0x7f] = true;
095: gAfterEscaping1[0x7f] = '7';
096: gAfterEscaping2[0x7f] = 'F';
097: char[] escChs = { ' ', '<', '>', '"', '{', '}', '|', '\\', '^',
098: '~', '`' };
099: int len = escChs.length;
100: char ch;
101: for (int i = 0; i < len; i++) {
102: ch = escChs[i];
103: gNeedEscaping[ch] = true;
104: gAfterEscaping1[ch] = gHexChs[ch >> 4];
105: gAfterEscaping2[ch] = gHexChs[ch & 0xf];
106: }
107: }
108:
109: // To encode special characters in anyURI, by using %HH to represent
110: // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
111: // and non-ASCII characters (whose value >= 128).
112: private static String encode(String anyURI) {
113: int len = anyURI.length(), ch;
114: StringBuffer buffer = new StringBuffer(len * 3);
115:
116: // for each character in the anyURI
117: int i = 0;
118: for (; i < len; i++) {
119: ch = anyURI.charAt(i);
120: // if it's not an ASCII character, break here, and use UTF-8 encoding
121: if (ch >= 128)
122: break;
123: if (gNeedEscaping[ch]) {
124: buffer.append('%');
125: buffer.append(gAfterEscaping1[ch]);
126: buffer.append(gAfterEscaping2[ch]);
127: } else {
128: buffer.append((char) ch);
129: }
130: }
131:
132: // we saw some non-ascii character
133: if (i < len) {
134: // get UTF-8 bytes for the remaining sub-string
135: byte[] bytes = null;
136: byte b;
137: try {
138: bytes = anyURI.substring(i).getBytes("UTF-8");
139: } catch (java.io.UnsupportedEncodingException e) {
140: // should never happen
141: return anyURI;
142: }
143: len = bytes.length;
144:
145: // for each byte
146: for (i = 0; i < len; i++) {
147: b = bytes[i];
148: // for non-ascii character: make it positive, then escape
149: if (b < 0) {
150: ch = b + 256;
151: buffer.append('%');
152: buffer.append(gHexChs[ch >> 4]);
153: buffer.append(gHexChs[ch & 0xf]);
154: } else if (gNeedEscaping[b]) {
155: buffer.append('%');
156: buffer.append(gAfterEscaping1[b]);
157: buffer.append(gAfterEscaping2[b]);
158: } else {
159: buffer.append((char) b);
160: }
161: }
162: }
163:
164: // If encoding happened, create a new string;
165: // otherwise, return the orginal one.
166: if (buffer.length() != len) {
167: return buffer.toString();
168: } else {
169: return anyURI;
170: }
171: }
172:
173: } // class AnyURIDV
|