001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.util;
017:
018: import java.io.Writer;
019: import java.io.IOException;
020:
/**
 * Static helpers for writing small pieces of XML to a {@link Writer}:
 * escaping of character data and attribute values, and writing of simple
 * {@code <tag attr="...">value</tag>} elements.
 *
 * <p>Characters below 0x20 other than tab, newline and carriage return are
 * not valid XML even as numeric character references, so they are written
 * as the plain text {@code #nn;} (no leading ampersand) instead.
 *
 * @author yonik
 * @version $Id: XML.java 472574 2006-11-08 18:25:52Z yonik $
 */
public class XML {

  //
  // copied from some of my personal code... -YCS
  // table created from python script.
  // only have to escape quotes in attribute values, and don't really have to escape '>'
  // many chars less than 0x20 are *not* valid XML, even when escaped!
  // for example, <foo>&#0;</foo> is invalid XML.
  //
  // Both tables are indexed by char code; a null entry means "write the char as is".
  // Chars at or beyond the end of a table are never escaped.
  private static final String[] chardata_escapes = { "#0;", "#1;",
      "#2;", "#3;", "#4;", "#5;", "#6;", "#7;", "#8;", null,
      null, "#11;", "#12;", null, "#14;", "#15;", "#16;", "#17;",
      "#18;", "#19;", "#20;", "#21;", "#22;", "#23;", "#24;",
      "#25;", "#26;", "#27;", "#28;", "#29;", "#30;", "#31;",
      null, null, null, null, null, null, "&amp;", null, null,
      null, null, null, null, null, null, null, null, null, null,
      null, null, null, null, null, null, null, null, null,
      "&lt;", null, "&gt;" };

  private static final String[] attribute_escapes = { "#0;", "#1;",
      "#2;", "#3;", "#4;", "#5;", "#6;", "#7;", "#8;", null,
      null, "#11;", "#12;", null, "#14;", "#15;", "#16;", "#17;",
      "#18;", "#19;", "#20;", "#21;", "#22;", "#23;", "#24;",
      "#25;", "#26;", "#27;", "#28;", "#29;", "#30;", "#31;",
      null, null, "&quot;", null, null, null, "&amp;", null,
      null, null, null, null, null, null, null, null, null, null,
      null, null, null, null, null, null, null, null, null, null,
      "&lt;" };

  /*****************************************
   #Simple python script used to generate the escape table above.  -YCS
   #
   #use individual char arrays or one big char array for better efficiency
   # or byte array?
   #other={'&':'amp', '<':'lt', '>':'gt', "'":'apos', '"':'quot'}
   #
   other={'&':'amp', '<':'lt'}

   maxi=ord(max(other.keys()))+1
   table=[None] * maxi
   #NOTE: invalid XML chars are "escaped" as #nn; *not* &#nn; because
   #a real XML escape would cause many strict XML parsers to choke.
   for i in range(0x20): table[i]='#%d;' % i
   for i in '\n\r\t ': table[ord(i)]=None
   for k,v in other.items():
     table[ord(k)]='&%s;' % v

   result=""
   for i in range(maxi):
     val=table[i]
     if not val: val='null'
     else: val='"%s"' % val
     result += val + ','

   print result
   ****************************************/

  /**
   * Writes {@code str} to {@code out} escaped for use as XML character data.
   * {@code &}, {@code <} and {@code >} become {@code &amp;}, {@code &lt;}
   * and {@code &gt;}; control characters below 0x20 other than tab, newline
   * and carriage return become {@code #nn;}.
   *
   * @param str the text to escape; must not be null
   * @param out the writer receiving the escaped text
   * @throws IOException if writing to {@code out} fails
   */
  public static void escapeCharData(String str, Writer out)
      throws IOException {
    escape(str, out, chardata_escapes);
  }

  /**
   * Writes {@code str} to {@code out} escaped for use inside a double-quoted
   * XML attribute value. {@code "}, {@code &} and {@code <} become
   * {@code &quot;}, {@code &amp;} and {@code &lt;}; control characters below
   * 0x20 other than tab, newline and carriage return become {@code #nn;}.
   * {@code >} is written unchanged.
   *
   * @param str the text to escape; must not be null
   * @param out the writer receiving the escaped text
   * @throws IOException if writing to {@code out} fails
   */
  public static void escapeAttributeValue(String str, Writer out)
      throws IOException {
    escape(str, out, attribute_escapes);
  }

  /**
   * Writes {@code <tag>val</tag>} with {@code val} escaped as character
   * data, or the empty element {@code <tag/>} when {@code val} is null.
   * The tag name is written as is.
   *
   * @param out the writer receiving the element
   * @param tag the element name
   * @param val the element content, or null for an empty element
   * @throws IOException if writing to {@code out} fails
   */
  public static final void writeXML(Writer out, String tag, String val)
      throws IOException {
    out.write('<');
    out.write(tag);
    writeContent(out, tag, val, true);
  }

  /**
   * Writes an element with attributes WITHOUT escaping anything: neither the
   * attribute values nor {@code val} are escaped, so both must already be
   * valid XML. A null {@code val} produces an empty element.
   *
   * @param out   the writer receiving the element
   * @param tag   the element name
   * @param val   the pre-escaped element content, or null for an empty element
   * @param attrs alternating attribute names and values, each converted with
   *              {@code toString()}; an odd number of entries fails with an
   *              {@link ArrayIndexOutOfBoundsException} after partial output
   * @throws IOException if writing to {@code out} fails
   */
  public static final void writeUnescapedXML(Writer out, String tag,
      String val, Object... attrs) throws IOException {
    out.write('<');
    out.write(tag);
    writeAttributes(out, attrs, false);
    writeContent(out, tag, val, false);
  }

  /**
   * Writes an element with attributes, escaping the attribute values and the
   * character data in {@code val}. Tag and attribute names are written as
   * is. A null {@code val} produces an empty element.
   *
   * @param out   the writer receiving the element
   * @param tag   the element name
   * @param val   the element content, or null for an empty element
   * @param attrs alternating attribute names and values, each converted with
   *              {@code toString()}; an odd number of entries fails with an
   *              {@link ArrayIndexOutOfBoundsException} after partial output
   * @throws IOException if writing to {@code out} fails
   */
  public static final void writeXML(Writer out, String tag,
      String val, Object... attrs) throws IOException {
    out.write('<');
    out.write(tag);
    writeAttributes(out, attrs, true);
    writeContent(out, tag, val, true);
  }

  /** Writes each name/value pair of {@code attrs} as {@code  name="value"}, optionally escaping the value. */
  private static void writeAttributes(Writer out, Object[] attrs,
      boolean escapeValues) throws IOException {
    for (int i = 0; i < attrs.length; i += 2) {
      out.write(' ');
      out.write(attrs[i].toString());
      out.write("=\"");
      String value = attrs[i + 1].toString();
      if (escapeValues) {
        escapeAttributeValue(value, out);
      } else {
        out.write(value);
      }
      out.write('"');
    }
  }

  /** Finishes an already opened start tag: {@code />} for a null value, else {@code >value</tag>}. */
  private static void writeContent(Writer out, String tag, String val,
      boolean escapeValue) throws IOException {
    if (val == null) {
      out.write("/>");
      return;
    }
    out.write('>');
    if (escapeValue) {
      escapeCharData(val, out);
    } else {
      out.write(val);
    }
    out.write("</");
    out.write(tag);
    out.write('>');
  }

  /**
   * Copies {@code str} to {@code out}, replacing every char that has a
   * non-null entry in {@code escapes} (indexed by char code) with that entry.
   * Runs of unescaped chars are written in one call.
   */
  private static void escape(String str, Writer out, String[] escapes)
      throws IOException {
    final int len = str.length();
    int start = 0; // index of the first char that has not been written yet

    for (int i = 0; i < len; i++) {
      char ch = str.charAt(i);
      String subst = (ch < escapes.length) ? escapes[ch] : null;
      if (subst == null) {
        continue;
      }
      if (start < i) {
        // write(str,off,len) causes problems for Jetty with chars > 127,
        // so an explicit substring is written instead.
        out.write(str.substring(start, i));
      }
      out.write(subst);
      start = i + 1;
    }

    if (start == 0) {
      // nothing needed escaping: write the original string untouched
      out.write(str);
    } else if (start < len) {
      // write(str,off,len) causes problems for Jetty with chars > 127
      out.write(str.substring(start));
    }
  }
}
|