001: /*
002: * Copyright 2001-2006 C:1 Financial Services GmbH
003: *
004: * This software is free software; you can redistribute it and/or
005: * modify it under the terms of the GNU Lesser General Public
006: * License Version 2.1, as published by the Free Software Foundation.
007: *
008: * This software is distributed in the hope that it will be useful,
009: * but WITHOUT ANY WARRANTY; without even the implied warranty of
010: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
011: * Lesser General Public License for more details.
012: *
013: * You should have received a copy of the GNU Lesser General Public
014: * License along with this library; if not, write to the Free Software
015: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
016: */
017:
018: package de.finix.contelligent.util;
019:
020: import java.io.StringWriter;
021:
022: import org.w3c.dom.DOMImplementation;
023: import org.w3c.dom.Document;
024: import org.w3c.dom.ls.DOMImplementationLS;
025: import org.w3c.dom.ls.LSOutput;
026: import org.w3c.dom.ls.LSSerializer;
027:
/**
 * <code>XMLUtils</code> bundles static helpers for XML text handling: escaping
 * and encoding plain text for element content or attribute values, decoding the
 * five predefined XML entities, and serializing a DOM {@link Document} to a
 * string.
 */
public class XMLUtils {

    /**
     * Length of the CDATA wrapper ({@code <![CDATA[} plus {@code ]]>}). Texts
     * that are not longer than this are encoded as PCDATA instead of CDATA.
     */
    private final static int CDATA_THRESHOLD_LENGTH = 12;

    /** Terminator of a CDATA section; must never appear literally in content. */
    private final static String CDATA_END = "]]>";

    /** Characters that force encoding of element content. */
    private final static char[] TEXT_SPECIALS = { '&', '<' };

    /** Characters that force encoding of attribute values. */
    private final static char[] ATTRIBUTE_SPECIALS = { '&', '<', '"', '\'' };

    /** The predefined XML entities; index-aligned with {@link #ENTITY_CHARS}. */
    private final static String[] ENTITIES = { "&amp;", "&lt;", "&gt;", "&quot;", "&apos;" };

    /** Characters represented by {@link #ENTITIES}, in the same order. */
    private final static char[] ENTITY_CHARS = { '&', '<', '>', '"', '\'' };

    /**
     * Escapes the characters {@code &}, {@code <}, {@code >}, {@code "} and
     * {@code '} in <code>text</code> using the predefined XML entities
     * {@code &amp;}, {@code &lt;}, {@code &gt;}, {@code &quot;} and
     * {@code &apos;}. This method is written to operate on simple text which
     * does not contain any XML elements or entities; an ampersand that already
     * starts an entity is escaped again.
     *
     * @param text
     *            the plain text to escape, may be <code>null</code>
     * @return the escaped text, or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public static String escapeCharacters(String text) {
        if (text == null) {
            return null;
        }
        StringBuilder buff = new StringBuilder((int) (1.2 * text.length()));
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                buff.append("&amp;");
                break;
            case '<':
                buff.append("&lt;");
                break;
            case '>':
                buff.append("&gt;");
                break;
            case '"':
                buff.append("&quot;");
                break;
            case '\'':
                buff.append("&apos;");
                break;
            default:
                buff.append(c);
            }
        }
        return buff.toString();
    }

    /**
     * Replaces every {@code <} in the given string with {@code &lt;}. No other
     * character is touched.
     *
     * @param attribute
     *            the attribute value to encode, may be <code>null</code>
     * @return the encoded value, or <code>null</code> if <code>attribute</code>
     *         is <code>null</code>
     */
    public static String encodeAttribute(String attribute) {
        if (attribute == null) {
            return null;
        }
        // A single pass; the replacement text contains no '<', so the result
        // never needs to be rescanned.
        return attribute.replace("<", "&lt;");
    }

    /**
     * Makes any text fit into XML attributes.
     *
     * @param text
     *            the attribute value, may be <code>null</code>
     * @return <code>text</code> itself if it contains none of {@code & < " '},
     *         otherwise the result of
     *         {@link #xmlEncodeTextAsPCDATA(String, boolean)} in attribute
     *         mode; <code>null</code> if <code>text</code> is <code>null</code>
     */
    public final static String xmlEncodeTextForAttribute(String text) {
        if (text == null) {
            return null;
        }
        if (!needsEncoding(text, true)) {
            return text;
        }
        return xmlEncodeTextAsPCDATA(text, true);
    }

    /**
     * Encodes text as XML in the most suitable way, either CDATA or PCDATA.
     * Text that needs no encoding is returned unchanged. Text longer than the
     * CDATA wrapper overhead is wrapped in a CDATA section when it does not
     * contain {@code ]]>}; everything else is entity-encoded as PCDATA.
     *
     * @param text
     *            the text to encode, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public final static String xmlEncodeText(String text) {
        if (text == null) {
            return null;
        }
        if (!needsEncoding(text)) {
            return text;
        }
        // Only encode as CDATA if the text is longer than CDATA's overhead.
        if (text.length() > CDATA_THRESHOLD_LENGTH) {
            String cdata = xmlEncodeTextAsCDATA(text);
            if (cdata != null) {
                return cdata;
            }
        }
        // If everything else fails, do it the safe way.
        return xmlEncodeTextAsPCDATA(text);
    }

    /**
     * Encodes any text as PCDATA for element content.
     *
     * @param text
     *            the text to encode, may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text) {
        if (text == null) {
            return null;
        }
        return xmlEncodeTextAsPCDATA(text, false);
    }

    /**
     * Encodes any text as PCDATA. {@code &} and {@code <} are always replaced
     * by {@code &amp;} and {@code &lt;}, and the {@code >} of a literal
     * {@code ]]>} is replaced by {@code &gt;} because that sequence is not
     * allowed in character data.
     * <p>
     * In attribute mode exactly one kind of quote is encoded: if the text
     * starts with an apostrophe, apostrophes become {@code &apos;} and double
     * quotes are kept; otherwise double quotes become {@code &quot;} and
     * apostrophes are kept. This first-character check is a heuristic for
     * guessing how the attribute is delimited.
     *
     * @param text
     *            the text to encode, may be <code>null</code> or empty
     * @param forAttribute
     *            if you want quotes and apostrophes specially treated for
     *            attributes
     * @return the encoded text, or <code>null</code> if <code>text</code> is
     *         <code>null</code>
     */
    public final static String xmlEncodeTextAsPCDATA(String text,
            boolean forAttribute) {
        if (text == null) {
            return null;
        }
        // Guard the first-character probe so that an empty text is accepted.
        boolean aposAttr = forAttribute && text.length() > 0
                && text.charAt(0) == '\'';
        StringBuilder n = new StringBuilder(text.length() * 2);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                n.append("&amp;");
                break;
            case '<':
                n.append("&lt;");
                break;
            case '>':
                // Only the '>' that would complete "]]>" has to be escaped.
                if (i >= 2 && text.charAt(i - 1) == ']'
                        && text.charAt(i - 2) == ']') {
                    n.append("&gt;");
                } else {
                    n.append(c);
                }
                break;
            case '"':
                if (forAttribute && !aposAttr) {
                    n.append("&quot;");
                } else {
                    n.append(c);
                }
                break;
            case '\'':
                if (forAttribute && aposAttr) {
                    n.append("&apos;");
                } else {
                    n.append(c);
                }
                break;
            default:
                n.append(c);
                break;
            }
        }
        return n.toString();
    }

    /**
     * Returns the text wrapped in a CDATA section if possible, otherwise
     * <code>null</code>.
     *
     * @param text
     *            the text to wrap, may be <code>null</code>
     * @return {@code <![CDATA[} + text + {@code ]]>}, or <code>null</code> if
     *         <code>text</code> is <code>null</code> or contains {@code ]]>}
     */
    public final static String xmlEncodeTextAsCDATA(String text) {
        if (text == null) {
            return null;
        }
        if (!isCompatibleWithCDATA(text)) {
            return null;
        }
        return "<![CDATA[" + text + "]]>";
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML
     * element content.
     *
     * @param text
     *            the text to check, may be <code>null</code>
     * @return <code>true</code> if the text must be encoded
     */
    public final static boolean needsEncoding(String text) {
        return needsEncoding(text, false);
    }

    /**
     * Checks if this text needs encoding in order to be represented in XML.
     *
     * Set <code>checkForAttr</code> if you want to check for storability in an
     * attribute.
     *
     * @param data
     *            the text to check; <code>null</code> never needs encoding
     * @param checkForAttr
     *            <code>true</code> to also treat {@code "} and {@code '} as
     *            characters that need encoding
     * @return <code>true</code> if <code>data</code> contains {@code &} or
     *         {@code <}, a quote character in attribute mode, or the sequence
     *         {@code ]]>} in content mode
     */
    public final static boolean needsEncoding(String data,
            boolean checkForAttr) {
        if (data == null) {
            return false;
        }
        if (checkForAttr) {
            return contains(data, ATTRIBUTE_SPECIALS);
        }
        // "]]>" is legal inside an attribute value but not in element content.
        return contains(data, TEXT_SPECIALS) || data.indexOf(CDATA_END) != -1;
    }

    /**
     * Can this text be stored into CDATA?
     *
     * @param text
     *            the text to check, may be <code>null</code>
     * @return <code>true</code> if <code>text</code> is not <code>null</code>
     *         and does not contain {@code ]]>}
     */
    public final static boolean isCompatibleWithCDATA(String text) {
        if (text == null) {
            return false;
        }
        return text.indexOf(CDATA_END) == -1;
    }

    /**
     * Makes plain character data out of possibly encoded PCDATA by replacing
     * the five predefined entities, e.g. making {@code &} out of
     * {@code &amp;}. The input is scanned once, so {@code &amp;lt;} decodes to
     * {@code &lt;}. Hint: numeric character references (e.g. {@code &#38;})
     * are not featured here; they and any other unrecognised {@code &} are
     * copied unchanged.
     *
     * @param pcdata
     *            the encoded text, may be <code>null</code>
     * @return the decoded text, or <code>null</code> if <code>pcdata</code> is
     *         <code>null</code>
     */
    public final static String xmlDecodeTextToCDATA(String pcdata) {
        if (pcdata == null) {
            return null;
        }
        StringBuilder n = new StringBuilder(pcdata.length());
        int i = 0;
        while (i < pcdata.length()) {
            char c = pcdata.charAt(i);
            int entity = (c == '&') ? matchEntity(pcdata, i) : -1;
            if (entity >= 0) {
                n.append(ENTITY_CHARS[entity]);
                // Skip exactly the entity, so the character after ';' is kept.
                i += ENTITIES[entity].length();
            } else {
                n.append(c);
                i++;
            }
        }
        return n.toString();
    }

    /**
     * Returns the index into {@link #ENTITIES} of the predefined entity that
     * starts at <code>offset</code> in <code>data</code>, or -1 if none does.
     */
    private final static int matchEntity(String data, int offset) {
        for (int k = 0; k < ENTITIES.length; k++) {
            if (data.startsWith(ENTITIES[k], offset)) {
                return k;
            }
        }
        return -1;
    }

    /**
     * Tells whether <code>text</code> contains at least one of the given
     * characters; combines multiple checks in one method for speed.
     */
    private final static boolean contains(String text, char[] chars) {
        if (text == null || chars == null || chars.length == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            for (int j = 0; j < chars.length; j++) {
                if (chars[j] == c) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Prints a whole Document using the DOM Level 3 Load and Save serializer
     * of the document's own DOM implementation. The output is not
     * pretty-printed: the <code>format-pretty-print</code> serializer
     * parameter was found not to work properly and is therefore left unset.
     *
     * @param xmlDocument
     *            the document to serialize, must not be <code>null</code>
     * @return the serialized document
     * @throws IllegalStateException
     *             if the document's DOM implementation does not provide the
     *             "LS" 3.0 feature
     */
    public final static String documentToString(Document xmlDocument) {
        DOMImplementation domImpl = xmlDocument.getImplementation();
        Object feature = domImpl.getFeature("LS", "3.0");
        if (!(feature instanceof DOMImplementationLS)) {
            throw new IllegalStateException("DOM implementation "
                    + domImpl.getClass().getName()
                    + " does not support the Load and Save 3.0 feature");
        }
        DOMImplementationLS domImplLS = (DOMImplementationLS) feature;

        LSSerializer serializer = domImplLS.createLSSerializer();
        LSOutput serializerOut = domImplLS.createLSOutput();

        StringWriter stringWriter = new StringWriter();
        serializerOut.setCharacterStream(stringWriter);
        serializer.write(xmlDocument, serializerOut);

        return stringWriter.toString();
    }
}
|