001: /*
002:
003: This software is OSI Certified Open Source Software.
004: OSI Certified is a certification mark of the Open Source Initiative.
005:
006: The license (Mozilla version 1.0) can be read at the MMBase site.
007: See http://www.MMBase.org/license
008:
009: */
010: package org.mmbase.util.transformers;
011:
012: import java.util.HashMap;
013: import java.util.Map;
014:
015: /**
016: * Transformations related to escaping in XML.
017: * @author Michiel Meeuwissen
018: * @author Kees Jongenburger
019: * @version $Id: Xml.java,v 1.22 2007/11/16 16:25:29 michiel Exp $
020: */
021:
022: public class Xml extends ConfigurableStringTransformer implements
023: CharTransformer {
024:
025: public final static int ESCAPE = 1;
026: public final static int ESCAPE_ATTRIBUTE = 2;
027: public final static int ESCAPE_ATTRIBUTE_DOUBLE = 3;
028: public final static int ESCAPE_ATTRIBUTE_SINGLE = 4;
029: //public final static int ESCAPE_ATTRIBUTE_BOTH = 6;
030: public final static int ESCAPE_ATTRIBUTE_HTML = 5;
031:
032: public Xml() {
033: super (ESCAPE);
034: }
035:
036: public Xml(int c) {
037: super (c);
038: }
039:
040: //public final static int BODYTAG = 20;
041:
042: /**
043: * Used when registering this class as a possible Transformer
044: */
045:
046: public Map<String, Config> transformers() {
047: HashMap<String, Config> h = new HashMap<String, Config>();
048: h.put("escape_xml".toUpperCase(), new Config(Xml.class, ESCAPE,
049: "Escapes >, < & and \""));
050: h.put("escape_html".toUpperCase(), new Config(Xml.class,
051: ESCAPE, "Like ESCAPE_XML now."));
052: h.put("escape_wml".toUpperCase(), new Config(Xml.class, ESCAPE,
053: "Like ESCAPE_XML now."));
054: h
055: .put(
056: "escape_xml_attribute".toUpperCase(),
057: new Config(
058: Xml.class,
059: ESCAPE_ATTRIBUTE,
060: "Escaping in attributes only involves quotes. This simply escapes both types (which is little too much)."));
061: h
062: .put(
063: "escape_xml_attribute_double".toUpperCase(),
064: new Config(Xml.class, ESCAPE_ATTRIBUTE_DOUBLE,
065: "Escaping in attributes only involves quotes. This is for double quotes."));
066: h
067: .put(
068: "escape_xml_attribute_single".toUpperCase(),
069: new Config(Xml.class, ESCAPE_ATTRIBUTE_SINGLE,
070: "Escaping in attributes only involves quotes. This is for single quotes."));
071: h
072: .put(
073: "escape_html_attribute".toUpperCase(),
074: new Config(Xml.class, ESCAPE_ATTRIBUTE_HTML,
075: "This escapes all quotes, and also newlines. Handly in some html tags."));
076: return h;
077: }
078:
079: /**
080: * Attributes of XML tags cannot contain quotes, and also & must be escaped
081: * @param att String representing the attribute
082: * @param quot Which quote (either ' or ")
083: */
084: public static String XMLAttributeEscape(String att, char quot) {
085: if (att == null)
086: return "";
087: StringBuilder sb = new StringBuilder();
088: char[] data = att.toCharArray();
089: char c;
090: for (char element : data) {
091: c = element;
092: if (c == quot) {
093: if (quot == '"') {
094: sb.append(""");
095: } else {
096: sb.append("'");
097: }
098:
099: } else if (c == '&') {
100: sb.append("&");
101: } else {
102: sb.append(c);
103: }
104: }
105: return sb.toString();
106: }
107:
108: /**
109: * Attributes of XML tags cannot contain quotes, and also & must be escaped
110: * @param att String representing the attribute
111: */
112: public static String XMLAttributeEscape(String att) {
113: if (att == null)
114: return "";
115: StringBuilder sb = new StringBuilder();
116: char[] data = att.toCharArray();
117: char c;
118: for (char element : data) {
119: c = element;
120: if (c == '"') {
121: sb.append(""");
122: } else if (c == '\'') {
123: sb.append("'");
124: } else if (c == '&') {
125: sb.append("&");
126: } else {
127: sb.append(c);
128: }
129: }
130: return sb.toString();
131: }
132:
133: /**
134: * Utility class for escaping and unescaping
135: * (XML)data
136: * @param xml the xml to encode
137: * @return the encoded xml data
138: * <UL>
139: * <LI>& is replaced by &amp;</LI>
140: * <LI>" is replaced by &quot;</LI>
141: * <LI>< is replaced by &lt;</LI>
142: * <LI>> is replaced by &gt;</LI>
143: * </UL>
144: **/
145: public static String XMLEscape(String xml) {
146: if (xml == null)
147: return "";
148: StringBuilder sb = new StringBuilder();
149: XMLEscape(xml, sb);
150: return sb.toString();
151: }
152:
153: /**
154: * @since MMBase-1.9
155: */
156: public static void XMLEscape(String xml, StringBuilder sb) {
157: char[] data = xml.toCharArray();
158: char c;
159: for (char element : data) {
160: c = element;
161: if (c == '&') {
162: sb.append("&");
163: } else if (c == '<') {
164: sb.append("<");
165: } else if (c == '>') {
166: sb.append(">");
167: } else if (c == '"') {
168: sb.append(""");
169: } else {
170: sb.append(c);
171: }
172: }
173: }
174:
175: /**
176: * @since MMBase-1.8
177: */
178: public static void XMLEscape(String xml, StringBuffer sb) {
179: StringBuilder s = new StringBuilder();
180: XMLEscape(xml, s);
181: sb.append(s.toString());
182: }
183:
184: private static String removeNewlines(String incoming) {
185: String ret = incoming.replace('\n', ' ');
186: return ret.replace('\r', ' ');
187: }
188:
189: /**
190: * Utility class for escaping and unescaping
191: * (XML)data
192: * @param data the data to decode to (html/xml) where
193: * <UL>
194: * <LI>& was replaced by &amp;</LI>
195: * <LI>" was replaced by &quot;</LI>
196: * <LI>< was replaced by &lt;</LI>
197: * <LI>> was replaced by &gt;</LI>
198: * </UL>
199: * @return the decoded xml data
200: **/
201: public static String XMLUnescape(String data) {
202: if (data == null)
203: return "";
204: StringBuilder sb = new StringBuilder();
205: int i;
206: for (i = 0; i < data.length(); i++) {
207: char c = data.charAt(i);
208: if (c == '&') {
209: int end = data.indexOf(';', i + 1);
210: //if we found no amperstand then we are done
211: if (end == -1) {
212: sb.append(c);
213: continue;
214: }
215: String entity = data.substring(i + 1, end);
216: i += entity.length() + 1;
217: if (entity.equals("amp")) {
218: sb.append('&');
219: } else if (entity.equals("lt")) {
220: sb.append('<');
221: } else if (entity.equals("gt")) {
222: sb.append('>');
223: } else if (entity.equals("quot")) {
224: sb.append('"');
225: } else if (entity.equals("apos")) {
226: sb.append('\'');
227: } else {
228: sb.append("&" + entity + ";");
229: }
230: } else {
231: sb.append(c);
232: }
233: }
234: return sb.toString();
235: }
236:
237: public String transform(String r) {
238: switch (to) {
239: case ESCAPE:
240: return XMLEscape(r);
241: case ESCAPE_ATTRIBUTE:
242: return XMLAttributeEscape(r);
243: case ESCAPE_ATTRIBUTE_DOUBLE:
244: return XMLAttributeEscape(r, '"');
245: case ESCAPE_ATTRIBUTE_SINGLE:
246: return XMLAttributeEscape(r, '\'');
247: case ESCAPE_ATTRIBUTE_HTML:
248: return removeNewlines(XMLAttributeEscape(r));
249: default:
250: throw new UnknownCodingException(getClass(), "transform",
251: to);
252: }
253: }
254:
255: public String transformBack(String r) {
256: // the attribute unescape will do a little to much, I think.
257: switch (to) {
258: case ESCAPE:
259: case ESCAPE_ATTRIBUTE:
260: case ESCAPE_ATTRIBUTE_DOUBLE:
261: case ESCAPE_ATTRIBUTE_SINGLE:
262: return XMLUnescape(r);
263: case ESCAPE_ATTRIBUTE_HTML:
264: // we can only try, the removing of newlines cannot be undone.
265: return XMLUnescape(r);
266: default:
267: throw new UnknownCodingException(getClass(),
268: "transformBack", to);
269: }
270: }
271:
272: public String getEncoding() {
273: switch (to) {
274: case ESCAPE:
275: return "ESCAPE_XML";
276: case ESCAPE_ATTRIBUTE:
277: return "ESCAPE_XML_ATTRIBUTE";
278: case ESCAPE_ATTRIBUTE_DOUBLE:
279: return "ESCAPE_XML_ATTRIBUTE_DOUBLE";
280: case ESCAPE_ATTRIBUTE_SINGLE:
281: return "ESCAPE_XML_ATTRIBUTE_SINGLE";
282: case ESCAPE_ATTRIBUTE_HTML:
283: return "ESCAPE_HTML_ATTRIBUTE";
284: default:
285: throw new UnknownCodingException(getClass(), "getEncoding",
286: to);
287: }
288: }
289: }
|