001: /*
002: Copyright (c) 2004-2005, Dennis M. Sosnoski.
003: All rights reserved.
004:
005: Redistribution and use in source and binary forms, with or without modification,
006: are permitted provided that the following conditions are met:
007:
008: * Redistributions of source code must retain the above copyright notice, this
009: list of conditions and the following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice,
011: this list of conditions and the following disclaimer in the documentation
012: and/or other materials provided with the distribution.
013: * Neither the name of JiBX nor the names of its contributors may be used
014: to endorse or promote products derived from this software without specific
015: prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
018: ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
019: WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
021: ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028:
029: package org.jibx.runtime.impl;
030:
031: import java.io.IOException;
032: import java.io.Writer;
033:
034: import org.jibx.runtime.ICharacterEscaper;
035:
036: /**
037: * Handler for writing UTF output stream (for any form of UTF, despite the
038: * name). This code is specifically for XML 1.0 and would require changes for
039: * XML 1.1 (to handle the added legal characters, rather than throwing an
040: * exception).
041: *
042: * @author Dennis M. Sosnoski
043: * @version 1.0
044: */
045:
046: public class UTF8Escaper implements ICharacterEscaper {
047: /** Singleton instance of class. */
048: private static final UTF8Escaper s_instance = new UTF8Escaper();
049:
050: /**
051: * Private constructor to prevent external creation.
052: */
053:
054: private UTF8Escaper() {
055: }
056:
057: /**
058: * Write attribute value with character entity substitutions. This assumes
059: * that attributes use the regular quote ('"') delimitor.
060: *
061: * @param text attribute value text
062: * @param writer sink for output text
063: * @throws IOException on error writing to document
064: */
065:
066: public void writeAttribute(String text, Writer writer)
067: throws IOException {
068: int mark = 0;
069: for (int i = 0; i < text.length(); i++) {
070: char chr = text.charAt(i);
071: if (chr == '"') {
072: writer.write(text, mark, i - mark);
073: mark = i + 1;
074: writer.write(""");
075: } else if (chr == '&') {
076: writer.write(text, mark, i - mark);
077: mark = i + 1;
078: writer.write("&");
079: } else if (chr == '<') {
080: writer.write(text, mark, i - mark);
081: mark = i + 1;
082: writer.write("<");
083: } else if (chr == '>' && i > 2 && text.charAt(i - 1) == ']'
084: && text.charAt(i - 2) == ']') {
085: writer.write(text, mark, i - mark - 2);
086: mark = i + 1;
087: writer.write("]]>");
088: } else if (chr < 0x20) {
089: if (chr != 0x9 && chr != 0xA && chr != 0xD) {
090: throw new IOException("Illegal character code 0x"
091: + Integer.toHexString(chr)
092: + " in attribute value text");
093: }
094: } else if (chr > 0xD7FF
095: && (chr < 0xE000 || chr == 0xFFFE || chr == 0xFFFF || chr > 0x10FFFF)) {
096: throw new IOException("Illegal character code 0x"
097: + Integer.toHexString(chr)
098: + " in attribute value text");
099: }
100: }
101: writer.write(text, mark, text.length() - mark);
102: }
103:
104: /**
105: * Write content value with character entity substitutions.
106: *
107: * @param text content value text
108: * @param writer sink for output text
109: * @throws IOException on error writing to document
110: */
111:
112: public void writeContent(String text, Writer writer)
113: throws IOException {
114: int mark = 0;
115: for (int i = 0; i < text.length(); i++) {
116: char chr = text.charAt(i);
117: if (chr == '&') {
118: writer.write(text, mark, i - mark);
119: mark = i + 1;
120: writer.write("&");
121: } else if (chr == '<') {
122: writer.write(text, mark, i - mark);
123: mark = i + 1;
124: writer.write("<");
125: } else if (chr == '>' && i > 2 && text.charAt(i - 1) == ']'
126: && text.charAt(i - 2) == ']') {
127: writer.write(text, mark, i - mark - 2);
128: mark = i + 1;
129: writer.write("]]>");
130: } else if (chr < 0x20) {
131: if (chr != 0x9 && chr != 0xA && chr != 0xD) {
132: throw new IOException("Illegal character code 0x"
133: + Integer.toHexString(chr)
134: + " in content text");
135: }
136: } else if (chr > 0xD7FF
137: && (chr < 0xE000 || chr == 0xFFFE || chr == 0xFFFF || chr > 0x10FFFF)) {
138: throw new IOException("Illegal character code 0x"
139: + Integer.toHexString(chr) + " in content text");
140: }
141: }
142: writer.write(text, mark, text.length() - mark);
143: }
144:
145: /**
146: * Write CDATA to document. This writes the beginning and ending sequences
147: * for a CDATA section as well as the actual text, verifying that only
148: * characters allowed by the encoding are included in the text.
149: *
150: * @param text content value text
151: * @param writer sink for output text
152: * @throws IOException on error writing to document
153: */
154:
155: public void writeCData(String text, Writer writer)
156: throws IOException {
157: writer.write("<![CDATA[");
158: for (int i = 0; i < text.length(); i++) {
159: char chr = text.charAt(i);
160: if (chr == '>' && i > 2 && text.charAt(i - 1) == ']'
161: && text.charAt(i - 2) == ']') {
162: throw new IOException(
163: "Sequence \"]]>\" is not allowed "
164: + "within CDATA section text");
165: } else if (chr < 0x20) {
166: if (chr != 0x9 && chr != 0xA && chr != 0xD) {
167: throw new IOException("Illegal character code 0x"
168: + Integer.toHexString(chr)
169: + " in CDATA section");
170: }
171: } else if (chr > 0xD7FF
172: && (chr < 0xE000 || chr == 0xFFFE || chr == 0xFFFF)) {
173: throw new IOException("Illegal character code 0x"
174: + Integer.toHexString(chr)
175: + " in CDATA section");
176: }
177: }
178: writer.write(text);
179: writer.write("]]>");
180: }
181:
182: /**
183: * Get instance of escaper.
184: *
185: * @return escaper instance
186: */
187:
188: public static ICharacterEscaper getInstance() {
189: return s_instance;
190: }
191: }
|