001: /*
002: * Copyright 2004 Sun Microsystems, Inc.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: *
016: */
017: package com.sun.syndication.unittest;
018:
019: import com.sun.syndication.io.XmlReader;
020: import junit.framework.TestCase;
021:
022: import java.io.*;
023: import java.text.MessageFormat;
024: import java.util.HashMap;
025: import java.util.Map;
026:
027: /**
028: * @author pat, tucu
029: *
030: */
031: public class TestXmlReader extends TestCase {
032: private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
033: private static final String XML4 = "xml-prolog-encoding-single-quotes";
034: private static final String XML3 = "xml-prolog-encoding-double-quotes";
035: private static final String XML2 = "xml-prolog";
036: private static final String XML1 = "xml";
037:
038: public static void main(String[] args) throws Exception {
039: TestXmlReader test = new TestXmlReader();
040: test.testRawBom();
041: test.testRawNoBom();
042: test.testHttp();
043: }
044:
045: protected void _testRawNoBomValid(String encoding) throws Exception {
046: InputStream is = getXmlStream("no-bom", XML1, encoding,
047: encoding);
048: XmlReader xmlReader = new XmlReader(is, false);
049: assertEquals(xmlReader.getEncoding(), "UTF-8");
050:
051: is = getXmlStream("no-bom", XML2, encoding, encoding);
052: xmlReader = new XmlReader(is);
053: assertEquals(xmlReader.getEncoding(), "UTF-8");
054:
055: is = getXmlStream("no-bom", XML3, encoding, encoding);
056: xmlReader = new XmlReader(is);
057: assertEquals(xmlReader.getEncoding(), encoding);
058:
059: is = getXmlStream("no-bom", XML4, encoding, encoding);
060: xmlReader = new XmlReader(is);
061: assertEquals(xmlReader.getEncoding(), encoding);
062:
063: is = getXmlStream("no-bom", XML5, encoding, encoding);
064: xmlReader = new XmlReader(is);
065: assertEquals(xmlReader.getEncoding(), encoding);
066: }
067:
068: protected void _testRawNoBomInvalid(String encoding)
069: throws Exception {
070: InputStream is = getXmlStream("no-bom", XML3, encoding,
071: encoding);
072: try {
073: XmlReader xmlReader = new XmlReader(is, false);
074: fail("It should have failed");
075: } catch (IOException ex) {
076: assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
077: }
078: }
079:
080: public void testRawNoBom() throws Exception {
081: _testRawNoBomValid("US-ASCII");
082: _testRawNoBomValid("UTF-8");
083: _testRawNoBomValid("ISO-8859-1");
084: }
085:
086: protected void _testRawBomValid(String encoding) throws Exception {
087: InputStream is = getXmlStream(encoding + "-bom", XML3,
088: encoding, encoding);
089: XmlReader xmlReader = new XmlReader(is, false);
090: if (!encoding.equals("UTF-16")) {
091: assertEquals(xmlReader.getEncoding(), encoding);
092: } else {
093: assertEquals(xmlReader.getEncoding().substring(0,
094: encoding.length()), encoding);
095: }
096: }
097:
098: protected void _testRawBomInvalid(String bomEnc, String streamEnc,
099: String prologEnc) throws Exception {
100: InputStream is = getXmlStream(bomEnc, XML3, streamEnc,
101: prologEnc);
102: try {
103: XmlReader xmlReader = new XmlReader(is, false);
104: fail("It should have failed for BOM " + bomEnc
105: + ", streamEnc " + streamEnc + " and prologEnc "
106: + prologEnc);
107: } catch (IOException ex) {
108: assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
109: }
110: }
111:
112: public void testRawBom() throws Exception {
113: _testRawBomValid("UTF-8");
114: _testRawBomValid("UTF-16BE");
115: _testRawBomValid("UTF-16LE");
116: _testRawBomValid("UTF-16");
117:
118: _testRawBomInvalid("UTF-8-bom", "US-ASCII", "US-ASCII");
119: _testRawBomInvalid("UTF-8-bom", "ISO-8859-1", "ISO-8859-1");
120: _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16");
121: _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16BE");
122: _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16LE");
123: _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
124: _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
125: _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
126: }
127:
128: public void testHttp() throws Exception {
129: _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
130: _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
131: _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
132: _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
133: _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom",
134: "UTF-8", null);
135: _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom",
136: "UTF-8", "UTF-8");
137: _testHttpValid("application/xml;charset=UTF-16",
138: "UTF-16BE-bom", "UTF-16BE", null);
139: _testHttpValid("application/xml;charset=UTF-16",
140: "UTF-16BE-bom", "UTF-16BE", "UTF-16");
141: _testHttpValid("application/xml;charset=UTF-16",
142: "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
143:
144: _testHttpInvalid("application/xml;charset=UTF-16BE",
145: "UTF-16BE-bom", "UTF-16BE", null);
146: _testHttpInvalid("application/xml;charset=UTF-16BE",
147: "UTF-16BE-bom", "UTF-16BE", "UTF-16");
148: _testHttpInvalid("application/xml;charset=UTF-16BE",
149: "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
150: _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII",
151: "US-ASCII");
152: _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE",
153: "UTF-8", "UTF-8");
154: _testHttpInvalid("application/xml;charset=UTF-16", "no-bom",
155: "UTF-16BE", "UTF-16BE");
156:
157: _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
158: _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
159: "UTF-8");
160: _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8",
161: null);
162: _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom",
163: "UTF-16BE", null);
164: _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom",
165: "UTF-16BE", "UTF-16");
166: _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom",
167: "UTF-16BE", "UTF-16BE");
168: _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
169:
170: _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
171: "UTF-16BE", null);
172: _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
173: "UTF-16BE", "UTF-16");
174: _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
175: "UTF-16BE", "UTF-16BE");
176: _testHttpInvalid("text/xml;charset=UTF-16", "no-bom",
177: "UTF-16BE", "UTF-16BE");
178: _testHttpInvalid("text/xml;charset=UTF-16", "no-bom",
179: "UTF-16BE", null);
180:
181: _testHttpLenient("text/xml", "no-bom", "US-ASCII", null,
182: "US-ASCII");
183: _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom",
184: "UTF-8", "UTF-8", "UTF-8");
185: _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom",
186: "UTF-8", null, "UTF-8");
187: _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom",
188: "UTF-16BE", null, "UTF-16BE");
189: _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom",
190: "UTF-16BE", "UTF-16", "UTF-16");
191: _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom",
192: "UTF-16BE", "UTF-16BE", "UTF-16BE");
193: _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null,
194: "US-ASCII");
195:
196: _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
197: "UTF-16BE", null, "UTF-16BE");
198: _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
199: "UTF-16BE", "UTF-16", "UTF-16");
200: _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom",
201: "UTF-16BE", "UTF-16BE", "UTF-16BE");
202: _testHttpLenient("text/xml;charset=UTF-16", "no-bom",
203: "UTF-16BE", "UTF-16BE", "UTF-16BE");
204: _testHttpLenient("text/xml;charset=UTF-16", "no-bom",
205: "UTF-16BE", null, "UTF-16");
206:
207: _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII",
208: "US-ASCII");
209: _testHttpLenient("text/html", "no-bom", "US-ASCII", null,
210: "US-ASCII");
211: _testHttpLenient("text/html;charset=UTF-8", "no-bom",
212: "US-ASCII", "UTF-8", "UTF-8");
213: _testHttpLenient("text/html;charset=UTF-16BE", "no-bom",
214: "US-ASCII", "UTF-8", "UTF-8");
215: }
216:
217: public void _testHttpValid(String cT, String bomEnc,
218: String streamEnc, String prologEnc) throws Exception {
219: InputStream is = getXmlStream(bomEnc,
220: (prologEnc == null) ? XML1 : XML3, streamEnc, prologEnc);
221: XmlReader xmlReader = new XmlReader(is, cT, false);
222: if (!streamEnc.equals("UTF-16")) {
223: // we can not assert things here becuase UTF-8, US-ASCII and ISO-8859-1 look alike for the chars used for detection
224: } else {
225: assertEquals(xmlReader.getEncoding().substring(0,
226: streamEnc.length()), streamEnc);
227: }
228: }
229:
230: protected void _testHttpInvalid(String cT, String bomEnc,
231: String streamEnc, String prologEnc) throws Exception {
232: InputStream is = getXmlStream(bomEnc,
233: (prologEnc == null) ? XML2 : XML3, streamEnc, prologEnc);
234: try {
235: new XmlReader(is, cT, false);
236: fail("It should have failed for HTTP Content-type " + cT
237: + ", BOM " + bomEnc + ", streamEnc " + streamEnc
238: + " and prologEnc " + prologEnc);
239: } catch (IOException ex) {
240: assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
241: }
242: }
243:
244: protected void _testHttpLenient(String cT, String bomEnc,
245: String streamEnc, String prologEnc, String shouldbe)
246: throws Exception {
247: InputStream is = getXmlStream(bomEnc,
248: (prologEnc == null) ? XML2 : XML3, streamEnc, prologEnc);
249: XmlReader xmlReader = new XmlReader(is, cT, true);
250: assertEquals(xmlReader.getEncoding(), shouldbe);
251: }
252:
253: // XML Stream generator
254:
255: private static final int[] NO_BOM_BYTES = {};
256: private static final int[] UTF_16BE_BOM_BYTES = { 0xFE, 0xFF };
257: private static final int[] UTF_16LE_BOM_BYTES = { 0xFF, 0XFE };
258: private static final int[] UTF_8_BOM_BYTES = { 0xEF, 0xBB, 0xBF };
259:
260: private static final Map BOMs = new HashMap();
261:
262: static {
263: BOMs.put("no-bom", NO_BOM_BYTES);
264: BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
265: BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
266: BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
267: BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
268: }
269:
270: private static final MessageFormat XML = new MessageFormat(
271: "<root>{2}</root>");
272: private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
273: "<?xml version=\"1.0\"?>\n<root>{2}</root>");
274: private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
275: "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
276: private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
277: "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
278: private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
279: "<?xml version=\"1.0\" encoding = \t \n \r''{1}''?>\n<root>{2}</root>");
280:
281: private static final MessageFormat INFO = new MessageFormat(
282: "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
283:
284: private static final Map XMLs = new HashMap();
285:
286: static {
287: XMLs.put(XML1, XML);
288: XMLs.put(XML2, XML_WITH_PROLOG);
289: XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
290: XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
291: XMLs.put(XML5,
292: XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
293: }
294:
295: /**
296: *
297: * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
298: * @param xmlType xml, xml-prolog, xml-prolog-charset
299: * @return XML stream
300: */
301: protected InputStream getXmlStream(String bomType, String xmlType,
302: String streamEnc, String prologEnc) throws IOException {
303: ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
304: int[] bom = (int[]) BOMs.get(bomType);
305: if (bom == null) {
306: bom = new int[0];
307: }
308: MessageFormat xml = (MessageFormat) XMLs.get(xmlType);
309: for (int i = 0; i < bom.length; i++) {
310: baos.write(bom[i]);
311: }
312: Writer writer = new OutputStreamWriter(baos, streamEnc);
313: String info = INFO.format(new Object[] { bomType, xmlType,
314: prologEnc });
315: String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc,
316: info });
317: writer.write(xmlDoc);
318:
319: // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
320: writer.write("<da>\n");
321: for (int i = 0; i < 10000; i++) {
322: writer.write("<do/>\n");
323: }
324: writer.write("</da>\n");
325:
326: writer.close();
327: return new ByteArrayInputStream(baos.toByteArray());
328: }
329:
330: }
|