001: package org.cyberneko.html;
002:
003: import java.io.StringReader;
004: import java.util.ArrayList;
005: import java.util.Arrays;
006: import java.util.List;
007:
008: import junit.framework.TestCase;
009:
010: import org.apache.xerces.xni.Augmentations;
011: import org.apache.xerces.xni.QName;
012: import org.apache.xerces.xni.XMLAttributes;
013: import org.apache.xerces.xni.XNIException;
014: import org.apache.xerces.xni.parser.XMLDocumentFilter;
015: import org.apache.xerces.xni.parser.XMLInputSource;
016: import org.cyberneko.html.filters.DefaultFilter;
017:
018: /**
019: * Unit tests for {@link HTMLScanner}.
020: * @author Marc Guillemot
021: * @author Ahmed Ashour
022: * @version $Id: HTMLScanner.java,v 1.19 2005/06/14 05:52:37 andyc Exp $
023: */
024: public class HTMLScannerTest extends TestCase {
025:
026: public void testisEncodingCompatible() throws Exception {
027: final HTMLScanner scanner = new HTMLScanner();
028: assertTrue(scanner.isEncodingCompatible("ISO-8859-1",
029: "ISO-8859-1"));
030: assertTrue(scanner.isEncodingCompatible("UTF-8", "UTF-8"));
031: assertTrue(scanner.isEncodingCompatible("UTF-16", "UTF-16"));
032: assertTrue(scanner.isEncodingCompatible("US-ASCII",
033: "ISO-8859-1"));
034: assertTrue(scanner.isEncodingCompatible("UTF-8", "ISO-8859-1"));
035:
036: assertFalse(scanner.isEncodingCompatible("UTF-8", "UTF-16"));
037: assertFalse(scanner
038: .isEncodingCompatible("ISO-8859-1", "UTF-16"));
039: assertFalse(scanner.isEncodingCompatible("UTF-16", "Cp1252"));
040: }
041:
042: public void testPushInputSource() throws Exception {
043: String string = "<html><head><title>foo</title></head>"
044: + "<body>"
045: + "<script id='myscript'>"
046: + " document.write('<style type=\"text/css\" id=\"myStyle\">');"
047: + " document.write(' .nwr {white-space: nowrap;}');"
048: + " document.write('</style>');"
049: + " document.write('<div id=\"myDiv\">');"
050: + " document.write('</div>');" + "</script>"
051: + "</body></html>";
052: HTMLConfiguration parser = new HTMLConfiguration();
053: EvaluateInputSourceFilter filter = new EvaluateInputSourceFilter(
054: parser);
055: parser.setProperty(
056: "http://cyberneko.org/html/properties/filters",
057: new XMLDocumentFilter[] { filter });
058: XMLInputSource source = new XMLInputSource(null, "myTest",
059: null, new StringReader(string), "UTF-8");
060: parser.parse(source);
061:
062: String[] expectedString = { "(HTML", "(HEAD", "(TITLE",
063: ")TITLE", ")HEAD", "(BODY", "(SCRIPT", ")SCRIPT",
064: "~inserting", "(STYLE", "~inserting", "~inserting",
065: ")STYLE", "~inserting", "(DIV", "~inserting", ")DIV",
066: ")BODY", ")HTML" };
067: assertEquals(Arrays.asList(expectedString),
068: filter.collectedStrings);
069: }
070:
071: private static class EvaluateInputSourceFilter extends
072: DefaultFilter {
073:
074: private List collectedStrings = new ArrayList();
075: private static int counter = 1;
076: protected HTMLConfiguration fConfiguration;
077:
078: public EvaluateInputSourceFilter(HTMLConfiguration config) {
079: fConfiguration = config;
080: }
081:
082: public void startElement(QName element, XMLAttributes attrs,
083: Augmentations augs) throws XNIException {
084: collectedStrings.add("(" + element.rawname);
085: }
086:
087: public void endElement(QName element, Augmentations augs)
088: throws XNIException {
089: collectedStrings.add(")" + element.rawname);
090: if (element.localpart.equals("SCRIPT")) {
091: // act as if evaluation of document.write would insert the content
092: insert("<style type=\"text/css\" id=\"myStyle\">");
093: insert(" .nwr {white-space: nowrap;}");
094: insert("</style>");
095: insert("<div id=\"myDiv\">");
096: insert("</div>");
097: }
098: }
099:
100: private void insert(final String string) {
101: collectedStrings.add("~inserting");
102: XMLInputSource source = new XMLInputSource(null, "myTest"
103: + counter++, null, new StringReader(string),
104: "UTF-8");
105: fConfiguration.evaluateInputSource(source);
106: }
107:
108: }
109: }
|