001: /*
002: * Java HTML Tidy - JTidy
003: * HTML parser and pretty printer
004: *
005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
006: * Institute of Technology, Institut National de Recherche en
007: * Informatique et en Automatique, Keio University). All Rights
008: * Reserved.
009: *
010: * Contributing Author(s):
011: *
012: * Dave Raggett <dsr@w3.org>
013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
014: * Gary L Peskin <garyp@firstech.com> (Java development)
015: * Sami Lempinen <sami@lempinen.net> (release management)
016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
017: *
018: * The contributing author(s) would like to thank all those who
019: * helped with testing, bug fixes, and patience. This wouldn't
020: * have been possible without all of you.
021: *
022: * COPYRIGHT NOTICE:
023: *
024: * This software and documentation is provided "as is," and
025: * the copyright holders and contributing author(s) make no
026: * representations or warranties, express or implied, including
027: * but not limited to, warranties of merchantability or fitness
028: * for any particular purpose or that the use of the software or
029: * documentation will not infringe any third party patents,
030: * copyrights, trademarks or other rights.
031: *
032: * The copyright holders and contributing author(s) will not be
033: * liable for any direct, indirect, special or consequential damages
034: * arising out of any use of the software or documentation, even if
035: * advised of the possibility of such damage.
036: *
037: * Permission is hereby granted to use, copy, modify, and distribute
038: * this source code, or portions hereof, documentation and executables,
039: * for any purpose, without fee, subject to the following restrictions:
040: *
041: * 1. The origin of this source code must not be misrepresented.
042: * 2. Altered versions must be plainly marked as such and must
043: * not be misrepresented as being the original source.
044: * 3. This Copyright notice may not be removed or altered from any
045: * source or altered source distribution.
046: *
047: * The copyright holders and contributing author(s) specifically
048: * permit, without fee, and encourage the use of this source code
049: * as a component for supporting the Hypertext Markup Language in
050: * commercial products. If you use this source code in a product,
051: * acknowledgment is not required but would be appreciated.
052: *
053: */
054: package org.w3c.tidy;
055:
056: import java.io.BufferedReader;
057: import java.io.ByteArrayInputStream;
058: import java.io.ByteArrayOutputStream;
059: import java.io.FileInputStream;
060: import java.io.FileNotFoundException;
061: import java.io.FileWriter;
062: import java.io.IOException;
063: import java.io.InputStreamReader;
064: import java.io.OutputStream;
065: import java.io.PrintWriter;
066: import java.io.StringWriter;
067: import java.net.URL;
068: import java.util.ArrayList;
069: import java.util.Iterator;
070: import java.util.List;
071: import java.util.Properties;
072:
073: import javax.xml.parsers.SAXParser;
074: import javax.xml.parsers.SAXParserFactory;
075:
076: import junit.framework.TestCase;
077:
078: import org.apache.commons.logging.Log;
079: import org.apache.commons.logging.LogFactory;
080: import org.w3c.dom.Document;
081: import org.xml.sax.Attributes;
082: import org.xml.sax.InputSource;
083: import org.xml.sax.SAXException;
084: import org.xml.sax.helpers.DefaultHandler;
085:
086: /**
087: * @author fgiust
088: * @version $Revision: 1.26 $ ($Author: fgiust $)
089: */
090: public class TidyTestCase extends TestCase {
091:
092: /**
093: * Tidy executable name, if you want to produce output files for comparison.
094: */
095: private static final String TIDY_EXECUTABLE = "tidy.exe";
096:
097: /**
098: * Logger used to enable/disable output file generation using tidy c executable. Setting this logger to
099: * <code>debug</code> in your log4j configuration file will cause the TIDY_EXECUTABLE to be run against the actual
100: * test file. If set to false the command line used to manually run tidy will appear in the log.
101: */
102: private static final Log RUN_TIDY_EXECUTABLE = LogFactory
103: .getLog("runtidy");
104:
105: /**
106: * Tidy test instance.
107: */
108: protected Tidy tidy;
109:
110: /**
111: * message listener.
112: */
113: protected TestMessageListener messageListener;
114:
115: /**
116: * Error out.
117: */
118: protected StringWriter errorLog;
119:
120: /**
121: * Tidy output.
122: */
123: protected String tidyOut;
124:
125: /**
126: * logger.
127: */
128: protected Log log = LogFactory.getLog(getClass());
129:
130: /**
131: * write directly to out. Useful for debugging (but it will make the test fail!).
132: */
133: private boolean writeToOut;
134:
135: /**
136: * Instantiate a new Test case.
137: * @param name test name
138: */
139: public TidyTestCase(String name) {
140: super (name);
141: }
142:
143: /**
144: * @see junit.framework.TestCase#setUp()
145: */
146: protected void setUp() throws Exception {
147: super .setUp();
148:
149: //creates a new Tidy
150: this .tidy = new Tidy();
151: }
152:
153: /**
154: * @see junit.framework.TestCase#tearDown()
155: */
156: protected void tearDown() throws Exception {
157: this .tidy = null;
158: this .errorLog = null;
159: this .tidyOut = null;
160:
161: super .tearDown();
162: }
163:
164: /**
165: * Executes a tidy test. This method simply requires the input file name. If a file with the same name but with a
166: * ".cfg" extension is found is used as configuration file for the test, otherwise the default config will be used.
167: * If a file with the same name, but with the ".out" extension is found, tidy will the result with the content of
168: * such file.
169: * @param fileName input file name
170: * @throws Exception any exception generated during the test
171: */
172: protected void executeTidyTest(String fileName) throws Exception {
173:
174: // set up Tidy using supplied configuration
175: setUpTidy(fileName);
176:
177: // input file
178: URL inputURL = getClass().getClassLoader()
179: .getResource(fileName);
180: assertNotNull("Can't find input file [" + fileName + "]",
181: inputURL);
182:
183: OutputStream out;
184: // out
185: if (!writeToOut) {
186: out = new ByteArrayOutputStream();
187: } else {
188: out = System.out;
189: }
190:
191: // go!
192: this .tidy.parse(inputURL.openStream(), out);
193:
194: if (log.isDebugEnabled()) {
195: log
196: .debug("out:\n---- out ----\n" + out
197: + "\n---- out ----");
198: log.debug("log:\n---- log ----\n" + this .errorLog
199: + "\n---- log ----");
200: }
201:
202: // existing file for comparison
203: String outFileName = fileName.substring(0, fileName
204: .lastIndexOf("."))
205: + ".out";
206: URL outFile = getClass().getClassLoader().getResource(
207: outFileName);
208:
209: this .tidyOut = out.toString();
210:
211: if (outFile != null) {
212: log.debug("Comparing file using [" + outFileName + "]");
213: assertEquals(this .tidyOut, outFile);
214: }
215:
216: // check messages
217: String messagesFileName = fileName.substring(0, fileName
218: .lastIndexOf("."))
219: + ".msg";
220: URL messagesFile = getClass().getClassLoader().getResource(
221: messagesFileName);
222:
223: // save messages
224: if (messagesFile == null) {
225: if (log.isDebugEnabled()) {
226: log.debug("Messages file doesn't exists, generating ["
227: + messagesFileName + "] for reference");
228: }
229: FileWriter fw = new FileWriter(inputURL.getFile()
230: .substring(0, inputURL.getFile().lastIndexOf("."))
231: + ".msg");
232: fw.write(this .messageListener.messagesToXml());
233: fw.close();
234: } else {
235: // compare result to expected messages
236: if (log.isDebugEnabled()) {
237: log.debug("Comparing messages using ["
238: + messagesFileName + "]");
239: }
240: compareMsgXml(messagesFile);
241: }
242: }
243:
244: /**
245: * Parse an existing msg file and assert that content is identical to current output.
246: * @param messagesFile URL to mesage file
247: * @throws Exception any exception generated during the test
248: */
249: protected void compareMsgXml(URL messagesFile) throws Exception {
250:
251: // first parse existing file
252: // avoid using DOM since if will need forking junit execution in maven (too slow)
253: SAXParserFactory factory = SAXParserFactory.newInstance();
254: SAXParser saxParser = factory.newSAXParser();
255:
256: MsgXmlHandler handler = new MsgXmlHandler();
257: saxParser.parse(new InputSource(messagesFile.openStream()),
258: handler);
259: List expectedMsgs = handler.getMessages();
260:
261: List tidyMsgs = this .messageListener.getReceived();
262:
263: // assert size
264: assertEquals("Number of messages is different from expected",
265: expectedMsgs.size(), tidyMsgs.size());
266:
267: // compare messages
268: Iterator expectedMsgIt = expectedMsgs.iterator();
269: Iterator tidyMsgIt = tidyMsgs.iterator();
270: int count = 0;
271: while (tidyMsgIt.hasNext()) {
272: TidyMessage expectedOne = (TidyMessage) expectedMsgIt
273: .next();
274: TidyMessage tidyOne = (TidyMessage) tidyMsgIt.next();
275:
276: assertEquals("Error code for message [" + count
277: + "] is different from expected", expectedOne
278: .getErrorCode(), tidyOne.getErrorCode());
279:
280: assertEquals("Level for message [" + count
281: + "] is different from expected", expectedOne
282: .getLevel(), tidyOne.getLevel());
283:
284: assertEquals(
285: "Line for message ["
286: + count
287: + "] is different from expected. Expected position: ["
288: + expectedOne.getLine() + ":"
289: + expectedOne.getColumn() + "] , current ["
290: + tidyOne.getLine() + ":"
291: + tidyOne.getColumn() + "]", expectedOne
292: .getLine(), tidyOne.getLine());
293:
294: assertEquals(
295: "Column for message ["
296: + count
297: + "] is different from expected. Expected position: ["
298: + expectedOne.getLine() + ":"
299: + expectedOne.getColumn() + "] , current ["
300: + tidyOne.getLine() + ":"
301: + tidyOne.getColumn() + "]", expectedOne
302: .getColumn(), tidyOne.getColumn());
303:
304: // don't assert text in respect for i18n
305:
306: count++;
307: }
308:
309: }
310:
311: /**
312: * Basic test for DOM parser. Test is set up using [fileName.cfg] configuration if the file exists. Calls
313: * tidy.parseDOM and returns the Document to the caller.
314: * @param fileName input file name
315: * @return parsed Document
316: * @throws Exception any exception generated during the test
317: */
318: protected Document parseDomTest(String fileName) throws Exception {
319: //creates a new Tidy
320: setUpTidy(fileName);
321:
322: // input file
323: URL inputURL = getClass().getClassLoader()
324: .getResource(fileName);
325: assertNotNull("Can't find input file [" + fileName + "]",
326: inputURL);
327:
328: // out
329: OutputStream out = new ByteArrayOutputStream();
330:
331: Document doc = this .tidy.parseDOM(inputURL.openStream(), out);
332: this .tidyOut = out.toString();
333:
334: return doc;
335: }
336:
337: /**
338: * assert generated output and test file are equals.
339: * @param tidyOutput tidy output as string
340: * @param correctFile URL used to load the file for comparison
341: * @throws FileNotFoundException if test file is not found
342: * @throws IOException in reading file
343: */
344: protected void assertEquals(String tidyOutput, URL correctFile)
345: throws FileNotFoundException, IOException {
346: // assume the expected output has the same encoding tidy has in its configuration
347: String encodingName = tidy.getConfiguration()
348: .getOutCharEncodingName();
349:
350: diff(new BufferedReader((new InputStreamReader(
351: new ByteArrayInputStream(tidyOutput.getBytes()),
352: encodingName))), new BufferedReader(
353: new InputStreamReader(new FileInputStream(correctFile
354: .getPath()), encodingName)));
355: }
356:
357: /**
358: * Utility method: assert no warnings were reported in the last tidy run.
359: */
360: protected void assertNoWarnings() {
361: int warningNum = this .tidy.getParseWarnings();
362: if (warningNum != 0) {
363: fail("Test failed, [" + warningNum
364: + "] false warnings were reported");
365: }
366: }
367:
368: /**
369: * Utility method: assert no errors were reported in the last tidy run.
370: */
371: protected void assertNoErrors() {
372: int errorNum = this .tidy.getParseErrors();
373: if (errorNum != 0) {
374: fail("Test failed, [" + errorNum
375: + "] false errors were reported");
376: }
377: }
378:
379: /**
380: * Utility method: assert no warnings were reported in the last tidy run.
381: * @param expectedNumber expected number of warnings.
382: */
383: protected void assertWarnings(int expectedNumber) {
384: int warningNum = this .tidy.getParseWarnings();
385: if (warningNum != expectedNumber) {
386: fail("Test failed, [" + expectedNumber
387: + "] warnings expected, [" + warningNum
388: + "] were reported");
389: }
390: }
391:
392: /**
393: * Utility method: assert no errors were reported in the last tidy run.
394: * @param expectedNumber expected number of errors.
395: */
396: protected void assertErrors(int expectedNumber) {
397: int errorNum = this .tidy.getParseErrors();
398: if (errorNum != expectedNumber) {
399: fail("Test failed, [" + expectedNumber
400: + "] errors expected, [" + errorNum
401: + "] were reported");
402: }
403: }
404:
405: /**
406: * Utility method: asserts a given String can be found in the error log.
407: * @param expectedString expected String in error log.
408: */
409: protected void assertLogContains(String expectedString) {
410: String logString = this .errorLog.toString();
411:
412: if (logString.indexOf(expectedString) == -1) {
413: fail("Test failed, expected [" + expectedString
414: + "] couldn't be found in error log.");
415: }
416: }
417:
418: /**
419: * Utility method: asserts a given String can't be found in the error log.
420: * @param expectedString expected String in error log.
421: */
422: protected void assertLogDoesntContains(String expectedString) {
423: String logString = this .errorLog.toString();
424:
425: if (logString.indexOf(expectedString) != -1) {
426: fail("Test failed, [" + expectedString
427: + "] was found in error log.");
428: }
429: }
430:
431: /**
432: * set up the tidy instance.
433: * @param fileName input file name (needed to determine configuration file name)
434: * @throws IOException in reading configuration file
435: */
436: private void setUpTidy(String fileName) throws IOException {
437: // config file names
438: String configFileName = fileName.substring(0, fileName
439: .lastIndexOf("."))
440: + ".cfg";
441: String messagesFileName = fileName.substring(0, fileName
442: .lastIndexOf("."));
443:
444: // input file
445: URL inputURL = getClass().getClassLoader()
446: .getResource(fileName);
447: assertNotNull("Can't find input file [" + fileName + "]",
448: inputURL);
449:
450: // configuration file
451: URL configurationFile = getClass().getClassLoader()
452: .getResource(configFileName);
453:
454: // debug runing test info
455: if (log.isDebugEnabled()) {
456: StringBuffer message = new StringBuffer();
457: message.append("Testing [" + fileName + "]");
458: if (configurationFile != null) {
459: message.append(" using configuration file ["
460: + configFileName + "]");
461: }
462: log.debug(message.toString());
463: }
464:
465: if (configurationFile == null) {
466: configurationFile = getClass().getClassLoader()
467: .getResource("default.cfg");
468: }
469:
470: generateOutputUsingTidyC(inputURL.getFile(), configurationFile
471: .getFile(), RUN_TIDY_EXECUTABLE.isDebugEnabled());
472:
473: // if configuration file exists load and set it
474: Properties testProperties = new Properties();
475: testProperties.load(configurationFile.openStream());
476: this .tidy.setConfigurationFromProps(testProperties);
477:
478: // set up error log
479: this .errorLog = new StringWriter();
480: this .tidy.setErrout(new PrintWriter(this .errorLog));
481:
482: this .messageListener = new TestMessageListener(messagesFileName);
483: this .tidy.setMessageListener(messageListener);
484: }
485:
486: /**
487: * Diff between two buffered readers. If comparison fails an AssertionFailedException is thrown with the line
488: * number, actual and expected output. Content is tested to be identical (same wrapping).
489: * @param tidyOutput reader for tidy generated output
490: * @param correctFile reader for test file
491: * @throws IOException in reading from readers
492: */
493: private static void diff(BufferedReader tidyOutput,
494: BufferedReader correctFile) throws IOException {
495: String tidyLine, testLine;
496: int i = 1;
497: do {
498: tidyLine = tidyOutput.readLine();
499: testLine = correctFile.readLine();
500: i++;
501: } while ((tidyLine != null) && (testLine != null)
502: && (tidyLine.equals(testLine)));
503: tidyOutput.close();
504: correctFile.close();
505:
506: if ((tidyLine != null) || (testLine != null)) {
507: fail("Wrong output, file comparison failed at line ["
508: + (i - 1) + "]:\n" + "[tidy][" + tidyLine + "]\n"
509: + "[test][" + testLine + "]");
510: }
511: return;
512: }
513:
514: /**
515: * Run TIDY_EXECUTABLE to produce an output file. Used to generates output files using tidy c for comparison with
516: * jtidy. A file ".out" will be written in the same folder of the input file.
517: * @param inputFileName input file for tidy.
518: * @param configurationFileName configuration file name (default if there is no not test-specific file).
519: * @param runIt if true the output is generated using tidy, if false simply output the command line.
520: */
521: private void generateOutputUsingTidyC(String inputFileName,
522: String configurationFileName, boolean runIt) {
523:
524: String outputFileName = inputFileName.substring(0,
525: inputFileName.lastIndexOf("."))
526: + ".out";
527:
528: String strCmd = TIDY_EXECUTABLE + " -config \""
529: + cleanUpFilePath(configurationFileName) + "\" -o \""
530: + cleanUpFilePath(outputFileName) + "\" \""
531: + cleanUpFilePath(inputFileName) + "\"";
532:
533: log.debug("cmd line:\n***\n" + strCmd + "\nw/o output:\n"
534: + TIDY_EXECUTABLE + " -config \""
535: + cleanUpFilePath(configurationFileName) + "\" \""
536: + cleanUpFilePath(inputFileName) + "\"" + "\n***");
537:
538: if (runIt) {
539: log.debug("running " + TIDY_EXECUTABLE);
540: try {
541: Runtime.getRuntime().exec(strCmd);
542: } catch (IOException e) {
543: log.warn("Error running [" + strCmd + "] cmd: "
544: + e.getMessage());
545: }
546: }
547:
548: }
549:
550: /**
551: * Utility method to clean up file path returned by URLs.
552: * @param fileName file name as given by URL.getFile()
553: * @return String fileName
554: */
555: protected String cleanUpFilePath(String fileName) {
556: if (fileName.length() > 3 && fileName.charAt(2) == ':') {
557: // assuming something like ""/C:/program files/..."
558: return fileName.substring(1);
559: } else if (fileName.startsWith("file://")) {
560: return fileName.substring(7);
561: }
562:
563: return fileName;
564:
565: }
566:
567: /**
568: * A simple SAX Content Handler used to parse .msg files.
569: */
570: static class MsgXmlHandler extends DefaultHandler {
571:
572: /**
573: * Parsed messages.
574: */
575: private List messages = new ArrayList();
576:
577: /**
578: * Error code for the current message.
579: */
580: private int code;
581:
582: /**
583: * Level for the current message.
584: */
585: private int level;
586:
587: /**
588: * Column for the current message.
589: */
590: private int column;
591:
592: /**
593: * Line for the current message.
594: */
595: private int line;
596:
597: /**
598: * Message the current message.
599: */
600: private StringBuffer textbuffer;
601:
602: /**
603: * Actual parsing position.
604: */
605: private int parsePosition = -100;
606:
607: /**
608: * actually parsing a detail tag.
609: */
610: private boolean intag;
611:
612: /**
613: * @see org.xml.sax.ContentHandler#startElement(String, String, String, org.xml.sax.Attributes)
614: */
615: public void startElement(String uri, String localName,
616: String qName, Attributes attributes)
617: throws SAXException {
618: if ("message".equals(qName)) {
619: parsePosition = 0;
620: textbuffer = new StringBuffer();
621: } else {
622: parsePosition++;
623: intag = true;
624: }
625: }
626:
627: /**
628: * @see org.xml.sax.ContentHandler#endElement(String, String, String)
629: */
630: public void endElement(String uri, String localName,
631: String qName) throws SAXException {
632: if ("message".equals(qName)) {
633: TidyMessage message = new TidyMessage(code, line,
634: column, TidyMessage.Level.fromCode(level),
635: textbuffer.toString());
636: messages.add(message);
637: }
638: intag = false;
639: }
640:
641: /**
642: * @see org.xml.sax.ContentHandler#characters(char[], int, int)
643: */
644: public void characters(char[] ch, int start, int length)
645: throws SAXException {
646: if (!intag) {
647: return;
648: }
649:
650: switch (parsePosition) {
651: case 1:
652: this .code = Integer.parseInt(new String(ch, start,
653: length));
654: break;
655: case 2:
656: this .level = Integer.parseInt(new String(ch, start,
657: length));
658: break;
659: case 3:
660: this .line = Integer.parseInt(new String(ch, start,
661: length));
662: break;
663: case 4:
664: this .column = Integer.parseInt(new String(ch, start,
665: length));
666: break;
667: case 5:
668: textbuffer.append(new String(ch, start, length));
669: break;
670: default:
671: break;
672: }
673: }
674:
675: /**
676: * Returns the list of parsed messages.
677: * @return List containing TidyMessage elements
678: */
679: public List getMessages() {
680: return messages;
681: }
682: }
683: }
|