Source Code Cross Referenced for Tidy.java in » IDE-Netbeans » visualweb.api.designer » org » w3c » tidy » Java Source Code / Java Documentation - Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » IDE Netbeans » visualweb.api.designer » org.w3c.tidy 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * @(#)Tidy.java   1.11 2000/08/16
0003:         *
0004:         */
0005:
0006:        /*
0007:         HTML parser and pretty printer
0008:
0009:         Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
0010:         Institute of Technology, Institut National de Recherche en
0011:         Informatique et en Automatique, Keio University). All Rights
0012:         Reserved.
0013:
0014:         Contributing Author(s):
0015:
0016:         Dave Raggett <dsr@w3.org>
0017:         Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0018:
0019:         The contributing author(s) would like to thank all those who
0020:         helped with testing, bug fixes, and patience.  This wouldn't
0021:         have been possible without all of you.
0022:
0023:         COPYRIGHT NOTICE:
0024:        
0025:         This software and documentation is provided "as is," and
0026:         the copyright holders and contributing author(s) make no
0027:         representations or warranties, express or implied, including
0028:         but not limited to, warranties of merchantability or fitness
0029:         for any particular purpose or that the use of the software or
0030:         documentation will not infringe any third party patents,
0031:         copyrights, trademarks or other rights. 
0032:
0033:         The copyright holders and contributing author(s) will not be
0034:         liable for any direct, indirect, special or consequential damages
0035:         arising out of any use of the software or documentation, even if
0036:         advised of the possibility of such damage.
0037:
0038:         Permission is hereby granted to use, copy, modify, and distribute
0039:         this source code, or portions hereof, documentation and executables,
0040:         for any purpose, without fee, subject to the following restrictions:
0041:
0042:         1. The origin of this source code must not be misrepresented.
0043:         2. Altered versions must be plainly marked as such and must
0044:         not be misrepresented as being the original source.
0045:         3. This Copyright notice may not be removed or altered from any
0046:         source or altered source distribution.
0047:        
0048:         The copyright holders and contributing author(s) specifically
0049:         permit, without fee, and encourage the use of this source code
0050:         as a component for supporting the Hypertext Markup Language in
0051:         commercial products. If you use this source code in a product,
0052:         acknowledgment is not required but would be appreciated.
0053:         */
0054:
0055:        package org.w3c.tidy;
0056:
0057:        import java.io.PrintWriter;
0058:        import java.io.FileWriter;
0059:        import java.io.InputStream;
0060:        import java.io.FileInputStream;
0061:        import java.io.OutputStream;
0062:        import java.io.FileOutputStream;
0063:        import java.util.Properties;
0064:
0065:        import java.io.IOException;
0066:        import java.io.FileNotFoundException;
0067:
0068:        // BEGIN RAVE MODIFICATIONS
0069:        import org.w3c.dom.Attr;
0070:        import org.w3c.dom.CharacterData;
0071:        import org.w3c.dom.NamedNodeMap;
0072:        import org.w3c.dom.NodeList;
0073:
0074:        // END RAVE MODIFICATIONS
0075:
0076:        /**
0077:         *
0078:         * <p>HTML parser and pretty printer</p>
0079:         *
0080:         * <p>
0081:         * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
0082:         * See Tidy.java for the copyright notice.
0083:         * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
0084:         * HTML Tidy Release 4 Aug 2000</a>
0085:         * </p>
0086:         *
0087:         * <p>
0088:         * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
0089:         * Institute of Technology, Institut National de Recherche en
0090:         * Informatique et en Automatique, Keio University). All Rights
0091:         * Reserved.
0092:         * </p>
0093:         *
0094:         * <p>
0095:         * Contributing Author(s):<br>
0096:         *    <a href="mailto:dsr@w3.org">Dave Raggett</a><br>
0097:         *    <a href="mailto:ac.quick@sympatico.ca">Andy Quick</a> (translation to Java)
0098:         * </p>
0099:         *
0100:         * <p>
0101:         * The contributing author(s) would like to thank all those who
0102:         * helped with testing, bug fixes, and patience.  This wouldn't
0103:         * have been possible without all of you.
0104:         * </p>
0105:         *
0106:         * <p>
0107:         * COPYRIGHT NOTICE:<br>
0108:         * 
0109:         * This software and documentation is provided "as is," and
0110:         * the copyright holders and contributing author(s) make no
0111:         * representations or warranties, express or implied, including
0112:         * but not limited to, warranties of merchantability or fitness
0113:         * for any particular purpose or that the use of the software or
0114:         * documentation will not infringe any third party patents,
0115:         * copyrights, trademarks or other rights. 
0116:         * </p>
0117:         *
0118:         * <p>
0119:         * The copyright holders and contributing author(s) will not be
0120:         * liable for any direct, indirect, special or consequential damages
0121:         * arising out of any use of the software or documentation, even if
0122:         * advised of the possibility of such damage.
0123:         * </p>
0124:         *
0125:         * <p>
0126:         * Permission is hereby granted to use, copy, modify, and distribute
0127:         * this source code, or portions hereof, documentation and executables,
0128:         * for any purpose, without fee, subject to the following restrictions:
0129:         * </p>
0130:         *
0131:         * <p>
0132:         * <ol>
0133:         * <li>The origin of this source code must not be misrepresented.</li>
0134:         * <li>Altered versions must be plainly marked as such and must
0135:         * not be misrepresented as being the original source.</li>
0136:         * <li>This Copyright notice may not be removed or altered from any
0137:         * source or altered source distribution.</li>
0138:         * </ol>
0139:         * </p>
0140:         *
0141:         * <p>
0142:         * The copyright holders and contributing author(s) specifically
0143:         * permit, without fee, and encourage the use of this source code
0144:         * as a component for supporting the Hypertext Markup Language in
0145:         * commercial products. If you use this source code in a product,
0146:         * acknowledgment is not required but would be appreciated.
0147:         * </p>
0148:         *
0149:         * @author  Dave Raggett &lt;dsr@w3.org&gt;
0150:         * @author  Andy Quick &lt;ac.quick@sympatico.ca&gt; (translation to Java)
0151:         * @version 1.0, 1999/05/22
0152:         * @version 1.0.1, 1999/05/29
0153:         * @version 1.1, 1999/06/18 Java Bean
0154:         * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
0155:         * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
0156:         * @version 1.4, 1999/09/04 DOM support
0157:         * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
0158:         * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
0159:         * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
0160:         * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
0161:         * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
0162:         * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
0163:         * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
0164:         *
0165:         */
0166:
0167:        public class Tidy implements  java.io.Serializable {
0168:
0169:            static final long serialVersionUID = -2794371560623987718L;
0170:
0171:            private boolean initialized = false;
0172:            private PrintWriter errout = null; /* error output stream */
0173:            private PrintWriter stderr = null;
0174:            private Configuration configuration = null;
0175:            private String inputStreamName = "InputStream";
0176:            private int parseErrors = 0;
0177:            private int parseWarnings = 0;
0178:
0179:            public Tidy() { /* all construction work is delegated to init() */
0180:                this.init();
0181:            }
0182:
0183:            public Configuration getConfiguration() { /* hands out the Configuration reference held by this instance (no copy is made) */
0184:                return this.configuration;
0185:            }
0186:
0187:            public PrintWriter getStderr() { /* returns the stderr writer field as is; may be null */
0188:                return this.stderr;
0189:            }
0190:
0191:            /**
0192:             * ParseErrors - the number of errors that occurred in the most
0193:             * recent parse operation
0194:             */
0195:
0196:            public int getParseErrors() {
0197:                return parseErrors;
0198:            }
0199:
0200:            /**
0201:             * ParseWarnings - the number of warnings that occurred in the most
0202:             * recent parse operation
0203:             */
0204:
0205:            public int getParseWarnings() { /* ParseWarnings: current value of the warning counter field */
0206:                return this.parseWarnings;
0207:            }
0208:
0209:            /**
0210:             * Errout - the error output stream
0211:             */
0212:
0213:            public PrintWriter getErrout() {
0214:                return errout;
0215:            }
0216:
0217:            public void setErrout(PrintWriter errout) {
0218:                this .errout = errout;
0219:            }
0220:
0221:            /**
0222:             * Spaces - default indentation
0223:             * @see org.w3c.tidy.Configuration#spaces
0224:             */
0225:
0226:            public void setSpaces(int spaces) { /* Spaces: default indentation */
0227:                this.configuration.spaces = spaces;
0228:            }
0229:
0230:            public int getSpaces() { /* Spaces: default indentation */
0231:                return this.configuration.spaces;
0232:            }
0233:
0234:            /**
0235:             * Wraplen - default wrap margin
0236:             * @see org.w3c.tidy.Configuration#wraplen
0237:             */
0238:
0239:            public void setWraplen(int wraplen) { /* Wraplen: default wrap margin */
0240:                this.configuration.wraplen = wraplen;
0241:            }
0242:
0243:            public int getWraplen() { /* Wraplen: default wrap margin */
0244:                return this.configuration.wraplen;
0245:            }
0246:
0247:            /**
0248:             * CharEncoding
0249:             * @see org.w3c.tidy.Configuration#CharEncoding
0250:             */
0251:
0252:            public void setCharEncoding(int charencoding) { /* CharEncoding: written to Configuration.CharEncoding unchanged */
0253:                this.configuration.CharEncoding = charencoding;
0254:            }
0255:
0256:            public int getCharEncoding() { /* CharEncoding: read from Configuration.CharEncoding */
0257:                return this.configuration.CharEncoding;
0258:            }
0259:
0260:            /**
0261:             * Tabsize
0262:             * @see org.w3c.tidy.Configuration#tabsize
0263:             */
0264:
0265:            public void setTabsize(int tabsize) { /* Tabsize: written to Configuration.tabsize unchanged */
0266:                this.configuration.tabsize = tabsize;
0267:            }
0268:
0269:            public int getTabsize() { /* Tabsize: read from Configuration.tabsize */
0270:                return this.configuration.tabsize;
0271:            }
0272:
0273:            /**
0274:             * Errfile - file name to write errors to
0275:             * @see org.w3c.tidy.Configuration#errfile
0276:             */
0277:
0278:            public void setErrfile(String errfile) { /* Errfile: file name to write errors to */
0279:                this.configuration.errfile = errfile;
0280:            }
0281:
0282:            public String getErrfile() { /* Errfile: file name to write errors to */
0283:                return this.configuration.errfile;
0284:            }
0285:
0286:            /**
0287:             * Writeback - if true then output tidied markup
0288:             * NOTE: this property is ignored when parsing from an InputStream.
0289:             * @see org.w3c.tidy.Configuration#writeback
0290:             */
0291:
0292:            public void setWriteback(boolean writeback) { /* Writeback: if true then output tidied markup */
0293:                this.configuration.writeback = writeback;
0294:            }
0295:
0296:            public boolean getWriteback() { /* Writeback: if true then output tidied markup */
0297:                return this.configuration.writeback;
0298:            }
0299:
0300:            /**
0301:             * OnlyErrors - if true normal output is suppressed
0302:             * @see org.w3c.tidy.Configuration#OnlyErrors
0303:             */
0304:
0305:            public void setOnlyErrors(boolean OnlyErrors) { /* OnlyErrors: if true normal output is suppressed */
0306:                this.configuration.OnlyErrors = OnlyErrors;
0307:            }
0308:
0309:            public boolean getOnlyErrors() { /* OnlyErrors: if true normal output is suppressed */
0310:                return this.configuration.OnlyErrors;
0311:            }
0312:
0313:            /**
0314:             * ShowWarnings - however errors are always shown
0315:             * @see org.w3c.tidy.Configuration#ShowWarnings
0316:             */
0317:
0318:            public void setShowWarnings(boolean ShowWarnings) { /* ShowWarnings: written to Configuration.ShowWarnings; errors are always shown */
0319:                this.configuration.ShowWarnings = ShowWarnings;
0320:            }
0321:
0322:            public boolean getShowWarnings() { /* ShowWarnings: read from Configuration.ShowWarnings */
0323:                return this.configuration.ShowWarnings;
0324:            }
0325:
0326:            /**
0327:             * Quiet - no 'Parsing X', guessed DTD or summary
0328:             * @see org.w3c.tidy.Configuration#Quiet
0329:             */
0330:
0331:            public void setQuiet(boolean Quiet) { /* Quiet: no 'Parsing X', guessed DTD or summary */
0332:                this.configuration.Quiet = Quiet;
0333:            }
0334:
0335:            public boolean getQuiet() { /* Quiet: no 'Parsing X', guessed DTD or summary */
0336:                return this.configuration.Quiet;
0337:            }
0338:
0339:            /**
0340:             * IndentContent - indent content of appropriate tags
0341:             * @see org.w3c.tidy.Configuration#IndentContent
0342:             */
0343:
0344:            public void setIndentContent(boolean IndentContent) { /* IndentContent: indent content of appropriate tags */
0345:                this.configuration.IndentContent = IndentContent;
0346:            }
0347:
0348:            public boolean getIndentContent() { /* IndentContent: indent content of appropriate tags */
0349:                return this.configuration.IndentContent;
0350:            }
0351:
0352:            /**
0353:             * SmartIndent - does text/block level content affect indentation
0354:             * @see org.w3c.tidy.Configuration#SmartIndent
0355:             */
0356:
0357:            public void setSmartIndent(boolean SmartIndent) { /* SmartIndent: whether text/block level content affects indentation */
0358:                this.configuration.SmartIndent = SmartIndent;
0359:            }
0360:
0361:            public boolean getSmartIndent() { /* SmartIndent: whether text/block level content affects indentation */
0362:                return this.configuration.SmartIndent;
0363:            }
0364:
0365:            /**
0366:             * HideEndTags - suppress optional end tags
0367:             * @see org.w3c.tidy.Configuration#HideEndTags
0368:             */
0369:
0370:            public void setHideEndTags(boolean HideEndTags) { /* HideEndTags: suppress optional end tags */
0371:                this.configuration.HideEndTags = HideEndTags;
0372:            }
0373:
0374:            public boolean getHideEndTags() { /* HideEndTags: suppress optional end tags */
0375:                return this.configuration.HideEndTags;
0376:            }
0377:
0378:            /**
0379:             * XmlTags - treat input as XML
0380:             * @see org.w3c.tidy.Configuration#XmlTags
0381:             */
0382:
0383:            public void setXmlTags(boolean XmlTags) { /* XmlTags: treat input as XML */
0384:                this.configuration.XmlTags = XmlTags;
0385:            }
0386:
0387:            public boolean getXmlTags() { /* XmlTags: treat input as XML */
0388:                return this.configuration.XmlTags;
0389:            }
0390:
0391:            /**
0392:             * XmlOut - create output as XML
0393:             * @see org.w3c.tidy.Configuration#XmlOut
0394:             */
0395:
0396:            public void setXmlOut(boolean XmlOut) { /* XmlOut: create output as XML */
0397:                this.configuration.XmlOut = XmlOut;
0398:            }
0399:
0400:            public boolean getXmlOut() { /* XmlOut: create output as XML */
0401:                return this.configuration.XmlOut;
0402:            }
0403:
0404:            /**
0405:             * XHTML - output extensible HTML
0406:             * @see org.w3c.tidy.Configuration#xHTML
0407:             */
0408:
0409:            public void setXHTML(boolean xHTML) { /* XHTML: output extensible HTML */
0410:                this.configuration.xHTML = xHTML;
0411:            }
0412:
0413:            public boolean getXHTML() { /* XHTML: output extensible HTML */
0414:                return this.configuration.xHTML;
0415:            }
0416:
0417:            /**
0418:             * RawOut - avoid mapping values &gt; 127 to entities
0419:             * @see org.w3c.tidy.Configuration#RawOut
0420:             */
0421:
0422:            public void setRawOut(boolean RawOut) { /* RawOut: avoid mapping values above 127 to entities */
0423:                this.configuration.RawOut = RawOut;
0424:            }
0425:
0426:            public boolean getRawOut() { /* RawOut: avoid mapping values above 127 to entities */
0427:                return this.configuration.RawOut;
0428:            }
0429:
0430:            /**
0431:             * UpperCaseTags - output tags in upper not lower case
0432:             * @see org.w3c.tidy.Configuration#UpperCaseTags
0433:             */
0434:
0435:            public void setUpperCaseTags(boolean UpperCaseTags) { /* UpperCaseTags: output tags in upper not lower case */
0436:                this.configuration.UpperCaseTags = UpperCaseTags;
0437:            }
0438:
0439:            public boolean getUpperCaseTags() { /* UpperCaseTags: output tags in upper not lower case */
0440:                return this.configuration.UpperCaseTags;
0441:            }
0442:
0443:            /**
0444:             * UpperCaseAttrs - output attributes in upper not lower case
0445:             * @see org.w3c.tidy.Configuration#UpperCaseAttrs
0446:             */
0447:
0448:            public void setUpperCaseAttrs(boolean UpperCaseAttrs) { /* UpperCaseAttrs: output attributes in upper not lower case */
0449:                this.configuration.UpperCaseAttrs = UpperCaseAttrs;
0450:            }
0451:
0452:            public boolean getUpperCaseAttrs() { /* UpperCaseAttrs: output attributes in upper not lower case */
0453:                return this.configuration.UpperCaseAttrs;
0454:            }
0455:
0456:            /**
0457:             * MakeClean - remove presentational clutter
0458:             * @see org.w3c.tidy.Configuration#MakeClean
0459:             */
0460:
0461:            public void setMakeClean(boolean MakeClean) { /* MakeClean: remove presentational clutter */
0462:                this.configuration.MakeClean = MakeClean;
0463:            }
0464:
0465:            public boolean getMakeClean() { /* MakeClean: remove presentational clutter */
0466:                return this.configuration.MakeClean;
0467:            }
0468:
0469:            /**
0470:             * BreakBeforeBR - output a newline before &lt;br&gt; or not?
0471:             * @see org.w3c.tidy.Configuration#BreakBeforeBR
0472:             */
0473:
0474:            public void setBreakBeforeBR(boolean BreakBeforeBR) { /* BreakBeforeBR: whether a newline is output before a br element */
0475:                this.configuration.BreakBeforeBR = BreakBeforeBR;
0476:            }
0477:
0478:            public boolean getBreakBeforeBR() { /* BreakBeforeBR: whether a newline is output before a br element */
0479:                return this.configuration.BreakBeforeBR;
0480:            }
0481:
0482:            /**
0483:             * BurstSlides - create slides on each h2 element
0484:             * @see org.w3c.tidy.Configuration#BurstSlides
0485:             */
0486:
0487:            public void setBurstSlides(boolean BurstSlides) { /* BurstSlides: create slides on each h2 element */
0488:                this.configuration.BurstSlides = BurstSlides;
0489:            }
0490:
0491:            public boolean getBurstSlides() { /* BurstSlides: create slides on each h2 element */
0492:                return this.configuration.BurstSlides;
0493:            }
0494:
0495:            /**
0496:             * NumEntities - use numeric entities
0497:             * @see org.w3c.tidy.Configuration#NumEntities
0498:             */
0499:
0500:            public void setNumEntities(boolean NumEntities) { /* NumEntities: use numeric entities */
0501:                this.configuration.NumEntities = NumEntities;
0502:            }
0503:
0504:            public boolean getNumEntities() { /* NumEntities: use numeric entities */
0505:                return this.configuration.NumEntities;
0506:            }
0507:
0508:            /**
0509:             * QuoteMarks - output " marks as &amp;quot;
0510:             * @see org.w3c.tidy.Configuration#QuoteMarks
0511:             */
0512:
0513:            public void setQuoteMarks(boolean QuoteMarks) { /* QuoteMarks: output " marks as &quot; */
0514:                this.configuration.QuoteMarks = QuoteMarks;
0515:            }
0516:
0517:            public boolean getQuoteMarks() { /* QuoteMarks: output " marks as &quot; */
0518:                return this.configuration.QuoteMarks;
0519:            }
0520:
0521:            /**
0522:             * QuoteNbsp - output non-breaking space as entity
0523:             * @see org.w3c.tidy.Configuration#QuoteNbsp
0524:             */
0525:
0526:            public void setQuoteNbsp(boolean QuoteNbsp) { /* QuoteNbsp: output non-breaking space as entity */
0527:                this.configuration.QuoteNbsp = QuoteNbsp;
0528:            }
0529:
0530:            public boolean getQuoteNbsp() { /* QuoteNbsp: output non-breaking space as entity */
0531:                return this.configuration.QuoteNbsp;
0532:            }
0533:
0534:            /**
0535:             * QuoteAmpersand - output naked ampersand as &amp;amp;
0536:             * @see org.w3c.tidy.Configuration#QuoteAmpersand
0537:             */
0538:
0539:            public void setQuoteAmpersand(boolean QuoteAmpersand) { /* QuoteAmpersand: output naked ampersand as &amp; */
0540:                this.configuration.QuoteAmpersand = QuoteAmpersand;
0541:            }
0542:
0543:            public boolean getQuoteAmpersand() { /* QuoteAmpersand: output naked ampersand as &amp; */
0544:                return this.configuration.QuoteAmpersand;
0545:            }
0546:
0547:            /**
0548:             * WrapAttVals - wrap within attribute values
0549:             * @see org.w3c.tidy.Configuration#WrapAttVals
0550:             */
0551:
0552:            public void setWrapAttVals(boolean WrapAttVals) { /* WrapAttVals: wrap within attribute values */
0553:                this.configuration.WrapAttVals = WrapAttVals;
0554:            }
0555:
0556:            public boolean getWrapAttVals() { /* WrapAttVals: wrap within attribute values */
0557:                return this.configuration.WrapAttVals;
0558:            }
0559:
0560:            /**
0561:             * WrapScriptlets - wrap within JavaScript string literals
0562:             * @see org.w3c.tidy.Configuration#WrapScriptlets
0563:             */
0564:
0565:            public void setWrapScriptlets(boolean WrapScriptlets) { /* WrapScriptlets: wrap within JavaScript string literals */
0566:                this.configuration.WrapScriptlets = WrapScriptlets;
0567:            }
0568:
0569:            public boolean getWrapScriptlets() { /* WrapScriptlets: wrap within JavaScript string literals */
0570:                return this.configuration.WrapScriptlets;
0571:            }
0572:
0573:            /**
0574:             * WrapSection - wrap within &lt;![ ... ]&gt; section tags
0575:             * @see org.w3c.tidy.Configuration#WrapSection
0576:             */
0577:
0578:            public void setWrapSection(boolean WrapSection) { /* WrapSection: wrap within <![ ... ]> section tags */
0579:                this.configuration.WrapSection = WrapSection;
0580:            }
0581:
0582:            public boolean getWrapSection() { /* WrapSection: wrap within <![ ... ]> section tags */
0583:                return this.configuration.WrapSection;
0584:            }
0585:
0586:            /**
0587:             * AltText - default text for alt attribute
0588:             * @see org.w3c.tidy.Configuration#altText
0589:             */
0590:
0591:            public void setAltText(String altText) { /* AltText: default text for alt attribute */
0592:                this.configuration.altText = altText;
0593:            }
0594:
0595:            public String getAltText() { /* AltText: default text for alt attribute */
0596:                return this.configuration.altText;
0597:            }
0598:
0599:            /**
0600:             * Slidestyle - style sheet for slides
0601:             * @see org.w3c.tidy.Configuration#slidestyle
0602:             */
0603:
0604:            public void setSlidestyle(String slidestyle) { /* Slidestyle: style sheet for slides */
0605:                this.configuration.slidestyle = slidestyle;
0606:            }
0607:
0608:            public String getSlidestyle() { /* Slidestyle: style sheet for slides */
0609:                return this.configuration.slidestyle;
0610:            }
0611:
0612:            /**
0613:             * XmlPi - add &lt;?xml?&gt; for XML docs
0614:             * @see org.w3c.tidy.Configuration#XmlPi
0615:             */
0616:
0617:            public void setXmlPi(boolean XmlPi) { /* XmlPi: add <?xml?> for XML docs */
0618:                this.configuration.XmlPi = XmlPi;
0619:            }
0620:
0621:            public boolean getXmlPi() { /* XmlPi: add <?xml?> for XML docs */
0622:                return this.configuration.XmlPi;
0623:            }
0624:
0625:            /**
0626:             * DropFontTags - discard presentation tags
0627:             * @see org.w3c.tidy.Configuration#DropFontTags
0628:             */
0629:
0630:            public void setDropFontTags(boolean DropFontTags) { /* DropFontTags: discard presentation tags */
0631:                this.configuration.DropFontTags = DropFontTags;
0632:            }
0633:
0634:            public boolean getDropFontTags() { /* DropFontTags: discard presentation tags */
0635:                return this.configuration.DropFontTags;
0636:            }
0637:
0638:            /**
0639:             * DropEmptyParas - discard empty p elements
0640:             * @see org.w3c.tidy.Configuration#DropEmptyParas
0641:             */
0642:
0643:            public void setDropEmptyParas(boolean DropEmptyParas) { /* DropEmptyParas: discard empty p elements */
0644:                this.configuration.DropEmptyParas = DropEmptyParas;
0645:            }
0646:
0647:            public boolean getDropEmptyParas() { /* DropEmptyParas: discard empty p elements */
0648:                return this.configuration.DropEmptyParas;
0649:            }
0650:
0651:            /**
0652:             * FixComments - fix comments with adjacent hyphens
0653:             * @see org.w3c.tidy.Configuration#FixComments
0654:             */
0655:
0656:            public void setFixComments(boolean FixComments) { /* FixComments: fix comments with adjacent hyphens */
0657:                this.configuration.FixComments = FixComments;
0658:            }
0659:
0660:            public boolean getFixComments() { /* FixComments: fix comments with adjacent hyphens */
0661:                return this.configuration.FixComments;
0662:            }
0663:
0664:            /**
0665:             * WrapAsp - wrap within ASP pseudo elements
0666:             * @see org.w3c.tidy.Configuration#WrapAsp
0667:             */
0668:
0669:            public void setWrapAsp(boolean WrapAsp) { /* WrapAsp: wrap within ASP pseudo elements */
0670:                this.configuration.WrapAsp = WrapAsp;
0671:            }
0672:
0673:            public boolean getWrapAsp() { /* WrapAsp: wrap within ASP pseudo elements */
0674:                return this.configuration.WrapAsp;
0675:            }
0676:
0677:            /**
0678:             * WrapJste - wrap within JSTE pseudo elements
0679:             * @see org.w3c.tidy.Configuration#WrapJste
0680:             */
0681:
0682:            public void setWrapJste(boolean WrapJste) { /* WrapJste: wrap within JSTE pseudo elements */
0683:                this.configuration.WrapJste = WrapJste;
0684:            }
0685:
0686:            public boolean getWrapJste() { /* WrapJste: wrap within JSTE pseudo elements */
0687:                return this.configuration.WrapJste;
0688:            }
0689:
0690:            /**
0691:             * WrapPhp - wrap within PHP pseudo elements
0692:             * @see org.w3c.tidy.Configuration#WrapPhp
0693:             */
0694:
0695:            public void setWrapPhp(boolean WrapPhp) { /* WrapPhp: wrap within PHP pseudo elements */
0696:                this.configuration.WrapPhp = WrapPhp;
0697:            }
0698:
0699:            public boolean getWrapPhp() { /* WrapPhp: wrap within PHP pseudo elements */
0700:                return this.configuration.WrapPhp;
0701:            }
0702:
0703:            /**
0704:             * FixBackslash - fix URLs by replacing \ with /
0705:             * @see org.w3c.tidy.Configuration#FixBackslash
0706:             */
0707:
0708:            public void setFixBackslash(boolean FixBackslash) { /* FixBackslash: fix URLs by replacing backslashes with forward slashes */
0709:                this.configuration.FixBackslash = FixBackslash;
0710:            }
0711:
0712:            public boolean getFixBackslash() { /* FixBackslash: fix URLs by replacing backslashes with forward slashes */
0713:                return this.configuration.FixBackslash;
0714:            }
0715:
0716:            /**
0717:             * IndentAttributes - newline+indent before each attribute
0718:             * @see org.w3c.tidy.Configuration#IndentAttributes
0719:             */
0720:
0721:            public void setIndentAttributes(boolean IndentAttributes) { /* IndentAttributes: newline+indent before each attribute */
0722:                this.configuration.IndentAttributes = IndentAttributes;
0723:            }
0724:
0725:            public boolean getIndentAttributes() { /* IndentAttributes: newline+indent before each attribute */
0726:                return this.configuration.IndentAttributes;
0727:            }
0728:
0729:            /**
0730:             * DocType - user specified doctype
0731:             * omit | auto | strict | loose | <i>fpi</i>
0732:             * where the <i>fpi</i> is a string similar to
0733:             *    &quot;-//ACME//DTD HTML 3.14159//EN&quot;
0734:             * Note: for <i>fpi</i> include the double-quotes in the string.
0735:             * @see org.w3c.tidy.Configuration#docTypeStr
0736:             * @see org.w3c.tidy.Configuration#docTypeMode
0737:             */
0738:
0739:            public void setDocType(String doctype) {
0740:                /* A null argument is ignored, leaving the current doctype setting untouched. */
0741:                if (doctype == null) { return; }
0742:                /* The raw option text goes through Configuration.parseDocType; its result becomes docTypeStr. */
0743:                this.configuration.docTypeStr = this.configuration.parseDocType(doctype, "doctype"); }
0744:
0745:            public String getDocType() {
0746:                /* Translate the configured doctype mode into its option keyword.
0747:                 * Each branch returns directly, so no intermediate result
0748:                 * variable is needed. */
0749:                switch (this.configuration.docTypeMode) {
0750:                case Configuration.DOCTYPE_OMIT:
0751:                    return "omit";
0752:                case Configuration.DOCTYPE_AUTO:
0753:                    return "auto";
0754:                case Configuration.DOCTYPE_STRICT:
0755:                    return "strict";
0756:                case Configuration.DOCTYPE_LOOSE:
0757:                    return "loose";
0758:                case Configuration.DOCTYPE_USER:
0759:                    /* user mode: hand back the stored doctype string as is */
0760:                    return this.configuration.docTypeStr;
0761:                default:
0762:                    /* any other mode has no keyword, so the result is null */
0763:                    return null;
0764:                }
0765:            }
0766:
0767:            /**
0768:             * LogicalEmphasis - replace i by em and b by strong
0769:             * @see org.w3c.tidy.Configuration#LogicalEmphasis
0770:             */
0771:
0772:            public void setLogicalEmphasis(boolean LogicalEmphasis) { /* LogicalEmphasis: replace i by em and b by strong */
0773:                this.configuration.LogicalEmphasis = LogicalEmphasis;
0774:            }
0775:
0776:            public boolean getLogicalEmphasis() { /* LogicalEmphasis: replace i by em and b by strong */
0777:                return this.configuration.LogicalEmphasis;
0778:            }
0779:
0780:            /**
0781:             * XmlPIs - if set to true PIs must end with ?&gt;
0782:             * @see org.w3c.tidy.Configuration#XmlPIs
0783:             */
0784:
0785:            public void setXmlPIs(boolean XmlPIs) {
0786:                configuration.XmlPIs = XmlPIs;
0787:            }
0788:
            /**
             * Returns the XmlPIs setting.
             *
             * @return the current value of the configuration's
             *         <code>XmlPIs</code> field
             * @see org.w3c.tidy.Configuration#XmlPIs
             */
            public boolean getXmlPIs() {
                return configuration.XmlPIs;
            }
0792:
            /**
             * EncloseText - if true text at body is wrapped in &lt;p&gt;'s.
             * Note that the backing configuration field is named
             * <code>EncloseBodyText</code>, not <code>EncloseText</code>.
             *
             * @param EncloseText new value for the configuration's
             *        <code>EncloseBodyText</code> field
             * @see org.w3c.tidy.Configuration#EncloseBodyText
             */

            public void setEncloseText(boolean EncloseText) {
                configuration.EncloseBodyText = EncloseText;
            }
0801:
            /**
             * Returns the EncloseText setting.
             *
             * @return the current value of the configuration's
             *         <code>EncloseBodyText</code> field
             * @see org.w3c.tidy.Configuration#EncloseBodyText
             */
            public boolean getEncloseText() {
                return configuration.EncloseBodyText;
            }
0805:
            /**
             * EncloseBlockText - if true text in blocks is wrapped in &lt;p&gt;'s
             *
             * @param EncloseBlockText new value for the configuration's
             *        <code>EncloseBlockText</code> field
             * @see org.w3c.tidy.Configuration#EncloseBlockText
             */

            public void setEncloseBlockText(boolean EncloseBlockText) {
                configuration.EncloseBlockText = EncloseBlockText;
            }
0814:
            /**
             * Returns the EncloseBlockText setting.
             *
             * @return the current value of the configuration's
             *         <code>EncloseBlockText</code> field
             * @see org.w3c.tidy.Configuration#EncloseBlockText
             */
            public boolean getEncloseBlockText() {
                return configuration.EncloseBlockText;
            }
0818:
            /**
             * KeepFileTimes - if true last modified time is preserved<br>
             * <b>this is NOT supported at this time.</b>
             *
             * @param KeepFileTimes new value for the configuration's
             *        <code>KeepFileTimes</code> field
             * @see org.w3c.tidy.Configuration#KeepFileTimes
             */

            public void setKeepFileTimes(boolean KeepFileTimes) {
                configuration.KeepFileTimes = KeepFileTimes;
            }
0828:
            /**
             * Returns the KeepFileTimes setting.
             *
             * @return the current value of the configuration's
             *         <code>KeepFileTimes</code> field
             * @see org.w3c.tidy.Configuration#KeepFileTimes
             */
            public boolean getKeepFileTimes() {
                return configuration.KeepFileTimes;
            }
0832:
            /**
             * Word2000 - draconian cleaning for Word2000
             *
             * @param Word2000 new value for the configuration's
             *        <code>Word2000</code> field
             * @see org.w3c.tidy.Configuration#Word2000
             */

            public void setWord2000(boolean Word2000) {
                configuration.Word2000 = Word2000;
            }
0841:
            /**
             * Returns the Word2000 setting.
             *
             * @return the current value of the configuration's
             *         <code>Word2000</code> field
             * @see org.w3c.tidy.Configuration#Word2000
             */
            public boolean getWord2000() {
                return configuration.Word2000;
            }
0845:
            /**
             * TidyMark - add meta element indicating tidied doc
             *
             * @param TidyMark new value for the configuration's
             *        <code>TidyMark</code> field
             * @see org.w3c.tidy.Configuration#TidyMark
             */

            public void setTidyMark(boolean TidyMark) {
                configuration.TidyMark = TidyMark;
            }
0854:
            /**
             * Returns the TidyMark setting.
             *
             * @return the current value of the configuration's
             *         <code>TidyMark</code> field
             * @see org.w3c.tidy.Configuration#TidyMark
             */
            public boolean getTidyMark() {
                return configuration.TidyMark;
            }
0858:
            /**
             * XmlSpace - if set to yes adds xml:space attr as needed
             *
             * @param XmlSpace new value for the configuration's
             *        <code>XmlSpace</code> field
             * @see org.w3c.tidy.Configuration#XmlSpace
             */

            public void setXmlSpace(boolean XmlSpace) {
                configuration.XmlSpace = XmlSpace;
            }
0867:
            /**
             * Returns the XmlSpace setting.
             *
             * @return the current value of the configuration's
             *         <code>XmlSpace</code> field
             * @see org.w3c.tidy.Configuration#XmlSpace
             */
            public boolean getXmlSpace() {
                return configuration.XmlSpace;
            }
0871:
            /**
             * Emacs - if true format error output for GNU Emacs
             *
             * @param Emacs new value for the configuration's
             *        <code>Emacs</code> field
             * @see org.w3c.tidy.Configuration#Emacs
             */

            public void setEmacs(boolean Emacs) {
                configuration.Emacs = Emacs;
            }
0880:
            /**
             * Returns the Emacs setting.
             *
             * @return the current value of the configuration's
             *         <code>Emacs</code> field
             * @see org.w3c.tidy.Configuration#Emacs
             */
            public boolean getEmacs() {
                return configuration.Emacs;
            }
0884:
            /**
             * LiteralAttribs - if true attributes may use newlines
             *
             * @param LiteralAttribs new value for the configuration's
             *        <code>LiteralAttribs</code> field
             * @see org.w3c.tidy.Configuration#LiteralAttribs
             */

            public void setLiteralAttribs(boolean LiteralAttribs) {
                configuration.LiteralAttribs = LiteralAttribs;
            }
0893:
            /**
             * Returns the LiteralAttribs setting.
             *
             * @return the current value of the configuration's
             *         <code>LiteralAttribs</code> field
             * @see org.w3c.tidy.Configuration#LiteralAttribs
             */
            public boolean getLiteralAttribs() {
                return configuration.LiteralAttribs;
            }
0897:
0898:            /**
0899:             * InputStreamName - the name of the input stream (printed in the
0900:             * header information).
0901:             */
0902:            public void setInputStreamName(String name) {
0903:                if (name != null)
0904:                    inputStreamName = name;
0905:            }
0906:
            /**
             * Returns the name currently associated with the input stream.
             *
             * @return the value of the <code>inputStreamName</code> field
             */
            public String getInputStreamName() {
                return inputStreamName;
            }
0910:
            /**
             * Sets the configuration from a configuration file.
             * Delegates to the configuration's <code>parseFile</code> method.
             *
             * @param filename name of the configuration file, passed through
             *        unchanged
             */

            public void setConfigurationFromFile(String filename) {
                configuration.parseFile(filename);
            }
0918:
            /**
             * Sets the configuration from a properties object.
             * Delegates to the configuration's <code>addProps</code> method.
             *
             * @param props the properties to hand to the configuration, passed
             *        through unchanged
             */

            public void setConfigurationFromProps(Properties props) {
                configuration.addProps(props);
            }
0926:
0927:            /**
0928:             * first time initialization which should
0929:             * precede reading the command line
0930:             */
0931:
0932:            private void init() {
0933:                configuration = new Configuration();
0934:                if (configuration == null)
0935:                    return;
0936:
0937:                AttributeTable at = AttributeTable.getDefaultAttributeTable();
0938:                if (at == null)
0939:                    return;
0940:                TagTable tt = new TagTable();
0941:                if (tt == null)
0942:                    return;
0943:                tt.setConfiguration(configuration);
0944:                configuration.tt = tt;
0945:                EntityTable et = EntityTable.getDefaultEntityTable();
0946:                if (et == null)
0947:                    return;
0948:
0949:                /* Unnecessary - same initial values in Configuration
0950:                Configuration.XmlTags       = false;
0951:                Configuration.XmlOut        = false;
0952:                Configuration.HideEndTags   = false;
0953:                Configuration.UpperCaseTags = false;
0954:                Configuration.MakeClean     = false;
0955:                Configuration.writeback     = false;
0956:                Configuration.OnlyErrors    = false;
0957:                 */
0958:
0959:                configuration.errfile = null;
0960:                stderr = new PrintWriter(System.err, true);
0961:                errout = stderr;
0962:                initialized = true;
0963:            }
0964:
0965:            /**
0966:             * Parses InputStream in and returns the root Node.
0967:             * If out is non-null, pretty prints to OutputStream out.
0968:             */
0969:
0970:            public Node parse(InputStream in, OutputStream out) {
0971:                Node document = null;
0972:
0973:                try {
0974:                    document = parse(in, null, out);
0975:                } catch (FileNotFoundException fnfe) {
0976:                } catch (IOException e) {
0977:                }
0978:
0979:                return document;
0980:            }
0981:
0982:            /**
0983:             * Internal routine that actually does the parsing.  The caller
0984:             * can pass either an InputStream or file name.  If both are passed,
0985:             * the file name is preferred.
0986:             */
0987:
0988:            private Node parse(InputStream in, String file, OutputStream out)
0989:                    throws FileNotFoundException, IOException {
0990:                Lexer lexer;
0991:                Node document = null;
0992:                Node doctype;
0993:                Out o = new OutImpl(); /* normal output stream */
0994:                PPrint pprint;
0995:
0996:                if (!initialized)
0997:                    return null;
0998:
0999:                if (errout == null)
1000:                    return null;
1001:
1002:                parseErrors = 0;
1003:                parseWarnings = 0;
1004:
1005:                /* ensure config is self-consistent */
1006:                configuration.adjust();
1007:
1008:                if (file != null) {
1009:                    in = new FileInputStream(file);
1010:                    inputStreamName = file;
1011:                } else if (in == null) {
1012:                    in = System.in;
1013:                    inputStreamName = "stdin";
1014:                }
1015:
1016:                if (in != null) {
1017:                    lexer = new Lexer(new StreamInImpl(in,
1018:                            configuration.CharEncoding, configuration.tabsize),
1019:                            configuration);
1020:                    lexer.errout = errout;
1021:
1022:                    /*
1023:                      store pointer to lexer in input stream
1024:                      to allow character encoding errors to be
1025:                      reported
1026:                     */
1027:                    lexer.in.lexer = lexer;
1028:
1029:                    /* Tidy doesn't alter the doctype for generic XML docs */
1030:                    if (configuration.XmlTags)
1031:                        document = ParserImpl.parseXMLDocument(lexer);
1032:                    else {
1033:                        lexer.warnings = 0;
1034:                        if (!configuration.Quiet)
1035:                            Report.helloMessage(errout, Report.RELEASE_DATE,
1036:                                    inputStreamName);
1037:
1038:                        document = ParserImpl.parseDocument(lexer);
1039:
1040:                        if (!document.checkNodeIntegrity()) {
1041:                            Report.badTree(errout);
1042:                            return null;
1043:                        }
1044:
1045:                        Clean cleaner = new Clean(configuration.tt);
1046:
1047:                        /* simplifies <b><b> ... </b> ...</b> etc. */
1048:                        cleaner.nestedEmphasis(document);
1049:
1050:                        /* cleans up <dir>indented text</dir> etc. */
1051:                        cleaner.list2BQ(document);
1052:                        cleaner.bQ2Div(document);
1053:
1054:                        /* replaces i by em and b by strong */
1055:                        if (configuration.LogicalEmphasis)
1056:                            cleaner.emFromI(document);
1057:
1058:                        if (configuration.Word2000
1059:                                && cleaner.isWord2000(document,
1060:                                        configuration.tt)) {
1061:                            /* prune Word2000's <![if ...]> ... <![endif]> */
1062:                            cleaner.dropSections(lexer, document);
1063:
1064:                            /* drop style & class attributes and empty p, span elements */
1065:                            cleaner.cleanWord2000(lexer, document);
1066:                        }
1067:
1068:                        /* replaces presentational markup by style rules */
1069:                        if (configuration.MakeClean
1070:                                || configuration.DropFontTags)
1071:                            cleaner.cleanTree(lexer, document);
1072:
1073:                        if (!document.checkNodeIntegrity()) {
1074:                            Report.badTree(errout);
1075:                            return null;
1076:                        }
1077:                        doctype = document.findDocType();
1078:                        if (document.content != null) {
1079:                            if (configuration.xHTML)
1080:                                lexer.setXHTMLDocType(document);
1081:                            else
1082:                                lexer.fixDocType(document);
1083:
1084:                            if (configuration.TidyMark)
1085:                                lexer.addGenerator(document);
1086:                        }
1087:
1088:                        /* ensure presence of initial <?XML version="1.0"?> */
1089:                        if (configuration.XmlOut && configuration.XmlPi)
1090:                            lexer.fixXMLPI(document);
1091:
1092:                        if (!configuration.Quiet && document.content != null) {
1093:                            Report.reportVersion(errout, lexer,
1094:                                    inputStreamName, doctype);
1095:                            Report.reportNumWarnings(errout, lexer);
1096:                        }
1097:                    }
1098:
1099:                    parseWarnings = lexer.warnings;
1100:                    parseErrors = lexer.errors;
1101:
1102:                    // Try to close the InputStream but only if if we created it.
1103:
1104:                    if ((file != null) && (in != System.in)) {
1105:                        try {
1106:                            in.close();
1107:                        } catch (IOException e) {
1108:                        }
1109:                    }
1110:
1111:                    if (lexer.errors > 0)
1112:                        Report.needsAuthorIntervention(errout);
1113:
1114:                    o.state = StreamIn.FSM_ASCII;
1115:                    o.encoding = configuration.CharEncoding;
1116:
1117:                    if (!configuration.OnlyErrors && lexer.errors == 0) {
1118:                        if (configuration.BurstSlides) {
1119:                            Node body;
1120:
1121:                            body = null;
1122:                            /*
1123:                               remove doctype to avoid potential clash with
1124:                               markup introduced when bursting into slides
1125:                             */
1126:                            /* discard the document type */
1127:                            doctype = document.findDocType();
1128:
1129:                            if (doctype != null)
1130:                                Node.discardElement(doctype);
1131:
1132:                            /* slides use transitional features */
1133:                            lexer.versions |= Dict.VERS_HTML40_LOOSE;
1134:
1135:                            /* and patch up doctype to match */
1136:                            if (configuration.xHTML)
1137:                                lexer.setXHTMLDocType(document);
1138:                            else
1139:                                lexer.fixDocType(document);
1140:
1141:                            /* find the body element which may be implicit */
1142:                            body = document.findBody(configuration.tt);
1143:
1144:                            if (body != null) {
1145:                                pprint = new PPrint(configuration);
1146:                                Report.reportNumberOfSlides(errout, pprint
1147:                                        .countSlides(body));
1148:                                pprint.createSlides(lexer, document);
1149:                            } else
1150:                                Report.missingBody(errout);
1151:                        } else if (configuration.writeback && (file != null)) {
1152:                            try {
1153:                                pprint = new PPrint(configuration);
1154:                                o.out = new FileOutputStream(file);
1155:
1156:                                if (configuration.XmlTags)
1157:                                    pprint.printXMLTree(o, (short) 0, 0, lexer,
1158:                                            document);
1159:                                else
1160:                                    pprint.printTree(o, (short) 0, 0, lexer,
1161:                                            document);
1162:
1163:                                pprint.flushLine(o, 0);
1164:                                o.out.close();
1165:                            } catch (IOException e) {
1166:                                errout.println(file + e.toString());
1167:                            }
1168:                        } else if (out != null) {
1169:                            pprint = new PPrint(configuration);
1170:                            o.out = out;
1171:
1172:                            if (configuration.XmlTags)
1173:                                pprint.printXMLTree(o, (short) 0, 0, lexer,
1174:                                        document);
1175:                            else
1176:                                pprint.printTree(o, (short) 0, 0, lexer,
1177:                                        document);
1178:
1179:                            pprint.flushLine(o, 0);
1180:                        }
1181:
1182:                    }
1183:
1184:                    Report.errorSummary(lexer);
1185:                }
1186:                return document;
1187:            }
1188:
1189:            /**
1190:             * Parses InputStream in and returns a DOM Document node.
1191:             * If out is non-null, pretty prints to OutputStream out.
1192:             */
1193:
1194:            public org.w3c.dom.Document parseDOM(InputStream in,
1195:                    OutputStream out) {
1196:                Node document = parse(in, out);
1197:                if (document != null)
1198:                    return (org.w3c.dom.Document) document.getAdapter();
1199:                else
1200:                    return null;
1201:            }
1202:
1203:            /**
1204:             * Creates an empty DOM Document.
1205:             */
1206:
1207:            public static org.w3c.dom.Document createEmptyDocument() {
1208:                Node document = new Node(Node.RootNode, new byte[0], 0, 0);
1209:                Node node = new Node(Node.StartTag, new byte[0], 0, 0, "html",
1210:                        new TagTable());
1211:                if (document != null && node != null) {
1212:                    Node.insertNodeAtStart(document, node);
1213:                    return (org.w3c.dom.Document) document.getAdapter();
1214:                } else {
1215:                    return null;
1216:                }
1217:            }
1218:
1219:            /**
1220:             * Pretty-prints a DOM Document.
1221:             */
1222:
1223:            public void pprint(org.w3c.dom.Document doc, OutputStream out) {
1224:                Out o = new OutImpl();
1225:                PPrint pprint;
1226:                Node document;
1227:
1228:                if (!(doc instanceof  DOMDocumentImpl)) {
1229:                    return;
1230:                }
1231:                document = ((DOMDocumentImpl) doc).adaptee;
1232:
1233:                o.state = StreamIn.FSM_ASCII;
1234:                o.encoding = configuration.CharEncoding;
1235:
1236:                if (out != null) {
1237:                    pprint = new PPrint(configuration);
1238:                    o.out = out;
1239:
1240:                    if (configuration.XmlTags)
1241:                        pprint.printXMLTree(o, (short) 0, 0, null, document);
1242:                    else
1243:                        pprint.printTree(o, (short) 0, 0, null, document);
1244:
1245:                    pprint.flushLine(o, 0);
1246:                }
1247:            }
1248:
1249:            /**
1250:             * Command line interface to parser and pretty printer.
1251:             */
1252:
1253:            public static void main(String[] argv) {
1254:                int totalerrors = 0;
1255:                int totalwarnings = 0;
1256:                String file;
1257:                InputStream in;
1258:                String prog = "Tidy";
1259:                Node document;
1260:                Node doctype;
1261:                Lexer lexer;
1262:                String s;
1263:                Out out = new OutImpl(); /* normal output stream */
1264:                PPrint pprint;
1265:                int argc = argv.length + 1;
1266:                int argIndex = 0;
1267:                Tidy tidy;
1268:                Configuration configuration;
1269:                String arg;
1270:                String current_errorfile = "stderr";
1271:
1272:                tidy = new Tidy();
1273:                configuration = tidy.getConfiguration();
1274:
1275:                /* read command line */
1276:
1277:                while (argc > 0) {
1278:                    if (argc > 1 && argv[argIndex].startsWith("-")) {
1279:                        /* support -foo and --foo */
1280:                        arg = argv[argIndex].substring(1);
1281:
1282:                        if (arg.length() > 0 && arg.charAt(0) == '-')
1283:                            arg = arg.substring(1);
1284:
1285:                        if (arg.equals("xml"))
1286:                            configuration.XmlTags = true;
1287:                        else if (arg.equals("asxml") || arg.equals("asxhtml"))
1288:                            configuration.xHTML = true;
1289:                        else if (arg.equals("indent")) {
1290:                            configuration.IndentContent = true;
1291:                            configuration.SmartIndent = true;
1292:                        } else if (arg.equals("omit"))
1293:                            configuration.HideEndTags = true;
1294:                        else if (arg.equals("upper"))
1295:                            configuration.UpperCaseTags = true;
1296:                        else if (arg.equals("clean"))
1297:                            configuration.MakeClean = true;
1298:                        else if (arg.equals("raw"))
1299:                            configuration.CharEncoding = Configuration.RAW;
1300:                        else if (arg.equals("ascii"))
1301:                            configuration.CharEncoding = Configuration.ASCII;
1302:                        else if (arg.equals("latin1"))
1303:                            configuration.CharEncoding = Configuration.LATIN1;
1304:                        else if (arg.equals("utf8"))
1305:                            configuration.CharEncoding = Configuration.UTF8;
1306:                        else if (arg.equals("iso2022"))
1307:                            configuration.CharEncoding = Configuration.ISO2022;
1308:                        else if (arg.equals("mac"))
1309:                            configuration.CharEncoding = Configuration.MACROMAN;
1310:                        else if (arg.equals("numeric"))
1311:                            configuration.NumEntities = true;
1312:                        else if (arg.equals("modify"))
1313:                            configuration.writeback = true;
1314:                        else if (arg.equals("change")) /* obsolete */
1315:                            configuration.writeback = true;
1316:                        else if (arg.equals("update")) /* obsolete */
1317:                            configuration.writeback = true;
1318:                        else if (arg.equals("errors"))
1319:                            configuration.OnlyErrors = true;
1320:                        else if (arg.equals("quiet"))
1321:                            configuration.Quiet = true;
1322:                        else if (arg.equals("slides"))
1323:                            configuration.BurstSlides = true;
1324:                        else if (arg.equals("help")
1325:                                || argv[argIndex].charAt(1) == '?'
1326:                                || argv[argIndex].charAt(1) == 'h') {
1327:                            Report.helpText(new PrintWriter(System.out, true),
1328:                                    prog);
1329:                            System.exit(1);
1330:                        } else if (arg.equals("config")) {
1331:                            if (argc >= 3) {
1332:                                configuration.parseFile(argv[argIndex + 1]);
1333:                                --argc;
1334:                                ++argIndex;
1335:                            }
1336:                        } else if (argv[argIndex].equals("-file")
1337:                                || argv[argIndex].equals("--file")
1338:                                || argv[argIndex].equals("-f")) {
1339:                            if (argc >= 3) {
1340:                                configuration.errfile = argv[argIndex + 1];
1341:                                --argc;
1342:                                ++argIndex;
1343:                            }
1344:                        } else if (argv[argIndex].equals("-wrap")
1345:                                || argv[argIndex].equals("--wrap")
1346:                                || argv[argIndex].equals("-w")) {
1347:                            if (argc >= 3) {
1348:                                configuration.wraplen = Integer
1349:                                        .parseInt(argv[argIndex + 1]);
1350:                                --argc;
1351:                                ++argIndex;
1352:                            }
1353:                        } else if (argv[argIndex].equals("-version")
1354:                                || argv[argIndex].equals("--version")
1355:                                || argv[argIndex].equals("-v")) {
1356:                            Report.showVersion(tidy.getErrout());
1357:                            System.exit(0);
1358:                        } else {
1359:                            s = argv[argIndex];
1360:
1361:                            for (int i = 1; i < s.length(); i++) {
1362:                                if (s.charAt(i) == 'i') {
1363:                                    configuration.IndentContent = true;
1364:                                    configuration.SmartIndent = true;
1365:                                } else if (s.charAt(i) == 'o')
1366:                                    configuration.HideEndTags = true;
1367:                                else if (s.charAt(i) == 'u')
1368:                                    configuration.UpperCaseTags = true;
1369:                                else if (s.charAt(i) == 'c')
1370:                                    configuration.MakeClean = true;
1371:                                else if (s.charAt(i) == 'n')
1372:                                    configuration.NumEntities = true;
1373:                                else if (s.charAt(i) == 'm')
1374:                                    configuration.writeback = true;
1375:                                else if (s.charAt(i) == 'e')
1376:                                    configuration.OnlyErrors = true;
1377:                                else if (s.charAt(i) == 'q')
1378:                                    configuration.Quiet = true;
1379:                                else
1380:                                    Report.unknownOption(tidy.getErrout(), s
1381:                                            .charAt(i));
1382:                            }
1383:                        }
1384:
1385:                        --argc;
1386:                        ++argIndex;
1387:                        continue;
1388:                    }
1389:
1390:                    /* ensure config is self-consistent */
1391:                    configuration.adjust();
1392:
1393:                    /* user specified error file */
1394:                    if (configuration.errfile != null) {
1395:                        /* is it same as the currently opened file? */
1396:                        if (!configuration.errfile.equals(current_errorfile)) {
1397:                            /* no so close previous error file */
1398:
1399:                            if (tidy.getErrout() != tidy.getStderr())
1400:                                tidy.getErrout().close();
1401:
1402:                            /* and try to open the new error file */
1403:                            try {
1404:                                tidy.setErrout(new PrintWriter(new FileWriter(
1405:                                        configuration.errfile), true));
1406:                                current_errorfile = configuration.errfile;
1407:                            } catch (IOException e) {
1408:                                /* can't be opened so fall back to stderr */
1409:                                current_errorfile = "stderr";
1410:                                tidy.setErrout(tidy.getStderr());
1411:                            }
1412:                        }
1413:                    }
1414:
1415:                    if (argc > 1) {
1416:                        file = argv[argIndex];
1417:                    } else {
1418:                        file = "stdin";
1419:                    }
1420:
1421:                    try {
1422:                        document = tidy.parse(null, file, System.out);
1423:                        totalwarnings += tidy.parseWarnings;
1424:                        totalerrors += tidy.parseErrors;
1425:                    } catch (FileNotFoundException fnfe) {
1426:                        Report.unknownFile(tidy.getErrout(), prog, file);
1427:                    } catch (IOException ioe) {
1428:                        Report.unknownFile(tidy.getErrout(), prog, file);
1429:                    }
1430:
1431:                    --argc;
1432:                    ++argIndex;
1433:
1434:                    if (argc <= 1)
1435:                        break;
1436:                }
1437:
1438:                if (totalerrors + totalwarnings > 0)
1439:                    Report.generalInfo(tidy.getErrout());
1440:
1441:                if (tidy.getErrout() != tidy.getStderr())
1442:                    tidy.getErrout().close();
1443:
1444:                /* return status can be used by scripts */
1445:
1446:                if (totalerrors > 0)
1447:                    System.exit(2);
1448:
1449:                if (totalwarnings > 0)
1450:                    System.exit(1);
1451:
1452:                /* 0 signifies all is ok */
1453:                System.exit(0);
1454:            }
1455:
1456:            // BEGIN RAVE MODIFICATIONS
            /**
             * Placeholder text that EntityWrapperInputStream substitutes for each
             * '&amp;' it reads, and that EntityWrapperOutputStream recognizes in
             * order to write an ampersand back out.
             */
            static final String replacement = "%leaveentitiesalone%";
1458:
1459:            /** Wraps an input stream, and "escapes" entities such that
1460:             *  JTidy doesn't see them (and doesn't mess with them). The
1461:             *  corresponding EntityWrapperOutputStream will undo its effects.
1462:             * */
1463:            public static class EntityWrapperInputStream extends InputStream {
1464:                public EntityWrapperInputStream(InputStream inputStream) {
1465:                    this .inputStream = inputStream;
1466:                }
1467:
1468:                public int read() throws IOException {
1469:                    if (buffer != null) {
1470:                        if (++bufferPosition >= buffer.length()) {
1471:                            buffer = null;
1472:                            bufferPosition = -1;
1473:                        } else {
1474:                            return buffer.charAt(bufferPosition);
1475:                        }
1476:                    }
1477:
1478:                    int result = inputStream.read();
1479:                    if (result == '&') {
1480:                        buffer = replacement;
1481:                        result = read();
1482:                    }
1483:
1484:                    return result;
1485:                }
1486:
1487:                public int read(byte[] b) throws IOException {
1488:                    return read(b, 0, b.length);
1489:                }
1490:
1491:                public int read(byte[] b, int offset, int length)
1492:                        throws IOException {
1493:                    int c;
1494:                    int i = -1;
1495:                    while (++i < length) {
1496:                        c = read();
1497:                        if (c == -1) {
1498:                            return i == 0 ? -1 : i;
1499:                        }
1500:                        b[offset + i] = (byte) c;
1501:                    }
1502:
1503:                    return i;
1504:                }
1505:
1506:                public void close() throws IOException {
1507:                    inputStream.close();
1508:                }
1509:
1510:                private String buffer;
1511:                private InputStream inputStream;
1512:                private int bufferPosition = -1;
1513:            }
1514:
1515:            /** Wraps an output stream, and translates escaped entities back
1516:             * into proper entities
1517:             * */
1518:            public static class EntityWrapperOutputStream extends OutputStream {
1519:                public EntityWrapperOutputStream(OutputStream outputStream,
1520:                        boolean jspx) {
1521:                    this .outputStream = outputStream;
1522:                    this .jspx = jspx;
1523:                }
1524:
1525:                public void close() throws IOException {
1526:                    outputStream.close();
1527:                }
1528:
1529:                public void flush() throws IOException {
1530:                    outputStream.flush();
1531:                }
1532:
1533:                public void write(int b) throws IOException {
1534:                    if (sb.length() != 0) {
1535:                        sb.append((char) b);
1536:                        String s = sb.toString(); // UGH! super inefficient
1537:                        if (s.equals(replacement)) {
1538:                            outputStream.write('&');
1539:                            if (jspx) {
1540:                                outputStream.write('a');
1541:                                outputStream.write('m');
1542:                                outputStream.write('p');
1543:                                outputStream.write(';');
1544:                            }
1545:                            sb.setLength(0);
1546:                        } else if (!replacement.startsWith(s)) {
1547:                            outputStream.write(s.getBytes());
1548:                            sb.setLength(0);
1549:                        }
1550:                    } else if (b == (int) replacement.charAt(0)) {
1551:                        sb.append((char) b);
1552:                    } else {
1553:                        outputStream.write(b);
1554:                    }
1555:                }
1556:
1557:                private boolean jspx;
1558:                private StringBuffer sb = new StringBuffer();
1559:                private OutputStream outputStream;
1560:            }
1561:
1562:            /** When parseDOM is called, there's no output stream to fix the
1563:             * nodes. This method achieves that.
1564:             */
1565:            public static void cleanEntities(org.w3c.dom.Node node,
1566:                    boolean convertHtmlToJspx) {
1567:                if (node instanceof  CharacterData) {
1568:                    CharacterData text = (CharacterData) node;
1569:                    while (true) {
1570:                        String s = text.getData();
1571:                        if (s.indexOf(replacement) == -1) {
1572:                            break;
1573:                        }
1574:                        // Don't change text ampersands within text nodes because those 
1575:                        // will get expanded anyway by the dom serializer
1576:                        //s = s.replaceAll(replacement, convertHtmlToJspx ? "&amp;" : "&");
1577:                        s = s.replaceAll(replacement, "&");
1578:                        text.setData(s);
1579:                    }
1580:                }
1581:                NamedNodeMap nmn = node.getAttributes();
1582:                if (nmn != null) {
1583:                    for (int j = 0, siz = nmn.getLength(); j < siz; j++) {
1584:                        org.w3c.dom.Node item = nmn.item(j);
1585:                        if (item instanceof  org.w3c.dom.Attr) {
1586:                            org.w3c.dom.Attr attr = (org.w3c.dom.Attr) item;
1587:                            String s = attr.getValue();
1588:                            if (s.indexOf(replacement) == -1) {
1589:                                continue;
1590:                            }
1591:                            s = s.replaceAll(replacement,
1592:                                    convertHtmlToJspx ? "&amp;" : "&");
1593:                            s = expand(s);
1594:                            // XXX It would be nice if I could find a way to set
1595:                            // the node value of the Node in such a way that it
1596:                            // preserve entities.  Can I add text nodes and entity
1597:                            // nodes?? That would kick ass!
1598:                            attr.setValue(s);
1599:                        }
1600:                    }
1601:                }
1602:                NodeList nl = node.getChildNodes();
1603:                for (int i = 0, n = nl.getLength(); i < n; i++) {
1604:                    org.w3c.dom.Node child = nl.item(i);
1605:                    cleanEntities(child, convertHtmlToJspx);
1606:                }
1607:            }
1608:
1609:            /** Expand entities one level in the given source string.
1610:             * Copied from insync.markup. This method has the Sun copyright.
1611:             */
1612:            private static String expand(String unexpanded) {
1613:                if (unexpanded.indexOf('&') == -1) { // todo: keep index and copy up to it below
1614:                    return unexpanded;
1615:                }
1616:                int n = unexpanded.length();
1617:                int nm1 = n - 1;
1618:
1619:                // IMPORTANT NOTE: Keeps this code in sync with getJspxOffset below!
1620:
1621:                StringBuffer sb = new StringBuffer(n);
1622:                for (int i = 0; i < n; i++) {
1623:                    char c = unexpanded.charAt(i);
1624:                    if (c == '&' && i < nm1) {
1625:                        // Locate entity
1626:                        int begin = i + 1;
1627:                        int end = begin;
1628:                        while (end < n && unexpanded.charAt(end) != ';'
1629:                                && (end - begin <= 10)) { // longest entity is 8 chars
1630:                            end++;
1631:                        }
1632:                        if (end == n || unexpanded.charAt(end) != ';') {
1633:                            // Error - just spit out a &
1634:                            sb.append('&');
1635:                            continue;
1636:                        }
1637:                        String entity = unexpanded.substring(begin, end);
1638:
1639:                        //NB60 Talk to Peter Zavadsky
1640:                        int e = com.sun.org.apache.xml.internal.serialize.HTMLdtd
1641:                                .charFromName(entity);
1642:                        if (e == -1) {
1643:                            sb.append('&'); // browsers show the &
1644:                            continue;
1645:                        } else {
1646:                            sb.append((char) e);
1647:                            i = end;
1648:                        }
1649:                    } else {
1650:                        sb.append(c);
1651:                    }
1652:                }
1653:                return sb.toString();
1654:            }
1655:
1656:            // END RAVE MODIFICATIONS
1657:
1658:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.