| java.lang.Object org.htmlcleaner.HtmlCleaner
HtmlCleaner | public class HtmlCleaner (Code) | | Main HtmlCleaner class.
It represents the public interface to the user. Its task is to call the tokenizer with
the specified source HTML, traverse the produced token list and create an internal
object model. It also offers a set of methods to write the resulting XML to a string,
file or any output stream.
Typical usage is the following:
HtmlCleaner cleaner = new HtmlCleaner(...); // one of few constructors
cleaner.setXXX(...) // optionally, set cleaner's behaviour
cleaner.clean(); // calls cleaning process
cleaner.writeXmlXXX(...); // writes resulting XML to string, file or any output stream
// cleaner.createDOM(); // writes resulting XML to string, file or any output stream
Created by: Vladimir Nikic
Date: November, 2006
|
Constructor Summary | |
public | HtmlCleaner(String htmlContent, ITagInfoProvider tagInfoProvider) Constructor - creates the instance with specified html
content as String. | public | HtmlCleaner(String htmlContent) Constructor - creates the instance with specified html
content as String. | public | HtmlCleaner(File file, String charset, ITagInfoProvider tagInfoProvider) Constructor - creates the instance for specified file. | public | HtmlCleaner(File file, String charset) Constructor - creates the instance for specified file. | public | HtmlCleaner(File file, ITagInfoProvider tagInfoProvider) Constructor - creates the instance for specified file and charset. | public | HtmlCleaner(File file) Constructor - creates the instance for specified file and charset. | public | HtmlCleaner(URL url, String charset, ITagInfoProvider tagInfoProvider) Constructor - creates the instance for specified URL and charset. | public | HtmlCleaner(URL url, ITagInfoProvider tagInfoProvider) Constructor - creates the instance for specified URL and charset. | public | HtmlCleaner(URL url, String charset) Constructor - creates the instance for specified URL and charset. | public | HtmlCleaner(URL url) Constructor - creates the instance for specified URL and charset. | public | HtmlCleaner(InputStream in, ITagInfoProvider tagInfoProvider) | public | HtmlCleaner(InputStream in) | public | HtmlCleaner(InputStream in, String charset) |
DEFAULT_CHARSET | final public static String DEFAULT_CHARSET(Code) | | |
HtmlCleaner | public HtmlCleaner(String htmlContent, ITagInfoProvider tagInfoProvider)(Code) | | Constructor - creates the instance with specified html
content as String.
Parameters: htmlContent - |
HtmlCleaner | public HtmlCleaner(String htmlContent)(Code) | | Constructor - creates the instance with specified html
content as String.
Parameters: htmlContent - |
HtmlCleaner | public HtmlCleaner(File file, String charset) throws IOException(Code) | | Constructor - creates the instance for specified file and charset.
Parameters: file - Parameters: charset - throws: IOException - |
HtmlCleaner | public HtmlCleaner(File file) throws IOException(Code) | | Constructor - creates the instance for specified file.
Parameters: file - throws: IOException - |
HtmlCleaner | public HtmlCleaner(URL url, String charset) throws IOException(Code) | | Constructor - creates the instance for specified URL and charset.
Parameters: url - Parameters: charset - throws: IOException - |
HtmlCleaner | public HtmlCleaner(URL url) throws IOException(Code) | | Constructor - creates the instance for specified URL.
Parameters: url - throws: IOException - |
HtmlCleaner | public HtmlCleaner(InputStream in, ITagInfoProvider tagInfoProvider)(Code) | | Constructor - creates the instance for the specified input stream
Parameters: in - Parameters: tagInfoProvider - |
HtmlCleaner | public HtmlCleaner(InputStream in)(Code) | | Constructor - creates the instance for the specified input stream
Parameters: in - |
HtmlCleaner | public HtmlCleaner(InputStream in, String charset) throws IOException(Code) | | Constructor - creates the instance for the specified input stream
and the charset
Parameters: in - Parameters: charset - throws: IOException - |
clean | public void clean(boolean isTextPlain, boolean addStyleSheet) throws IOException(Code) | | |
getHyphenReplacementInComment | public String getHyphenReplacementInComment()(Code) | | |
isAdvancedXmlEscape | public boolean isAdvancedXmlEscape()(Code) | | |
isOmitComments | public boolean isOmitComments()(Code) | | |
isOmitDeprecatedTags | public boolean isOmitDeprecatedTags()(Code) | | |
isOmitDoctypeDeclaration | public boolean isOmitDoctypeDeclaration()(Code) | | |
isOmitUnknownTags | public boolean isOmitUnknownTags()(Code) | | |
isOmitXmlDeclaration | public boolean isOmitXmlDeclaration()(Code) | | |
isOmitXmlnsAttributes | public boolean isOmitXmlnsAttributes()(Code) | | |
isRecognizeUnicodeChars | public boolean isRecognizeUnicodeChars()(Code) | | |
isTranslateSpecialEntities | public boolean isTranslateSpecialEntities()(Code) | | |
isUseCdataForScriptAndStyle | public boolean isUseCdataForScriptAndStyle()(Code) | | |
setAdvancedXmlEscape | public void setAdvancedXmlEscape(boolean advancedXmlEscape)(Code) | | |
setHyphenReplacementInComment | public void setHyphenReplacementInComment(String hyphenReplacementInComment)(Code) | | |
setOmitComments | public void setOmitComments(boolean omitComments)(Code) | | |
setOmitDeprecatedTags | public void setOmitDeprecatedTags(boolean omitDeprecatedTags)(Code) | | |
setOmitDoctypeDeclaration | public void setOmitDoctypeDeclaration(boolean omitDoctypeDeclaration)(Code) | | |
setOmitUnknownTags | public void setOmitUnknownTags(boolean omitUnknownTags)(Code) | | |
setOmitXmlDeclaration | public void setOmitXmlDeclaration(boolean omitXmlDeclaration)(Code) | | |
setOmitXmlnsAttributes | public void setOmitXmlnsAttributes(boolean omitXmlnsAttributes)(Code) | | |
setRecognizeUnicodeChars | public void setRecognizeUnicodeChars(boolean recognizeUnicodeChars)(Code) | | |
setTranslateSpecialEntities | public void setTranslateSpecialEntities(boolean translateSpecialEntities)(Code) | | |
setUseCdataForScriptAndStyle | public void setUseCdataForScriptAndStyle(boolean useCdataForScriptAndStyle)(Code) | | |
|
|