| java.lang.Object org.cyberneko.html.filters.DefaultFilter org.cyberneko.html.filters.Purifier
Purifier | public class Purifier extends DefaultFilter (Code) | | This filter purifies the HTML input to ensure XML well-formedness.
The purification process includes:
- fixing illegal characters in the document, including
- element and attribute names,
- processing instruction target and data,
- document text;
- ensuring the string "--" does not appear in the content of
a comment;
- ensuring the string "]]>" does not appear in the content of
a CDATA section;
- ensuring that the XML declaration has required pseudo-attributes
and that the values are correct;
and
- synthesizing missing namespace bindings.
Illegal characters in XML names are converted to the character
sequence "_u####_" where "####" is the value of the Unicode
character represented in hexadecimal. Illegal characters
appearing in document content, by contrast, are converted to the
character sequence "\\u####".
In comments, the character '-' is replaced by the character
sequence "- " to prevent "--" from ever appearing in the comment
content. For CDATA sections, the character ']' is replaced by
the character sequence "] " to prevent "]]" from appearing.
The URI used for synthesized namespace bindings is
"http://cyberneko.org/html/ns/synthesized/number" where
number is generated to ensure uniqueness.
author: Andy Clark version: $Id: Purifier.java,v 1.5 2005/02/14 03:56:54 andyc Exp $ |
Method Summary | |
public void | characters(XMLString text, Augmentations augs) Characters. | public void | comment(XMLString text, Augmentations augs) Comment. | public void | doctypeDecl(String root, String pubid, String sysid, Augmentations augs) Doctype declaration. | public void | emptyElement(QName element, XMLAttributes attrs, Augmentations augs) Empty element. | public void | endCDATA(Augmentations augs) End CDATA section. | public void | endElement(QName element, Augmentations augs) End element. | protected void | handleStartDocument() Handle start document. | protected void | handleStartElement(QName element, XMLAttributes attrs) Handle start element. | public void | processingInstruction(String target, XMLString data, Augmentations augs) Processing instruction. | protected String | purifyName(String name, boolean localpart) Purify name. | protected QName | purifyQName(QName qname) Purify qualified name. | protected XMLString | purifyText(XMLString text) Purify content. | public void | reset(XMLComponentManager manager) | public void | startCDATA(Augmentations augs) Start CDATA section. | public void | startDocument(XMLLocator locator, String encoding, Augmentations augs) Start document. | public void | startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext, Augmentations augs) Start document. | public void | startElement(QName element, XMLAttributes attrs, Augmentations augs) Start element. | protected void | synthesizeBinding(XMLAttributes attrs, String ns) Synthesize namespace binding. | final protected Augmentations | synthesizedAugs() Returns an augmentations object with a synthesized item added. | protected static String | toHexString(int c, int padlen) Returns a padded hexadecimal string for the given value. | public void | xmlDecl(String version, String encoding, String standalone, Augmentations augs) XML declaration. |
AUGMENTATIONS | final protected static String AUGMENTATIONS(Code) | | Include infoset augmentations.
|
NAMESPACES | final protected static String NAMESPACES(Code) | | Namespaces.
|
SYNTHESIZED_ITEM | final protected static HTMLEventInfo SYNTHESIZED_ITEM(Code) | | Synthesized event info item.
|
SYNTHESIZED_NAMESPACE_PREFX | final public static String SYNTHESIZED_NAMESPACE_PREFX(Code) | | Synthesized namespace binding prefix.
|
fAugmentations | protected boolean fAugmentations(Code) | | Augmentations.
|
fInCDATASection | protected boolean fInCDATASection(Code) | | True if inside a CDATA section.
|
fNamespaceContext | protected NamespaceContext fNamespaceContext(Code) | | Namespace information.
|
fNamespaces | protected boolean fNamespaces(Code) | | Namespaces.
|
fPublicId | protected String fPublicId(Code) | | Public identifier of doctype declaration.
|
fSeenDoctype | protected boolean fSeenDoctype(Code) | | True if the doctype declaration was seen.
|
fSeenRootElement | protected boolean fSeenRootElement(Code) | | True if root element was seen.
|
fSynthesizedNamespaceCount | protected int fSynthesizedNamespaceCount(Code) | | Synthesized namespace binding count.
|
fSystemId | protected String fSystemId(Code) | | System identifier of doctype declaration.
|
characters | public void characters(XMLString text, Augmentations augs) throws XNIException(Code) | | Characters.
|
comment | public void comment(XMLString text, Augmentations augs) throws XNIException(Code) | | Comment.
|
doctypeDecl | public void doctypeDecl(String root, String pubid, String sysid, Augmentations augs) throws XNIException(Code) | | Doctype declaration.
|
emptyElement | public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException(Code) | | Empty element.
|
endCDATA | public void endCDATA(Augmentations augs) throws XNIException(Code) | | End CDATA section.
|
endElement | public void endElement(QName element, Augmentations augs) throws XNIException(Code) | | End element.
|
handleStartDocument | protected void handleStartDocument()(Code) | | Handle start document.
|
handleStartElement | protected void handleStartElement(QName element, XMLAttributes attrs)(Code) | | Handle start element.
|
processingInstruction | public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException(Code) | | Processing instruction.
|
purifyQName | protected QName purifyQName(QName qname)(Code) | | Purify qualified name.
|
purifyText | protected XMLString purifyText(XMLString text)(Code) | | Purify content.
|
reset | public void reset(XMLComponentManager manager) throws XMLConfigurationException(Code) | | |
startCDATA | public void startCDATA(Augmentations augs) throws XNIException(Code) | | Start CDATA section.
|
startDocument | public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException(Code) | | Start document.
|
startDocument | public void startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext, Augmentations augs) throws XNIException(Code) | | Start document.
|
startElement | public void startElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException(Code) | | Start element.
|
synthesizeBinding | protected void synthesizeBinding(XMLAttributes attrs, String ns)(Code) | | Synthesize namespace binding.
|
synthesizedAugs | final protected Augmentations synthesizedAugs()(Code) | | Returns an augmentations object with a synthesized item added.
|
toHexString | protected static String toHexString(int c, int padlen)(Code) | | Returns a padded hexadecimal string for the given value.
|
xmlDecl | public void xmlDecl(String version, String encoding, String standalone, Augmentations augs) throws XNIException(Code) | | XML declaration.
|
Fields inherited from org.cyberneko.html.filters.DefaultFilter | protected XMLDocumentHandler fDocumentHandler(Code)(Java Doc) protected XMLDocumentSource fDocumentSource(Code)(Java Doc)
|
Methods inherited from org.cyberneko.html.filters.DefaultFilter | public void characters(XMLString text, Augmentations augs) throws XNIException(Code)(Java Doc) public void comment(XMLString text, Augmentations augs) throws XNIException(Code)(Java Doc) public void doctypeDecl(String root, String publicId, String systemId, Augmentations augs) throws XNIException(Code)(Java Doc) public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException(Code)(Java Doc) public void endCDATA(Augmentations augs) throws XNIException(Code)(Java Doc) public void endDocument(Augmentations augs) throws XNIException(Code)(Java Doc) public void endElement(QName element, Augmentations augs) throws XNIException(Code)(Java Doc) public void endGeneralEntity(String name, Augmentations augs) throws XNIException(Code)(Java Doc) public void endPrefixMapping(String prefix, Augmentations augs) throws XNIException(Code)(Java Doc) public XMLDocumentHandler getDocumentHandler()(Code)(Java Doc) public XMLDocumentSource getDocumentSource()(Code)(Java Doc) public Boolean getFeatureDefault(String featureId)(Code)(Java Doc) public Object getPropertyDefault(String propertyId)(Code)(Java Doc) public String[] getRecognizedFeatures()(Code)(Java Doc) public String[] getRecognizedProperties()(Code)(Java Doc) public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException(Code)(Java Doc) protected static String[] merge(String[] array1, String[] array2)(Code)(Java Doc) public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException(Code)(Java Doc) public void reset(XMLComponentManager componentManager) throws XMLConfigurationException(Code)(Java Doc) public void setDocumentHandler(XMLDocumentHandler handler)(Code)(Java Doc) public void setDocumentSource(XMLDocumentSource source)(Code)(Java Doc) public void setFeature(String featureId, boolean state) throws XMLConfigurationException(Code)(Java Doc) public void 
setProperty(String propertyId, Object value) throws XMLConfigurationException(Code)(Java Doc) public void startCDATA(Augmentations augs) throws XNIException(Code)(Java Doc) public void startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext, Augmentations augs) throws XNIException(Code)(Java Doc) public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException(Code)(Java Doc) public void startElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException(Code)(Java Doc) public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding, Augmentations augs) throws XNIException(Code)(Java Doc) public void startPrefixMapping(String prefix, String uri, Augmentations augs) throws XNIException(Code)(Java Doc) public void textDecl(String version, String encoding, Augmentations augs) throws XNIException(Code)(Java Doc) public void xmlDecl(String version, String encoding, String standalone, Augmentations augs) throws XNIException(Code)(Java Doc)
|
|
|