001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.jetspeed.rewriter.html.neko;
018:
019: import java.io.Reader;
020: import java.io.IOException;
021:
022: import org.apache.commons.logging.Log;
023: import org.apache.commons.logging.LogFactory;
024: import org.apache.xerces.xni.parser.XMLDocumentFilter;
025: import org.apache.xerces.xni.parser.XMLInputSource;
026:
027: import org.apache.jetspeed.rewriter.ParserAdaptor;
028: import org.apache.jetspeed.rewriter.Rewriter;
029: import org.apache.jetspeed.rewriter.RewriterException;
030:
031: import org.xml.sax.SAXException;
032:
033: import org.cyberneko.html.parsers.SAXParser;
034: import org.cyberneko.html.filters.DefaultFilter;
035: import org.cyberneko.html.filters.Purifier;
036:
037: /**
038: * <p>
039: * NekoParserAdapter
040: * </p>
041: * <p>
042: *
043: * </p>
044: * @author <a href="mailto:dyoung@phase2systems.com">David L Young</a>
045: * @version $Id: $
046: *
047: */
048: public class NekoParserAdaptor implements ParserAdaptor {
049: protected final static Log log = LogFactory
050: .getLog(NekoParserAdaptor.class);
051:
052: /*
053: * Construct a cyberneko HTML parser adaptor
054: */
055: public NekoParserAdaptor() {
056: super ();
057: }
058:
059: /**
060: * <p>
061: * parse
062: * </p>
063: *
064: * @see org.apache.jetspeed.rewriter.ParserAdaptor#parse(org.apache.jetspeed.rewriter.Rewriter, java.io.Reader)
065: * @param rewriter
066: * @param reader
067: * @throws RewriterException
068: */
069: public void parse(Rewriter rewriter, Reader reader)
070: throws RewriterException {
071: // not sure what this means to parse without rewriting
072: rewrite(rewriter, reader, null);
073: }
074:
075: /**
076: * <p>
077: * rewrite
078: * </p>
079: *
080: * @see org.apache.jetspeed.rewriter.ParserAdaptor#rewrite(org.apache.jetspeed.rewriter.Rewriter, java.io.Reader, java.io.Writer)
081: * @param rewriter
082: * @param reader
083: * @param writer
084: * @throws RewriterException
085: */
086: public void rewrite(Rewriter rewriter, java.io.Reader reader,
087: java.io.Writer writer) throws RewriterException {
088: // use a cyberneko SAXParser
089: SAXParser parser = new SAXParser();
090:
091: // setup filter chain
092: XMLDocumentFilter[] filters = {
093: new Purifier(), // [1] standard neko purifications (tag balancing, etc)
094: new CallbackElementRemover(rewriter), // [2] accept / reject tags based on advice from rewriter
095: writer != null ? new org.cyberneko.html.filters.Writer(
096: writer, null) : new DefaultFilter() // [3] propagate results to specified writer (or do nothing -- Default -- when writer is null)
097: };
098:
099: String filtersPropName = "http://cyberneko.org/html/properties/filters";
100:
101: try {
102: parser.setProperty(filtersPropName, filters);
103: } catch (SAXException e) {
104: // either no longer supported (SAXNotSupportedException), or no logner recognized (SAXNotRecognizedException)
105: log
106: .error(
107: filtersPropName
108: + " is, unexpectedly, no longer defined for the cyberneko HTML parser",
109: e);
110: throw new RewriterException(
111: "cyberneko parser version not supported", e);
112: }
113:
114: try {
115: // parse from reader
116: parser.parse(new XMLInputSource(null, null, null, reader,
117: null));
118: } catch (IOException e) {
119: String msg = "cyberneko HTML parsing failure";
120: log.error(msg, e);
121: throw new RewriterException(msg, e);
122: }
123:
124: }
125:
126: }
|