001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.transformation;
018:
019: import java.io.IOException;
020: import java.io.Serializable;
021: import java.util.Map;
022: import java.util.LinkedList;
023: import java.util.HashSet;
024: import java.util.Set;
025: import java.util.StringTokenizer;
026: import org.xml.sax.Attributes;
027: import org.xml.sax.SAXException;
028: import org.apache.cocoon.ProcessingException;
029: import org.apache.cocoon.caching.CacheableProcessingComponent;
030: import org.apache.cocoon.environment.SourceResolver;
031: import org.apache.cocoon.transformation.AbstractSAXTransformer;
032: import org.apache.avalon.framework.configuration.Configuration;
033: import org.apache.avalon.framework.configuration.ConfigurationException;
034: import org.apache.avalon.framework.parameters.Parameters;
035: import org.apache.excalibur.source.SourceValidity;
036: import org.apache.excalibur.source.impl.validity.NOPValidity;
037:
038: /**
039: * Cleanup transformer: Removes excess whitespace while adding some where needed
040: * for legibility. Strips unwanted namespace declarations.
041: *
042: * <p>The cleanup transformer can be used for basically any document as-is or customized by
043: * schema (inline vs. block elements) for easier reading.</p>
044: *
045: * <p>Transformer declaration:
046: * <map:components>
047: * <map:transformers>
048: * <map:transformer name="htmlcleanup"
049: * src="org.apache.cocoon.transformation.CleanupTransformer">
050: * <preserve-uri>*</preserve-uri>
051: * </map:transformer>
052: *
053: * <map:transformer name="xhtmlcleanup"
054: * src="org.apache.cocoon.transformation.CleanupTransformer">
055: * <inline-elements>a,abbr,acronym,b,br,font,i,u,img</inline-elements>
056: * <preserve-uri>http://www.w3.org/1999/xhtml</preserve-uri>
057: * </map:transformer>
058: * </map:transformers>
059: * </map:components>
060: * </p>
061: *
062: * <p>The "inline-elements" configuration element refers to a list of element names that are
063: * <strong>not</strong> to be indented. The "preserve-uri" configuration element specifies a
064: * namespace uri mapping that is meant for output. All other namespace declarations are
065: * stripped from the output. The "preserve-uri" element may appear more than once. If
066: * "preserve-uri" is omitted, all namespaces/prefixes are removed from the output.</p>
067: *
068: * <p>Transformer usage:
069: * <transform type="xhtmlcleanup">
070: * <map:parameter name="indent-size" value="4"/>
071: * </transform>
072: * </p>
073: *
074: * <p>The optional parameter "indent-size" specifies the number of additional space characters
075: * appearing at each level of the output document. The default value is 2.</p>
076: *
077: * <p>Bugs: Nested namespace declarations with the same namespace prefix will break the code.</p>
078: *
079: * @author Miles Elam
080: */
081: public class CleanupTransformer extends AbstractSAXTransformer
082: implements CacheableProcessingComponent {
083:
084: private static final char[] INDENT = ("\n"
085: + " "
086: + " ")
087: .toCharArray();
088: private static final int MAX_INDENT = CleanupTransformer.INDENT.length - 1;
089:
090: private boolean allowAllURIs = false;
091: private Set allowedURIs = new HashSet();
092: private Set inlineElements = new HashSet();
093: private LinkedList uriPrefixes = new LinkedList();
094: private int indentSize = 2;
095: private int numIndents = 0;
096: private String lastElement;
097:
098: /**
099: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#configure(org.apache.avalon.framework.configuration.Configuration)
100: */
101: public void configure(Configuration conf)
102: throws ConfigurationException {
103: StringTokenizer st;
104:
105: Configuration inlineEltChild = conf.getChild("inline-elements");
106: st = new StringTokenizer(inlineEltChild.getValue(""), ",");
107: this .inlineElements.clear();
108: while (st.hasMoreTokens()) {
109: String nextElement = st.nextToken().trim();
110: if (nextElement.length() > 0) {
111: this .inlineElements.add(nextElement);
112: }
113: }
114:
115: this .allowAllURIs = false;
116: Configuration[] uriChildren = conf.getChildren("preserve-uri");
117: for (int i = 0; i < uriChildren.length; ++i) {
118: String nextChild = uriChildren[i].getValue("").trim();
119: if (nextChild.length() == 0) {
120: continue;
121: } else if (nextChild.equals("*")) {
122: this .allowAllURIs = true;
123: break;
124: }
125: this .allowedURIs.add(nextChild);
126: }
127: }
128:
129: /**
130: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#setup(org.apache.cocoon.environment.SourceResolver, java.util.Map, java.lang.String, org.apache.avalon.framework.parameters.Parameters)
131: */
132: public void setup(SourceResolver resolver, Map objectModel,
133: String src, Parameters par) throws ProcessingException,
134: SAXException, IOException {
135: super .setup(resolver, objectModel, src, par);
136: this .indentSize = par.getParameterAsInteger("indent-size", 2);
137: }
138:
139: /**
140: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#recycle()
141: */
142: public void recycle() {
143: super .recycle();
144: this .numIndents = 0;
145: this .lastElement = null;
146: this .uriPrefixes.clear();
147: }
148:
149: /**
150: * @see org.apache.cocoon.caching.CacheableProcessingComponent#getKey()
151: */
152: public Serializable getKey() {
153: return Integer.toString(this .indentSize);
154: }
155:
156: /**
157: * @see org.apache.cocoon.caching.CacheableProcessingComponent#getValidity()
158: */
159: public SourceValidity getValidity() {
160: return NOPValidity.SHARED_INSTANCE;
161: }
162:
163: /**
164: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#startPrefixMapping(java.lang.String, java.lang.String)
165: */
166: public void startPrefixMapping(String prefix, String uri)
167: throws SAXException {
168: if (this .allowAllURIs) {
169: this .contentHandler.startPrefixMapping(prefix, uri);
170: } else if (this .allowedURIs.contains(uri)) {
171: this .contentHandler.startPrefixMapping(prefix, uri);
172: uriPrefixes.add(prefix);
173: }
174: }
175:
176: /**
177: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#endPrefixMapping(java.lang.String)
178: */
179: public void endPrefixMapping(String prefix) throws SAXException {
180: if (this .allowAllURIs) {
181: this .contentHandler.endPrefixMapping(prefix);
182: } else if (!uriPrefixes.isEmpty()) {
183: if (uriPrefixes.getLast().toString().equals(prefix)) {
184: this .contentHandler.endPrefixMapping(prefix);
185: uriPrefixes.removeLast();
186: }
187: }
188: }
189:
190: /**
191: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
192: */
193: public void startElement(String uri, String qName, String lName,
194: Attributes attrs) throws SAXException {
195: if (!inlineElements.contains(qName)) {
196: int indentSize = (this .indentSize * this .numIndents)
197: % MAX_INDENT;
198: this .contentHandler.ignorableWhitespace(INDENT, 0,
199: indentSize + 1);
200: ++this .numIndents;
201: this .lastElement = qName;
202: }
203: this .contentHandler.startElement(uri, qName, lName, attrs);
204: }
205:
206: /**
207: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#endElement(java.lang.String, java.lang.String, java.lang.String)
208: */
209: public void endElement(String uri, String qName, String lName)
210: throws SAXException {
211: if (!inlineElements.contains(qName)) {
212: --this .numIndents;
213: if (this .lastElement == null
214: || !this .lastElement.equals(qName)) {
215: int indentSize = (this .indentSize * this .numIndents)
216: % MAX_INDENT;
217: this .contentHandler.ignorableWhitespace(INDENT, 0,
218: indentSize + 1);
219: }
220: this .lastElement = null;
221: }
222: this .contentHandler.endElement(uri, qName, lName);
223: }
224:
225: /**
226: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#characters(char[], int, int)
227: */
228: public void characters(char[] ch, int start, int length)
229: throws SAXException {
230: int end = start + length;
231: for (int i = start; i < end; ++i) {
232: if (!Character.isWhitespace(ch[i])) {
233: this .contentHandler.characters(ch, start, length);
234: return;
235: }
236: }
237: this .contentHandler.characters(INDENT, 1, 1);
238: }
239:
240: /**
241: * @see org.apache.cocoon.transformation.AbstractSAXTransformer#ignorableWhitespace(char[], int, int)
242: */
243: public void ignorableWhitespace(char[] ch, int start, int length)
244: throws SAXException {
245: // Do nothing
246: }
247: }
|