001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.xml;
018:
019: import org.apache.cocoon.xml.AbstractXMLPipe;
020:
021: import org.xml.sax.Attributes;
022: import org.xml.sax.ContentHandler;
023: import org.xml.sax.SAXException;
024:
025: /**
026: * A SAX filter to remove whitespace character, which disturb the
027: * XML matching process.
028: *
029: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
030: * @version CVS $Id: WhitespaceFilter.java 433543 2006-08-22 06:22:54Z crossley $
031: */
032: public class WhitespaceFilter extends AbstractXMLPipe {
033: private StringBuffer buffer = null;
034:
035: /**
036: * Create a new WhitespaceFilter.
037: *
038: * @param handler Content handler.
039: */
040: public WhitespaceFilter(ContentHandler handler) {
041: setContentHandler(handler);
042: }
043:
044: /**
045: * Receive notification of character data.
046: */
047: public void characters(char c[], int start, int len)
048: throws SAXException {
049: if (contentHandler == null) {
050: return;
051: }
052:
053: if (buffer == null) {
054: buffer = new StringBuffer();
055: }
056:
057: buffer.append(c, start, len);
058: }
059:
060: /**
061: * Receive notification of ignorable whitespace in element content.
062: */
063: public void ignorableWhitespace(char c[], int start, int len)
064: throws SAXException {
065: // ignore
066: }
067:
068: /**
069: * Receive notification of the beginning of an element.
070: */
071: public void startElement(String namespaceURI, String localName,
072: String qName, Attributes atts) throws SAXException {
073:
074: pushText();
075: contentHandler.startElement(namespaceURI, localName, qName,
076: atts);
077: }
078:
079: /**
080: * Receive notification of the end of an element.
081: */
082: public void endElement(String uri, String loc, String raw)
083: throws SAXException {
084:
085: pushText();
086: contentHandler.endElement(uri, loc, raw);
087: }
088:
089: /**
090: * Receive notification of a processing instruction.
091: */
092: public void processingInstruction(String target, String data)
093: throws SAXException {
094:
095: pushText();
096: contentHandler.processingInstruction(target, data);
097: }
098:
099: /**
100: * Report an XML comment anywhere in the document.
101: *
102: * @param ch An array holding the characters in the comment.
103: * @param start The starting position in the array.
104: * @param len The number of characters to use from the array.
105: */
106: public void comment(char ch[], int start, int len)
107: throws SAXException {
108:
109: pushText();
110: super .comment(ch, start, len);
111: }
112:
113: public void pushText() throws SAXException {
114:
115: if (buffer != null) {
116: String text = buffer.toString();
117:
118: StringBuffer normalized = new StringBuffer();
119:
120: for (int i = 0; i < text.length(); i++) {
121: if (Character.isWhitespace(text.charAt(i))) {
122: normalized.append(' ');
123: while (((i + 1) < text.length())
124: && (Character.isWhitespace(text
125: .charAt(i + 1))))
126: i++;
127: } else {
128: normalized.append(text.charAt(i));
129: }
130: }
131:
132: text = normalized.toString().trim();
133:
134: if (text.length() > 0) {
135: contentHandler.characters(text.toCharArray(), 0, text
136: .length());
137: }
138:
139: buffer = null;
140: }
141: }
142: }
|