001: /*--
002:
003: Copyright (C) 2000 Brett McLaughlin & Jason Hunter.
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009:
010: 1. Redistributions of source code must retain the above copyright
011: notice, this list of conditions, and the following disclaimer.
012:
013: 2. Redistributions in binary form must reproduce the above copyright
014: notice, this list of conditions, and the disclaimer that follows
015: these conditions in the documentation and/or other materials
016: provided with the distribution.
017:
018: 3. The name "JDOM" must not be used to endorse or promote products
019: derived from this software without prior written permission. For
020: written permission, please contact license@jdom.org.
021:
022: 4. Products derived from this software may not be called "JDOM", nor
023: may "JDOM" appear in their name, without prior written permission
024: from the JDOM Project Management (pm@jdom.org).
025:
026: In addition, we request (but do not require) that you include in the
027: end-user documentation provided with the redistribution and/or in the
028: software itself an acknowledgement equivalent to the following:
029: "This product includes software developed by the
030: JDOM Project (http://www.jdom.org/)."
031: Alternatively, the acknowledgment may be graphical using the logos
032: available at http://www.jdom.org/images/logos.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
036: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
037: DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
038: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
039: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
040: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
041: USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
042: ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
043: OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
044: OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
045: SUCH DAMAGE.
046:
047: This software consists of voluntary contributions made by many
048: individuals on behalf of the JDOM Project and was originally
049: created by Brett McLaughlin <brett@jdom.org> and
050: Jason Hunter <jhunter@jdom.org>. For more information on the
051: JDOM Project, please see <http://www.jdom.org/>.
052:
053: */
054: package sax;
055:
056: import java.util.Stack;
057:
058: import org.xml.sax.Attributes;
059: import org.xml.sax.SAXException;
060: import org.xml.sax.XMLReader;
061:
062: /**
063: * Filter for removing formatting from data- or field-oriented XML.
064: *
065: * <i>Code and comments adapted from DataWriter-0.2, written
066: * by David Megginson and released into the public domain,
067: * without warranty.</i>
068: *
069: * <p>This filter removes leading and trailing whitespace from
070: * field-oriented XML without mixed content. Note that this class will
071: * likely not yield appropriate results for document-oriented XML like
072: * XHTML pages, which mix character data and elements together.</p>
073: *
074: * @see DataFormatFilter
075: */
076: public class DataUnformatFilter extends XMLFilterBase {
077:
078: ////////////////////////////////////////////////////////////////////
079: // Constructors.
080: ////////////////////////////////////////////////////////////////////
081:
082: /**
083: * Create a new filter.
084: */
085: public DataUnformatFilter() {
086: }
087:
088: /**
089: * Create a new filter.
090: *
091: * <p>Use the XMLReader provided as the source of events.</p>
092: *
093: * @param xmlreader The parent in the filter chain.
094: */
095: public DataUnformatFilter(XMLReader xmlreader) {
096: super (xmlreader);
097: }
098:
099: ////////////////////////////////////////////////////////////////////
100: // Public methods.
101: ////////////////////////////////////////////////////////////////////
102:
103: /**
104: * Reset the filter so that it can be reused.
105: *
106: * <p>This method is especially useful if the filter failed
107: * with an exception the last time through.</p>
108: */
109: public void reset() {
110: state = SEEN_NOTHING;
111: stateStack = new Stack();
112: whitespace = new StringBuffer();
113: }
114:
115: ////////////////////////////////////////////////////////////////////
116: // Methods from org.xml.sax.ContentHandler.
117: ////////////////////////////////////////////////////////////////////
118:
119: /**
120: * Filter a start document event.
121: *
122: * <p>Reset state and pass the event on for further processing.</p>
123: *
124: * @exception org.xml.sax.SAXException If a filter
125: * further down the chain raises an exception.
126: * @see org.xml.sax.ContentHandler#startDocument
127: */
128: public void startDocument() throws SAXException {
129: reset();
130: super .startDocument();
131: }
132:
133: /**
134: * Filter a start element event.
135: *
136: * @param uri The element's Namespace URI.
137: * @param localName The element's local name.
138: * @param qName The element's qualified (prefixed) name.
139: * @param atts The element's attribute list.
140: * @exception org.xml.sax.SAXException If a filter
141: * further down the chain raises an exception.
142: * @see org.xml.sax.ContentHandler#startElement
143: */
144: public void startElement(String uri, String localName,
145: String qName, Attributes atts) throws SAXException {
146: clearWhitespace();
147: stateStack.push(SEEN_ELEMENT);
148: state = SEEN_NOTHING;
149: super .startElement(uri, localName, qName, atts);
150: }
151:
152: /**
153: * Filter an end element event.
154: *
155: * @param uri The element's Namespace URI.
156: * @param localName The element's local name.
157: * @param qName The element's qualified (prefixed) name.
158: * @exception org.xml.sax.SAXException If a filter
159: * further down the chain raises an exception.
160: * @see org.xml.sax.ContentHandler#endElement
161: */
162: public void endElement(String uri, String localName, String qName)
163: throws SAXException {
164: if (state == SEEN_ELEMENT) {
165: clearWhitespace();
166: } else {
167: emitWhitespace();
168: }
169: state = stateStack.pop();
170: super .endElement(uri, localName, qName);
171: }
172:
173: /**
174: * Filter a character data event.
175: *
176: * @param ch The characters to write.
177: * @param start The starting position in the array.
178: * @param length The number of characters to use.
179: * @exception org.xml.sax.SAXException If a filter
180: * further down the chain raises an exception.
181: * @see org.xml.sax.ContentHandler#characters
182: */
183: public void characters(char ch[], int start, int length)
184: throws SAXException {
185: if (state != SEEN_DATA) {
186:
187: /* Look for non-whitespace. */
188:
189: int end = start + length;
190: while (end-- > start) {
191: if (!isXMLWhitespace(ch[end]))
192: break;
193: }
194:
195: /*
196: * If all the characters are whitespace, save them for later.
197: * If we've got some data, emit any saved whitespace and update
198: * our state to show we've seen data.
199: */
200:
201: if (end < start) {
202: saveWhitespace(ch, start, length);
203: } else {
204: state = SEEN_DATA;
205: emitWhitespace();
206: }
207: }
208:
209: /* Pass on everything inside a data field. */
210:
211: if (state == SEEN_DATA) {
212: super .characters(ch, start, length);
213: }
214: }
215:
216: /**
217: * Filter an ignorable whitespace event.
218: *
219: * @param ch The array of characters to write.
220: * @param start The starting position in the array.
221: * @param length The number of characters to write.
222: * @exception org.xml.sax.SAXException If a filter
223: * further down the chain raises an exception.
224: * @see org.xml.sax.ContentHandler#ignorableWhitespace
225: */
226: public void ignorableWhitespace(char ch[], int start, int length)
227: throws SAXException {
228: emitWhitespace();
229: // ignore
230: }
231:
232: /**
233: * Filter a processing instruction event.
234: *
235: * @param target The PI target.
236: * @param data The PI data.
237: * @exception org.xml.sax.SAXException If a filter
238: * further down the chain raises an exception.
239: * @see org.xml.sax.ContentHandler#processingInstruction
240: */
241: public void processingInstruction(String target, String data)
242: throws SAXException {
243: emitWhitespace();
244: super .processingInstruction(target, data);
245: }
246:
247: ////////////////////////////////////////////////////////////////////
248: // Internal methods.
249: ////////////////////////////////////////////////////////////////////
250:
251: /**
252: * Saves trailing whitespace.
253: */
254: protected void saveWhitespace(char[] ch, int start, int length) {
255: whitespace.append(ch, start, length);
256: }
257:
258: /**
259: * Passes saved whitespace down the filter chain.
260: */
261: protected void emitWhitespace() throws SAXException {
262: char[] data = new char[whitespace.length()];
263: whitespace.getChars(0, data.length, data, 0);
264: whitespace.setLength(0);
265: super .characters(data, 0, data.length);
266: }
267:
268: /**
269: * Discards saved whitespace.
270: */
271: protected void clearWhitespace() {
272: whitespace.setLength(0);
273: }
274:
275: /**
276: * Returns <var>true</var> if character is XML whitespace.
277: */
278: private boolean isXMLWhitespace(char c) {
279: return c == ' ' || c == '\t' || c == '\r' || c == '\n';
280: }
281:
282: ////////////////////////////////////////////////////////////////////
283: // Constants.
284: ////////////////////////////////////////////////////////////////////
285:
286: private static final Object SEEN_NOTHING = new Object();
287: private static final Object SEEN_ELEMENT = new Object();
288: private static final Object SEEN_DATA = new Object();
289:
290: ////////////////////////////////////////////////////////////////////
291: // Internal state.
292: ////////////////////////////////////////////////////////////////////
293:
294: private Object state = SEEN_NOTHING;
295: private Stack stateStack = new Stack();
296:
297: private StringBuffer whitespace = new StringBuffer();
298:
299: }
300:
301: // end of DataUnformatFilter.java
|