001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.transformation;
018:
019: import java.io.IOException;
020: import java.util.HashMap;
021: import java.util.Iterator;
022: import java.util.Map;
023:
024: import org.apache.avalon.framework.parameters.Parameters;
025:
026: import org.apache.cocoon.ProcessingException;
027: import org.apache.cocoon.environment.SourceResolver;
028: import org.apache.cocoon.xml.dom.DOMBuilder;
029:
030: import org.w3c.dom.Document;
031: import org.xml.sax.Attributes;
032: import org.xml.sax.SAXException;
033:
034: /**
035: * This transformer sieves an incoming stream of xml
036: * and feeds a DOMBuilder with it.
037: *
038: * @author <a href="mailto:paul@luminas.co.uk">Paul Russell</a>
039: * @author <a href="mailto:haul@apache.org">Christian Haul</a>
040: * @version CVS $Id: AbstractExtractionTransformer.java 433543 2006-08-22 06:22:54Z crossley $
041: */
042: abstract public class AbstractExtractionTransformer extends
043: AbstractTransformer {
044:
045: protected DOMBuilder currentBuilder;
046:
047: private Map prefixMap;
048:
049: protected int extractLevel;
050:
051: /** Setup the transformer. */
052: public void setup(SourceResolver resolver, Map objectModel,
053: String src, Parameters parameters)
054: throws ProcessingException, SAXException, IOException {
055: extractLevel = 0;
056: prefixMap = new HashMap();
057: }
058:
059: public void recycle() {
060: this .extractLevel = 0;
061: this .currentBuilder = null;
062: this .prefixMap = null;
063: super .recycle();
064: }
065:
066: /**
067: * Begin the scope of a prefix-URI Namespace mapping.
068: *
069: * @param prefix The Namespace prefix being declared.
070: * @param uri The Namespace URI the prefix is mapped to.
071: */
072: public void startPrefixMapping(String prefix, String uri)
073: throws SAXException {
074: if (extractLevel == 0) {
075: super .startPrefixMapping(prefix, uri);
076: prefixMap.put(prefix, uri);
077: } else {
078: this .currentBuilder.startPrefixMapping(prefix, uri);
079: }
080: }
081:
082: /**
083: * End the scope of a prefix-URI mapping.
084: *
085: * @param prefix The prefix that was being mapping.
086: */
087: public void endPrefixMapping(String prefix) throws SAXException {
088: if (extractLevel == 0) {
089: super .endPrefixMapping(prefix);
090: prefixMap.remove(prefix);
091: } else {
092: this .currentBuilder.endPrefixMapping(prefix);
093: }
094: }
095:
096: /**
097: * Receive notification of the beginning of an element. Uses
098: * startExtraction to determine whether to start
099: * extracting. Nested triggering tags result in only one document.
100: * * startExtractedDocument with the first node of the extracted
101: * Document.
102: *
103: * @param uri The Namespace URI, or the empty string if the element has no
104: * Namespace URI or if Namespace
105: * processing is not being performed.
106: * @param loc The local name (without prefix), or the empty string if
107: * Namespace processing is not being performed.
108: * @param raw The raw XML 1.0 name (with prefix), or the empty string if
109: * raw names are not available.
110: * @param a The attributes attached to the element. If there are no
111: * attributes, it shall be an empty Attributes object.
112: */
113: public void startElement(String uri, String loc, String raw,
114: Attributes a) throws SAXException {
115: if (!startExtracting(uri, loc, raw, a)) {
116:
117: if (extractLevel == 0) {
118: super .startElement(uri, loc, raw, a);
119: } else {
120: this .currentBuilder.startElement(uri, loc, raw, a);
121: }
122:
123: } else {
124:
125: extractLevel++;
126: if (this .getLogger().isDebugEnabled()) {
127: getLogger().debug(
128: "extractLevel now " + extractLevel + ".");
129: }
130:
131: if (extractLevel != 1) {
132: this .currentBuilder.startElement(uri, loc, raw, a);
133: } else {
134:
135: // setup new document
136: this .currentBuilder = new DOMBuilder();
137: this .currentBuilder.startDocument();
138: // setup namespaces
139: Iterator itt = prefixMap.entrySet().iterator();
140: while (itt.hasNext()) {
141: Map.Entry entry = (Map.Entry) itt.next();
142: this .currentBuilder.startPrefixMapping(
143: (String) entry.getKey(), (String) entry
144: .getValue());
145: }
146: // start root node
147: startExtractingDocument(uri, loc, raw, a);
148:
149: }
150:
151: }
152: }
153:
154: /**
155: * Receive notification of the end of an element. Uses
156: * endExtraction to determine whether to stop extracting or
157: * not. Calls endExtractedDocument with the extracted document.
158: *
159: * @param uri The Namespace URI, or the empty string if the element has no
160: * Namespace URI or if Namespace
161: * processing is not being performed.
162: * @param loc The local name (without prefix), or the empty string if
163: * Namespace processing is not being performed.
164: * @param raw The raw XML 1.0 name (with prefix), or the empty string if
165: * raw names are not available.
166: */
167: public void endElement(String uri, String loc, String raw)
168: throws SAXException {
169: if (extractLevel == 0) {
170: super .endElement(uri, loc, raw);
171: } else {
172: if (endExtracting(uri, loc, raw)) {
173: extractLevel--;
174: if (this .getLogger().isDebugEnabled()) {
175: getLogger().debug(
176: "extractLevel now " + extractLevel + ".");
177: }
178:
179: if (extractLevel != 0) {
180: this .currentBuilder.endElement(uri, loc, raw);
181: } else {
182:
183: // end root element
184: endExtractingDocument(uri, loc, raw);
185: // finish building the document. remove existing prefix mappings.
186: Iterator itt = prefixMap.entrySet().iterator();
187: while (itt.hasNext()) {
188: Map.Entry entry = (Map.Entry) itt.next();
189: this .currentBuilder
190: .endPrefixMapping((String) entry
191: .getKey());
192: }
193: this .currentBuilder.endDocument();
194:
195: handleExtractedDocument(this .currentBuilder
196: .getDocument());
197:
198: if (this .getLogger().isDebugEnabled()) {
199: getLogger().debug("Stored document.");
200: }
201:
202: }
203: } else {
204: this .currentBuilder.endElement(uri, loc, raw);
205: }
206: }
207: }
208:
209: /**
210: * Receive notification of character data.
211: *
212: * @param c The characters from the XML document.
213: * @param start The start position in the array.
214: * @param len The number of characters to read from the array.
215: */
216: public void characters(char c[], int start, int len)
217: throws SAXException {
218: if (extractLevel == 0) {
219: super .characters(c, start, len);
220: } else {
221: this .currentBuilder.characters(c, start, len);
222: }
223: }
224:
225: /**
226: * Receive notification of ignorable whitespace in element content.
227: *
228: * @param c The characters from the XML document.
229: * @param start The start position in the array.
230: * @param len The number of characters to read from the array.
231: */
232: public void ignorableWhitespace(char c[], int start, int len)
233: throws SAXException {
234: if (extractLevel == 0) {
235: super .ignorableWhitespace(c, start, len);
236: } else {
237: this .currentBuilder.ignorableWhitespace(c, start, len);
238: }
239: }
240:
241: /**
242: * Receive notification of a processing instruction.
243: *
244: * @param target The processing instruction target.
245: * @param data The processing instruction data, or null if none was
246: * supplied.
247: */
248: public void processingInstruction(String target, String data)
249: throws SAXException {
250: if (extractLevel == 0) {
251: super .processingInstruction(target, data);
252: } else {
253: this .currentBuilder.processingInstruction(target, data);
254: }
255: }
256:
257: /**
258: * Receive notification of a skipped entity.
259: *
260: * @param name The name of the skipped entity. If it is a parameter
261: * entity, the name will begin with '%'.
262: */
263: public void skippedEntity(String name) throws SAXException {
264: if (extractLevel == 0) {
265: super .skippedEntity(name);
266: } else {
267: this .currentBuilder.skippedEntity(name);
268: }
269: }
270:
271: /**
272: * Report the start of DTD declarations, if any.
273: *
274: * @param name The document type name.
275: * @param publicId The declared public identifier for the external DTD
276: * subset, or null if none was declared.
277: * @param systemId The declared system identifier for the external DTD
278: * subset, or null if none was declared.
279: */
280: public void startDTD(String name, String publicId, String systemId)
281: throws SAXException {
282: if (extractLevel == 0) {
283: super .startDTD(name, publicId, systemId);
284: } else {
285: throw new SAXException(
286: "Recieved startDTD after beginning fragment extraction process.");
287: }
288: }
289:
290: /**
291: * Report the end of DTD declarations.
292: */
293: public void endDTD() throws SAXException {
294: if (extractLevel == 0) {
295: super .endDTD();
296: } else {
297: throw new SAXException(
298: "Recieved endDTD after beginning fragment extraction process.");
299: }
300: }
301:
302: /**
303: * Report the beginning of an entity.
304: *
305: * @param name The name of the entity. If it is a parameter entity, the
306: * name will begin with '%'.
307: */
308: public void startEntity(String name) throws SAXException {
309: if (extractLevel == 0) {
310: super .startEntity(name);
311: } else {
312: this .currentBuilder.startEntity(name);
313: }
314: }
315:
316: /**
317: * Report the end of an entity.
318: *
319: * @param name The name of the entity that is ending.
320: */
321: public void endEntity(String name) throws SAXException {
322: if (extractLevel == 0) {
323: super .endEntity(name);
324: } else {
325: this .currentBuilder.endEntity(name);
326: }
327: }
328:
329: /**
330: * Report the start of a CDATA section.
331: */
332: public void startCDATA() throws SAXException {
333: if (extractLevel == 0) {
334: super .startCDATA();
335: } else {
336: this .currentBuilder.startCDATA();
337: }
338: }
339:
340: /**
341: * Report the end of a CDATA section.
342: */
343: public void endCDATA() throws SAXException {
344: if (extractLevel == 0) {
345: super .endCDATA();
346: } else {
347: this .currentBuilder.endCDATA();
348: }
349: }
350:
351: /**
352: * Report an XML comment anywhere in the document.
353: *
354: * @param ch An array holding the characters in the comment.
355: * @param start The starting position in the array.
356: * @param len The number of characters to use from the array.
357: */
358: public void comment(char ch[], int start, int len)
359: throws SAXException {
360: if (extractLevel == 0) {
361: super .comment(ch, start, len);
362: } else {
363: this .currentBuilder.comment(ch, start, len);
364: }
365: }
366:
367: /**
368: * Receive notification of the beginning of an element and signal extraction start.
369: *
370: * @param uri The Namespace URI, or the empty string if the element has no
371: * Namespace URI or if Namespace
372: * processing is not being performed.
373: * @param loc The local name (without prefix), or the empty string if
374: * Namespace processing is not being performed.
375: * @param raw The raw XML 1.0 name (with prefix), or the empty string if
376: * raw names are not available.
377: * @param a The attributes attached to the element. If there are no
378: * attributes, it shall be an empty Attributes object.
379: * @return a <code>boolean</code> value to signal to start extracting
380: */
381: abstract boolean startExtracting(String uri, String loc,
382: String raw, Attributes a);
383:
384: /**
385: * Receive notification of the beginning of the extracted Document. Per default send
386: * startElement message to document builder. Override if necessary. Must override
387: * {@link #endExtractingDocument(String, String, String)} as well.
388: *
389: * @param uri The Namespace URI, or the empty string if the element has no
390: * Namespace URI or if Namespace
391: * processing is not being performed.
392: * @param loc The local name (without prefix), or the empty string if
393: * Namespace processing is not being performed.
394: * @param raw The raw XML 1.0 name (with prefix), or the empty string if
395: * raw names are not available.
396: * @param a The attributes attached to the element. If there are no
397: * attributes, it shall be an empty Attributes object.
398: */
399: public void startExtractingDocument(String uri, String loc,
400: String raw, Attributes a) throws SAXException {
401: this .currentBuilder.startElement(uri, loc, raw, a);
402: }
403:
404: /**
405: * Receive notification of the end of an element and signal extraction end.
406: *
407: * @param uri The Namespace URI, or the empty string if the element has no
408: * Namespace URI or if Namespace
409: * processing is not being performed.
410: * @param loc The local name (without prefix), or the empty string if
411: * Namespace processing is not being performed.
412: * @param raw The raw XML 1.0 name (with prefix), or the empty string if
413: * @return a <code>boolean</code> value to signal to stop extracting
414: */
415: abstract boolean endExtracting(String uri, String loc, String raw);
416:
417: /**
418: * Receive notification of the end of the extracted Document. Per default,
419: * send endElement message to document builder. Override if necessary.
420: * Must override
421: * {@link #startExtractingDocument(String, String, String, Attributes)}
422: * as well.
423: *
424: * @param uri The Namespace URI, or the empty string if the element has no
425: * Namespace URI or if Namespace
426: * processing is not being performed.
427: * @param loc The local name (without prefix), or the empty string if
428: * Namespace processing is not being performed.
429: * @param raw The raw XML 1.0 name (with prefix), or the empty string if
430: * raw names are not available.
431: */
432: public void endExtractingDocument(String uri, String loc, String raw)
433: throws SAXException {
434: this .currentBuilder.endElement(uri, loc, raw);
435: }
436:
437: /**
438: * Receive notification of the end of the extracted Document.
439: *
440: * @param doc a <code>Document</code> value
441: */
442: abstract void handleExtractedDocument(Document doc);
443:
444: }
|