001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.generation;
018:
019: import java.io.IOException;
020: import java.util.HashMap;
021: import java.util.Map;
022:
023: import org.apache.avalon.framework.logger.Logger;
024: import org.apache.avalon.framework.parameters.Parameters;
025: import org.apache.avalon.framework.service.ServiceException;
026: import org.apache.avalon.framework.service.ServiceManager;
027: import org.apache.cocoon.ProcessingException;
028: import org.apache.cocoon.components.source.SourceUtil;
029: import org.apache.cocoon.environment.Context;
030: import org.apache.cocoon.environment.ObjectModelHelper;
031: import org.apache.cocoon.environment.SourceResolver;
032: import org.apache.cocoon.xml.dom.DOMStreamer;
033: import org.apache.excalibur.source.TraversableSource;
034: import org.apache.excalibur.xml.xpath.PrefixResolver;
035: import org.apache.excalibur.xml.xpath.XPathProcessor;
036: import org.apache.regexp.RE;
037: import org.apache.regexp.RESyntaxException;
038: import org.w3c.dom.Document;
039: import org.w3c.dom.NodeList;
040: import org.xml.sax.SAXException;
041: import org.xml.sax.helpers.AttributesImpl;
042:
043: /**
044: * Generates an XML collection listing performing XPath queries on XML sources.
045: * It can be used both as a plain TraversableGenerator or, if an XPath is
046: * specified, it will perform an XPath query on every XML resource, where "xml
047: * resource" is, by default, any resource ending with ".xml", which can be
048: * overridden by setting the (regexp) pattern "xmlFiles" as a sitemap parameter,
049: * or where the name of the resource has a container-wide mime-type mapping to
050: * 'text/xml' such as specified by mime-mapping elements in a web.xml
051: * descriptor file.
052: *
053: * The XPath can be specified in two ways:
054: * <ol>
055: * <li>By using an XPointerish syntax in the URL: everything following the
056: * pound sign (possibly preceding query
057: * string arguments) will be treated as the XPath;
058: * </li>
059: * <li>Specifying it as a sitemap parameter named "xpath".</li>
060: * </ol>
061: *
062: * Sample usage:
063: *
064: * Sitemap:
065: * <map:match pattern="documents/**">
066: * <map:generate type="xpathdirectory"
067: * src="docs/{1}#/article/title|/article/abstract" >
068: * <map:parameter name="xmlFiles" value="\.xml$"/>
069: * </map:generate>
070: * <map:serialize type="xml" /> </map:match>
071: *
072: * Request:
073: * http://www.some.host/documents/test
074: * Result:
075: * <collection:collection
076: * name="test" lastModified="1010400942000"
077: * date="1/7/02 11:55 AM" requested="true"
078: * xmlns:collection="http://apache.org/cocoon/collection/1.0">
079: * <collection:collection name="subdirectory" lastModified="1010400942000" date="1/7/02 11:55 AM" />
080: * <collection:resource name="test.xml" lastModified="1011011579000" date="1/14/02 1:32 PM">
081: * <collection:xpath docid="test.xml" query="/article/title">
082: * <title>This is a test document</title>
083: * <abstract>
084: * <para>Abstract of my test article</para>
085: * </abstract>
086: * </collection:xpath>
087: * </collection:resource>
088: * <collection:resource name="test.gif" lastModified="1011011579000" date="1/14/02 1:32 PM"/>
089: * </collection:collection>
090: *
091: * If you need to use namespaces, you can set them as sitemap parameters in
092: * the form:
093: * &lt;map:parameter name="xmlns:<i>your prefix</i>" value="nsURI"/&gt;
094: *
095: * @author <a href="mailto:gianugo@apache.org">Gianugo Rabellino</a>
096: * @author <a href="mailto:d.madama@pro-netics.com">Daniele Madama</a>
097: * @version CVS $Id: XPathTraversableGenerator.java 433543 2006-08-22 06:22:54Z crossley $
098: */
099: public class XPathTraversableGenerator extends TraversableGenerator {
100:
101: /** Local name for the element that contains the included XML snippet. */
102: protected static final String XPATH_NODE_NAME = "xpath";
103: /** Attribute for the XPath query. */
104: protected static final String QUERY_ATTR_NAME = "query";
105: /** The document containing a successful XPath query */
106: protected static final String RESULT_DOCID_ATTR = "docid";
107:
108: /** The regular expression for the XML files pattern. */
109: protected RE xmlRE;
110:
111: /** The document that should be parsed and (partly) included. */
112: protected Document doc;
113:
114: /** The XPath. */
115: protected String xpath;
116:
117: /** The XPath processor. */
118: protected XPathProcessor processor;
119:
120: /** The prefix resolver for namespaced queries */
121: protected XPathPrefixResolver prefixResolver;
122:
123: /** The cocoon context used for mime-type mappings */
124: protected Context context;
125:
126: public void setup(SourceResolver resolver, Map objectModel,
127: String src, Parameters par) throws ProcessingException,
128: SAXException, IOException {
129: super .setup(resolver, objectModel, src, par);
130:
131: // See if an XPath was specified
132: int pointer;
133: if ((pointer = this .source.indexOf("#")) != -1) {
134: int endpointer = this .source.indexOf('?');
135: if (endpointer != -1) {
136: this .xpath = source.substring(pointer + 1, endpointer);
137: } else {
138: this .xpath = source.substring(pointer + 1);
139: }
140: this .source = src.substring(0, pointer);
141: if (endpointer != -1) {
142: this .source += src.substring(endpointer);
143: }
144: } else {
145: this .xpath = par.getParameter("xpath", null);
146: }
147: this .cacheKeyParList.add(this .xpath);
148: if (getLogger().isDebugEnabled()) {
149: getLogger().debug(
150: "Applying XPath: " + xpath + " to collection "
151: + source);
152: }
153:
154: String xmlFilesPattern = null;
155: try {
156: xmlFilesPattern = par.getParameter("xmlFiles", "\\.xml$");
157: this .cacheKeyParList.add(xmlFilesPattern);
158: this .xmlRE = new RE(xmlFilesPattern);
159: if (this .getLogger().isDebugEnabled()) {
160: this .getLogger().debug(
161: "pattern for XML files: " + xmlFilesPattern);
162: }
163: } catch (RESyntaxException rese) {
164: throw new ProcessingException(
165: "Syntax error in regexp pattern '"
166: + xmlFilesPattern + "'", rese);
167: }
168:
169: String[] params = par.getNames();
170: this .prefixResolver = new XPathPrefixResolver(this .getLogger());
171: for (int i = 0; i < params.length; i++) {
172: if (params[i].startsWith("xmlns:")) {
173: String paramValue = par.getParameter(params[i], "");
174: String paramName = params[i].substring(6);
175: if (getLogger().isDebugEnabled()) {
176: getLogger()
177: .debug(
178: "add param to prefixResolver: "
179: + paramName);
180: }
181: this .prefixResolver.addPrefix(paramName, paramValue);
182: }
183: }
184:
185: this .context = ObjectModelHelper.getContext(objectModel);
186: }
187:
188: public void service(ServiceManager manager) throws ServiceException {
189: super .service(manager);
190: processor = (XPathProcessor) manager
191: .lookup(XPathProcessor.ROLE);
192: }
193:
194: public void dispose() {
195: if (this .manager != null) {
196: this .manager.release(processor);
197: this .processor = null;
198: }
199: super .dispose();
200: }
201:
202: protected void addContent(TraversableSource source)
203: throws SAXException, ProcessingException {
204: super .addContent(source);
205: if (!source.isCollection() && isXML(source) && xpath != null) {
206: performXPathQuery(source);
207: }
208: }
209:
210: /**
211: * Determines if a given TraversableSource shall be handled as XML.
212: *
213: * @param path the TraversableSource to check
214: * @return true if the given TraversableSource shall handled as XML, false
215: * otherwise.
216: */
217: protected boolean isXML(TraversableSource path) {
218: String mimeType = this .context.getMimeType(path.getName());
219: return this .xmlRE.match(path.getName())
220: || "text/xml".equalsIgnoreCase(mimeType);
221: }
222:
223: /**
224: * Performs an XPath query on the source.
225: * @param in the Source the XPath is performed on.
226: * @throws SAXException if something goes wrong while adding the XML snippet.
227: */
228: protected void performXPathQuery(TraversableSource in)
229: throws SAXException {
230: doc = null;
231: try {
232: doc = SourceUtil.toDOM(this .manager, "text/xml", in);
233: } catch (SAXException se) {
234: getLogger().error(
235: "Warning:" + in.getName()
236: + " is not a valid XML document. Ignoring");
237: } catch (Exception e) {
238: this .getLogger().error(
239: "Unable to resolve and parse document" + e);
240: }
241: if (doc != null) {
242: NodeList nl = processor.selectNodeList(doc
243: .getDocumentElement(), xpath, this .prefixResolver);
244: final String id = in.getName();
245: AttributesImpl attributes = new AttributesImpl();
246: attributes.addAttribute("", RESULT_DOCID_ATTR,
247: RESULT_DOCID_ATTR, " CDATA", id);
248: attributes.addAttribute("", QUERY_ATTR_NAME,
249: QUERY_ATTR_NAME, "CDATA", xpath);
250: super .contentHandler.startElement(URI, XPATH_NODE_NAME,
251: PREFIX + ":" + XPATH_NODE_NAME, attributes);
252: DOMStreamer ds = new DOMStreamer(super .xmlConsumer);
253: for (int i = 0; i < nl.getLength(); i++) {
254: ds.stream(nl.item(i));
255: }
256: super .contentHandler.endElement(URI, XPATH_NODE_NAME,
257: PREFIX + ":" + XPATH_NODE_NAME);
258: }
259: }
260:
261: /**
262: * Recycle resources
263: *
264: */
265: public void recycle() {
266: this .xpath = null;
267: this .doc = null;
268: this .xmlRE = null;
269: this .prefixResolver = null;
270: this .context = null;
271: super .recycle();
272: }
273:
274: /**
275: * A brain-dead PrefixResolver implementation
276: */
277: static class XPathPrefixResolver implements PrefixResolver {
278:
279: private Map params;
280:
281: private Logger logger;
282:
283: public XPathPrefixResolver(Logger logger) {
284: this .params = new HashMap();
285: this .logger = logger;
286: }
287:
288: /**
289: * Get a namespace URI given a prefix.
290: *
291: * @see org.apache.excalibur.xml.xpath.PrefixResolver#prefixToNamespace(java.lang.String)
292: */
293: public String prefixToNamespace(String prefix) {
294: if (this .logger.isDebugEnabled()) {
295: this .logger.debug("prefix: " + prefix);
296: }
297: if (this .params.containsKey(prefix)) {
298: if (this .logger.isDebugEnabled()) {
299: this .logger.debug("prefix; " + prefix
300: + " - namespace: "
301: + this .params.get(prefix));
302: }
303: return (String) this .params.get(prefix);
304: }
305: return null;
306: }
307:
308: public void addPrefix(String prefix, String uri) {
309: this.params.put(prefix, uri);
310: }
311: }
312: }
|