001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.transformation;
018:
019: import java.io.IOException;
020: import java.util.HashSet;
021: import java.util.Iterator;
022: import java.util.Map;
023: import java.util.Set;
024: import java.util.StringTokenizer;
025:
026: import org.apache.avalon.framework.activity.Disposable;
027: import org.apache.avalon.framework.activity.Initializable;
028: import org.apache.avalon.framework.configuration.Configuration;
029: import org.apache.avalon.framework.configuration.ConfigurationException;
030: import org.apache.avalon.framework.parameters.Parameters;
031: import org.apache.cocoon.ProcessingException;
032: import org.apache.cocoon.components.modules.input.InputModuleHelper;
033: import org.apache.cocoon.environment.SourceResolver;
034: import org.apache.cocoon.transformation.helpers.VariableConfiguration;
035: import org.xml.sax.Attributes;
036: import org.xml.sax.SAXException;
037: import org.xml.sax.helpers.AttributesImpl;
038:
039: /**
040: * Rewrites URIs in links to a value determined by an InputModule.
041: * The URI scheme identifies the InputModule to use, and the rest of the URI is
042: * used as the attribute name.
043: * <h3>Example</h3>
044: * For instance, if we had an {@link
045: * org.apache.cocoon.components.modules.input.XMLFileModule}, configured to
046: * read values from an XML file:
047: * <pre>
048: * <site>
049: * <faq>
050: * <how_to_boil_eggs href="faq/eggs.html"/>
051: * </faq>
052: * </site>
053: * </pre>
054: * mapped to the prefix 'site:', then <link
055: * href="site:/site/faq/how_to_boil_eggs/@href"> would be replaced with
056: * <link href="faq/eggs.html">
057: * <p>
058: * InputModules are configured twice; first statically in
059: * <code>cocoon.xconf</code>, and then dynamically at runtime, with dynamic
060: * configuration (if any) taking precedence. VariableRewriterTransformer allows
061: * you to pass a dynamic configuration to the InputModules it uses, as follows.
062: * <p>
063: * First, a template Configuration is specified in the static
064: * <map:components> block of the sitemap:
065: * <pre>
066: * <map:transformer name="linkrewriter"
067: * src="org.apache.cocoon.transformation.VariableRewriterTransformer">
068: * <input-module name="site" src="cocoon://samples/link/linkmap" reloadable="true"/>
069: * <input-module name="mapper">
070: * <input-module name="site" src="{src}" reloadable="true"/>
071: * <prefix>/site/</prefix>
072: * <suffix>/@href</suffix>
073: * </input-module>
074: * </map:transformer>
075: * </pre>
076: * Here, we have established dynamic configuration templates for two modules,
077: * 'site' (an {@link org.apache.cocoon.components.modules.input.XMLFileModule})
078: * and 'mapper' (a {@link
079: * org.apache.cocoon.components.modules.input.SimpleMappingMetaModule}). All
080: * other InputModules will use their static configs. Note that the dynamic
081: * config syntax is different from the static config syntax (attributes instead of
082: * elements). Note also that, when configuring a Meta InputModule like
083: * 'mapper', we need to also configure the 'inner' module (here, 'site') with a
084: * nested <input-module>.
085: * <p>
086: * There is one further twist; to have <em>really</em> dynamic configuration,
087: * we need information available only when the transformer actually runs. This
088: * is why the above config was called a "template" Configuration; it needs to
089: * be 'instantiated' and provided extra info, namely:
090: * <ul>
091: * <li>The {src} string will be replaced with the map:transform @src attribute value.
092: * <li>Any other {variables} will be replaced with map:parameter values
093: * </ul>
094: * With the above config template, we can have a matcher like:
095: *
096: * <pre>
097: * <map:match pattern="**welcome">
098: * <map:generate src="index.xml"/>
099: * <map:transform type="linkrewriter" src="cocoon:/{1}linkmap"/>
100: * <map:serialize type="xml"/>
101: * </map:match>
102: * </pre>
103: *
104: * Which would cause the 'mapper' XMLFileModule to be configured with a
105: * different XML file, depending on the request.
106: * <p>
107: * Similarly, we could use a dynamic prefix:
108: * <pre>
109: * <prefix>{prefix}</prefix>
110: * </pre>
111: * in the template config, and:
112: * <pre>
113: * <map:parameter name="prefix" value="/site/"/>
114: * </pre>
115: * in the map:transform
116: * <p>
117: *
118: * <h3>Configuration</h3>
119: * <p>
120: * The following map:parameter's are recognised:
121: * <dl>
122: * <dt>link-attrs</dt>
123: * <dd>Space-separated list of attributes to consider links (to be
124: * transformed). Defaults to 'href'.</dd>
125: * <dt>schemes</dt>
126: * <dd>Space-separated list of URI schemes to explicitly include. If specified, all URIs with unlisted schemes will not be converted.</dd>
127: * <dt>exclude-schemes</dt>
128: * <dd>Space-separated list of URI schemes to explicitly exclude.</dd>
129: * <dt>bad-link-str</dt>
130: * <dd>String to use for links with a correct InputModule prefix, but no value
131: * therein. Defaults to the original URI.</dd>
132: * </dl>
133: *
134: * <p>
135: * Note that currently, only links in the default ("") namespace are converted.
136: *
137: * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
138: * @version $Id: VariableRewriterTransformer.java 433543 2006-08-22 06:22:54Z crossley $
139: */
140: public class VariableRewriterTransformer extends AbstractSAXTransformer
141: implements Initializable, Disposable {
142:
143: private static final String NAMESPACE = "";
144:
145: /** A list of attributes considered 'links' */
146: private Set linkAttrs;
147:
148: /** List containing schemes (protocols) of links to log */
149: private Set inSchemes;
150: private Set outSchemes;
151:
152: /** Configuration passed to the component once through configure(). */
153: private Configuration origConf;
154:
155: /** Derivation of origConf with variables obtained from setup() parameters.
156: * Recreated once per invocation. */
157: private Configuration conf;
158:
159: private InputModuleHelper modHelper;
160:
161: private String badLinkStr;
162:
163: /**
164: * Configure this component from the map:transformer block. Called before
165: * initialization and setup.
166: */
167: public void configure(Configuration conf)
168: throws ConfigurationException {
169: super .configure(conf);
170: this .origConf = conf;
171: }
172:
173: /**
174: * Initiate resources prior to this component becoming active.
175: */
176: public void initialize() throws Exception {
177: this .defaultNamespaceURI = NAMESPACE;
178: this .modHelper = new InputModuleHelper();
179: this .modHelper.setup(this .manager);
180: }
181:
182: /**
183: * Setup this component to handle a map:transform instance.
184: */
185: public void setup(SourceResolver resolver, Map objectModel,
186: String src, Parameters parameters)
187: throws ProcessingException, SAXException, IOException {
188: super .setup(resolver, objectModel, src, parameters);
189: this .badLinkStr = parameters.getParameter("bad-link-str", null);
190: this .linkAttrs = split(parameters.getParameter("link-attrs",
191: "href"), " ");
192: this .inSchemes = split(parameters.getParameter("schemes", ""),
193: " ");
194: this .outSchemes = split(parameters.getParameter(
195: "exclude-schemes", ""), " ");
196:
197: // Generate conf
198: VariableConfiguration varConf = new VariableConfiguration(
199: this .origConf);
200: varConf.addVariable("src", src);
201: varConf.addVariables(parameters);
202: try {
203: this .conf = varConf.getConfiguration();
204: } catch (ConfigurationException ce) {
205: throw new ProcessingException(
206: "Couldn't create dynamic config ", ce);
207: }
208: }
209:
210: /** Split a string into a Set of strings.
211: * @param str String to split
212: * @param delim Delimiter character
213: * @return A Set of strings in 'str'
214: */
215: private Set split(String str, String delim) {
216: Set schemes = new HashSet();
217: StringTokenizer st = new StringTokenizer(str, delim);
218: while (st.hasMoreTokens()) {
219: String pfx = st.nextToken();
220: schemes.add(pfx);
221: }
222: return schemes;
223: }
224:
225: /**
226: * Start processing elements of our namespace.
227: * This hook is invoked for each sax event with our namespace.
228: * @param uri The namespace of the element.
229: * @param name The local name of the element.
230: * @param raw The qualified name of the element.
231: * @param attr The attributes of the element.
232: */
233: public void startTransformingElement(String uri, String name,
234: String raw, Attributes attr) throws ProcessingException,
235: IOException, SAXException {
236: Attributes newAttrs = null;
237: boolean matched = false;
238:
239: Iterator iter = linkAttrs.iterator();
240: while (iter.hasNext()) {
241: int attrIdx = attr.getIndex((String) iter.next());
242: if (attrIdx != -1) {
243: String oldAttr = attr.getValue(attrIdx);
244: int i = oldAttr.indexOf(":");
245: if (i != -1) {
246: String scheme = oldAttr.substring(0, i);
247: String addr = oldAttr.substring(i + 1);
248: if (outSchemes.contains(scheme)) {
249: if (getLogger().isDebugEnabled()) {
250: getLogger().debug(
251: "Ignoring link '" + scheme + ":"
252: + addr + "'");
253: }
254: } else if (inSchemes.contains(scheme)) {
255: matched = true;
256: newAttrs = getLinkAttr(attr, attrIdx, scheme,
257: addr);
258: if (getLogger().isDebugEnabled()) {
259: getLogger().debug(
260: "Converted link '"
261: + oldAttr
262: + "' to '"
263: + newAttrs
264: .getValue(attrIdx)
265: + "'");
266: }
267: } else {
268: if (inSchemes.size() == 0) {
269: // If the link wasn't deliberately excluded from a
270: // list of 'good' links, then include it.
271: matched = true;
272: newAttrs = getLinkAttr(attr, attrIdx,
273: scheme, addr);
274: getLogger().debug(
275: "Converted link '"
276: + oldAttr
277: + "' to '"
278: + newAttrs
279: .getValue(attrIdx)
280: + "'");
281: }
282: }
283: }
284: }
285: }
286: if (matched) {
287: super .startTransformingElement(uri, name, raw, newAttrs);
288: } else {
289: super .startTransformingElement(uri, name, raw, attr);
290: }
291: }
292:
293: /**
294: * Process the SAX event.
295: */
296: public void characters(char[] p0, int p1, int p2)
297: throws SAXException {
298: if (this .ignoreEventsCount == 0) {
299: if (this .ignoreEmptyCharacters == true) {
300: String value = new String(p0, p1, p2);
301: if (value.trim().length() > 0) {
302: super .characters(p0, p1, p2);
303: }
304: } else {
305: super .characters(p0, p1, p2);
306: }
307: }
308: }
309:
310: /**
311: * Rewrite link in a set of attributes.
312: *
313: * @param oldAttrs Attributes containing unconverted link.
314: * @param linkIndex index of link to convert
315: * @param scheme URI scheme (indicating InputModule) of link
316: * @param addr URI scheme of link
317: * @return an Attributes based on <code>oldAttrs</code>, but with one attribute rewritten.
318: */
319: private Attributes getLinkAttr(Attributes oldAttrs, int linkIndex,
320: String scheme, String addr) {
321: AttributesImpl newAttrs = new AttributesImpl(oldAttrs);
322: try {
323: String modValue = (String) modHelper.getAttribute(
324: this .objectModel, getConf(scheme), scheme, addr,
325: (badLinkStr != null ? badLinkStr : scheme + ":"
326: + addr));
327: newAttrs.setValue(linkIndex, modValue);
328: } catch (Exception e) {
329: // Swallow IM errors, usually prefixes like 'http' that aren't
330: // bound to an InputModule.
331: getLogger().warn("## IM error: " + e, e);
332: }
333: return newAttrs;
334: }
335:
336: /**
337: * Retrieve a dynamic Configuration for a specific InputModule.
338: * @param scheme InputModule name
339: * @return Configuration for specified scheme, from the map:transformer block.
340: */
341: private Configuration getConf(String scheme) {
342: Configuration[] schemeConfs = this .conf.getChildren();
343: for (int i = 0; i < schemeConfs.length; i++) {
344: if (scheme
345: .equals(schemeConfs[i].getAttribute("name", null))) {
346: return schemeConfs[i];
347: }
348: }
349: return null;
350: }
351:
352: /** Recycle this component for use in another map:transform. */
353: public void recycle() {
354: this .resolver = null;
355: this .linkAttrs = null;
356: this .inSchemes = null;
357: this .outSchemes = null;
358: this .conf = null;
359: // Note: configure() and initialize() are not called after every
360: //recycle, so don't null origConf
361: super .recycle();
362: }
363:
364: /* (non-Javadoc)
365: * @see org.apache.avalon.framework.activity.Disposable#dispose()
366: */
367: public void dispose() {
368: if (this.modHelper != null) {
369: this.modHelper.releaseAll();
370: this.modHelper = null;
371: }
372: super.dispose();
373: }
374: }
|