001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.transformation;
018:
019: import java.io.IOException;
020: import java.util.Map;
021: import java.util.regex.Pattern;
022: import java.util.regex.PatternSyntaxException;
023:
024: import org.apache.avalon.framework.configuration.Configurable;
025: import org.apache.avalon.framework.configuration.Configuration;
026: import org.apache.avalon.framework.configuration.ConfigurationException;
027: import org.apache.avalon.framework.parameters.Parameters;
028: import org.apache.cocoon.ProcessingException;
029: import org.apache.cocoon.caching.CacheableProcessingComponent;
030: import org.apache.cocoon.environment.ObjectModelHelper;
031: import org.apache.cocoon.environment.Request;
032: import org.apache.cocoon.environment.Response;
033: import org.apache.cocoon.environment.Session;
034: import org.apache.cocoon.environment.SourceResolver;
035: import org.apache.excalibur.source.SourceValidity;
036: import org.apache.excalibur.source.impl.validity.NOPValidity;
037: import org.xml.sax.Attributes;
038: import org.xml.sax.SAXException;
039: import org.xml.sax.helpers.AttributesImpl;
040:
041: /**
042: * @cocoon.sitemap.component.documentation
043: * The encodeURL transformer emits encoded URLs.
044: *
045: * @cocoon.sitemap.component.name encodeurl
046: * @cocoon.sitemap.component.logger sitemap.transformer.encodeURL
047: * @cocoon.sitemap.component.documentation.caching
048: * TBD
049: *
050: * @cocoon.sitemap.component.pooling.max 32
051: *
052: * The encodeURL transformer emits encoded URLs.
053: * <p>
054: * This transformer applies encodeURL method to URLs.
 * You may want to use this transformer to avoid calling
 * encodeURL() manually.
057: * </p>
058: * <p>
059: * Usually this transformer is appended as last transformer before
060: * the serialization process. In this case it is possible to encode
061: * URLs introduced in the generator, and xslt transformer phase.
062: * </p>
063: * <p>
064: * You can specify which attributes hold URL values in order to restrict
065: * URL rewriting to specific attributes only.
066: * </p>
067: * <p>
068: * Usage in a sitemap:
069: * </p>
 * <pre><tt>
 *   &lt;map:composition&gt;
 *   ...
 *     &lt;map:transformers&gt;
 *     ...
 *       &lt;map:transformer type="encodeURL"
 *         src="org.apache.cocoon.transformation.EncodeURLTransformer"&gt;
 *         &lt;exclude-name&gt;img/@src|a/@href=.&#42;adserver&lt;/exclude-name&gt;
 *         &lt;include-name&gt;.&#42;/@href|.&#42;/@src|.&#42;/@action&lt;/include-name&gt;
 *       &lt;/map:transformer&gt;
 *   ...
 *   &lt;map:pipelines&gt;
 *     &lt;map:pipeline&gt;
 *       ...
 *       &lt;map:transform type="encodeURL"/&gt;
 *       ...
 * </tt></pre>
087: *
088: * @author <a href="mailto:bh22351@i-one.at">Bernhard Huber</a>
089: * @version CVS $Id: EncodeURLTransformer.java 153376 2005-02-11 08:50:21Z cziegeler $
090: */
091: public class EncodeURLTransformer extends AbstractTransformer implements
092: Configurable, CacheableProcessingComponent {
093:
094: /**
095: * Configuration name for specifying excluding patterns,
096: * ie exclude-name.
097: */
098: public final static String EXCLUDE_NAME = "exclude-name";
099:
100: /**
101: * Configuration name for specifying including patterns,
102: * ie include-name.
103: */
104: public final static String INCLUDE_NAME = "include-name";
105:
106: /**
107: * Configuration default exclude pattern,
108: * ie img/@src
109: */
110: public final static String EXCLUDE_NAME_DEFAULT = "img/@src=";
111:
112: /**
113: * Configuration default exclude pattern,
114: * ie .*\/@href|.*\/@action|frame/@src
115: */
116: public final static String INCLUDE_NAME_DEFAULT = ".*/@href=|.*/@action=|frame/@src=";
117:
118: private String includeNameConfigure = INCLUDE_NAME_DEFAULT;
119: private String excludeNameConfigure = EXCLUDE_NAME_DEFAULT;
120:
121: private ElementAttributeMatching elementAttributeMatching;
122: private Response response;
123: private boolean isEncodeURLNeeded;
124: private Session session;
125:
126: /**
127: * check if encoding of URLs is neccessary.
128: *
129: * This is true if session object exists, and session-id
130: * was provided from URL, or session is new.
131: * The result is stored in some instance variables
132: */
133: protected void checkForEncoding(Request request) {
134: this .session = request.getSession(false);
135: this .isEncodeURLNeeded = false;
136:
137: if (null != this .session) {
138: // do encoding if session id is from URL, or the session is new,
139: // fixes BUG #13855, due to paint007@mc.duke.edu
140: if (request.isRequestedSessionIdFromURL()
141: || this .session.isNew()) {
142: this .isEncodeURLNeeded = true;
143: }
144: }
145: }
146:
147: /**
148: * Setup the transformer.
149: * <p>
150: * Setup include, and exclude patterns from the parameters
151: * </p>
152: *
153: * @param resolver source resolver
154: * @param objectModel sitemap objects
155: * @param parameters request parameters
156: *
157: */
158: public void setup(SourceResolver resolver, Map objectModel,
159: String source, Parameters parameters)
160: throws ProcessingException, SAXException, IOException {
161:
162: this
163: .checkForEncoding(ObjectModelHelper
164: .getRequest(objectModel));
165:
166: if (this .isEncodeURLNeeded) {
167: this .response = ObjectModelHelper.getResponse(objectModel);
168:
169: // don't check if URL encoding is needed now, as
170: // a generator might create a new session
171: final String includeName = parameters.getParameter(
172: INCLUDE_NAME, this .includeNameConfigure);
173: final String excludeName = parameters.getParameter(
174: EXCLUDE_NAME, this .excludeNameConfigure);
175: try {
176: this .elementAttributeMatching = new ElementAttributeMatching(
177: includeName, excludeName);
178: } catch (PatternSyntaxException reex) {
179: final String message = "Cannot parse include-name: "
180: + includeName + " " + "or exclude-name: "
181: + excludeName + "!";
182: throw new ProcessingException(message, reex);
183: }
184: }
185: }
186:
187: /**
188: * BEGIN SitemapComponent methods
189: *
190: * @param configuration Description of Parameter
191: * @exception ConfigurationException Description of Exception
192: */
193: public void configure(Configuration configuration)
194: throws ConfigurationException {
195: Configuration child;
196:
197: child = configuration.getChild(INCLUDE_NAME);
198: this .includeNameConfigure = child
199: .getValue(INCLUDE_NAME_DEFAULT);
200:
201: child = configuration.getChild(EXCLUDE_NAME);
202: this .excludeNameConfigure = child
203: .getValue(EXCLUDE_NAME_DEFAULT);
204:
205: if (this .includeNameConfigure == null) {
206: String message = "Configure " + INCLUDE_NAME + "!";
207: throw new ConfigurationException(message);
208: }
209: if (this .excludeNameConfigure == null) {
210: String message = "Configure " + EXCLUDE_NAME + "!";
211: throw new ConfigurationException(message);
212: }
213: }
214:
215: /**
216: * Recycle resources of this transformer
217: */
218: public void recycle() {
219: super .recycle();
220: this .response = null;
221: this .session = null;
222: this .elementAttributeMatching = null;
223: }
224:
225: /**
226: * Generate the unique key.
227: * This key must be unique inside the space of this component.
228: *
229: * @return The generated key hashes the src
230: */
231: public java.io.Serializable getKey() {
232: if (this .isEncodeURLNeeded) {
233: return null;
234: } else {
235: return "1";
236: }
237: }
238:
239: /**
240: * Generate the validity object.
241: *
242: * @return The generated validity object or <code>null</code> if the
243: * component is currently not cacheable.
244: */
245: public SourceValidity getValidity() {
246: if (this .isEncodeURLNeeded) {
247: return null;
248: } else {
249: return NOPValidity.SHARED_INSTANCE;
250: }
251: }
252:
253: /**
254: * Start parsing an element
255: *
256: * @param uri of the element
257: * @param name of the element
258: * @param raw name of the element
259: * @param attributes list
260: * @exception SAXException Description of Exception
261: */
262: public void startElement(String uri, String name, String raw,
263: Attributes attributes) throws SAXException {
264: if (this .isEncodeURLNeeded
265: && this .elementAttributeMatching != null) {
266: String lname = name;
267: if (attributes != null && attributes.getLength() > 0) {
268: AttributesImpl new_attributes = new AttributesImpl(
269: attributes);
270: for (int i = 0; i < new_attributes.getLength(); i++) {
271: String attr_lname = new_attributes.getLocalName(i);
272:
273: String value = new_attributes.getValue(i);
274:
275: if (elementAttributeMatching
276: .matchesElementAttribute(lname, attr_lname,
277: value)) {
278: // don't use simply this.response.encodeURL(value)
279: // but be more smart about the url encoding
280: final String new_value = this .encodeURL(value);
281: if (getLogger().isDebugEnabled()) {
282: this .getLogger().debug(
283: "element/@attribute matches: "
284: + name + "/@" + attr_lname);
285: this .getLogger().debug(
286: "encodeURL: " + value + " -> "
287: + new_value);
288: }
289: new_attributes.setValue(i, new_value);
290: }
291: }
292: // parent handles element using encoded attribute values
293: super .contentHandler.startElement(uri, name, raw,
294: new_attributes);
295: return;
296: }
297: }
298: // no match, parent handles element as-is
299: super .contentHandler.startElement(uri, name, raw, attributes);
300: }
301:
302: /**
303: * Do the URL rewriting.
304: * <p>
305: * Check if <code>url</code> contains already the sessionid, some servlet-engines
306: * just appends the session-id without checking if the sessionid is already present.
307: * </p>
308: *
309: * @param url the URL probably without sessionid.
310: * @return String the original url inclusive the sessionid
311: */
312: private String encodeURL(String url) {
313: String encoded_url;
314: if (this .response != null) {
315: // As some servlet-engine does not check if url has been already rewritten
316: if (this .session != null
317: && url.indexOf(this .session.getId()) > -1) {
318: // url contains already the session id encoded
319: encoded_url = url;
320: } else {
321: // do encode the session id
322: encoded_url = this .response.encodeURL(url);
323: }
324: } else {
325: encoded_url = url;
326: }
327: return encoded_url;
328: }
329:
330: /**
331: * A helper class for matching element names, and attribute names.
332: *
333: * <p>
334: * For given include-name, exclude-name decide if element-attribute pair
335: * matches. This class defines the precedence and matching algorithm.
336: * </p>
337: *
338: * @author <a href="mailto:bh22351@i-one.at">Bernhard Huber</a>
339: * @version CVS $Id: EncodeURLTransformer.java 153376 2005-02-11 08:50:21Z cziegeler $
340: */
341: public static class ElementAttributeMatching {
342: /**
343: * Regular expression of including patterns
344: *
345: */
346: protected Pattern includeNameRE;
347: /**
348: * Regular expression of excluding patterns
349: *
350: */
351: protected Pattern excludeNameRE;
352:
353: /**
354: *Constructor for the ElementAttributeMatching object
355: *
356: * @param includeName Description of Parameter
357: * @param excludeName Description of Parameter
358: * @exception PatternSyntaxException Description of Exception
359: */
360: public ElementAttributeMatching(String includeName,
361: String excludeName) throws PatternSyntaxException {
362: includeNameRE = Pattern.compile(includeName,
363: Pattern.CASE_INSENSITIVE);
364: excludeNameRE = Pattern.compile(excludeName,
365: Pattern.CASE_INSENSITIVE);
366: }
367:
368: /**
369: * Return true iff element_name attr_name pair is not matched by exclude-name,
370: * but is matched by include-name
371: * @param element_name
372: * @param attr_name
373: * @param value TODO
374: *
375: * @return boolean true iff value of attribute_name should get rewritten, else
376: * false.
377: */
378: public boolean matchesElementAttribute(String element_name,
379: String attr_name, String value) {
380: String element_attr_name = canonicalizeElementAttribute(
381: element_name, attr_name, value);
382:
383: if (excludeNameRE != null && includeNameRE != null) {
384: return !matchesExcludesElementAttribute(element_attr_name)
385: && matchesIncludesElementAttribute(element_attr_name);
386: } else {
387: return false;
388: }
389: }
390:
391: /**
392: * Build from elementname, and attribute name a single string.
393: * <p>
394: * String concatenated <code>element name + "/@" + attribute name</code>
395: * is matched against the include and excluding patterns.
396: * </p>
397: * @param element_name Description of Parameter
398: * @param attr_name Description of Parameter
399: * @param value The value
400: *
401: * @return Description of the Returned Value
402: */
403: private String canonicalizeElementAttribute(
404: String element_name, String attr_name, String value) {
405: return element_name + "/@" + attr_name + "=" + value;
406: }
407:
408: /**
409: * Return true iff element_name attr_name pair is matched by exclude-name.
410: *
411: * @param element_attr_name
412: * @return boolean true iff exclude-name matches element_name, attr_name, else
413: * false.
414: */
415: private boolean matchesExcludesElementAttribute(
416: String element_attr_name) {
417: boolean match = excludeNameRE.matcher(element_attr_name)
418: .lookingAt();
419: return match;
420: }
421:
422: /**
423: * Return true iff element_name attr_name pair is matched by include-name.
424: *
425: * @param element_attr_name
426: * @return boolean true iff include-name matches element_name, attr_name, else
427: * false.
428: */
429: private boolean matchesIncludesElementAttribute(
430: String element_attr_name) {
431: boolean match = includeNameRE.matcher(element_attr_name)
432: .lookingAt();
433: return match;
434: }
435: }
436: }
|