001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/staticexport/CmsLinkProcessor.java,v $
003: * Date : $Date: 2008-02-27 12:05:46 $
004: * Version: $Revision: 1.56 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.staticexport;
033:
034: import org.opencms.file.CmsObject;
035: import org.opencms.file.CmsPropertyDefinition;
036: import org.opencms.file.wrapper.CmsObjectWrapper;
037: import org.opencms.i18n.CmsEncoder;
038: import org.opencms.main.CmsException;
039: import org.opencms.main.OpenCms;
040: import org.opencms.relations.CmsLink;
041: import org.opencms.relations.CmsRelationType;
042: import org.opencms.util.CmsHtmlParser;
043: import org.opencms.util.CmsMacroResolver;
044: import org.opencms.util.CmsRequestUtil;
045: import org.opencms.util.CmsStringUtil;
046:
047: import java.util.Vector;
048:
049: import org.htmlparser.Attribute;
050: import org.htmlparser.Node;
051: import org.htmlparser.Tag;
052: import org.htmlparser.tags.ImageTag;
053: import org.htmlparser.tags.LinkTag;
054: import org.htmlparser.tags.ObjectTag;
055: import org.htmlparser.util.ParserException;
056: import org.htmlparser.util.SimpleNodeIterator;
057:
058: /**
059: * Implements the HTML parser node visitor pattern to
060: * exchange all links on the page.<p>
061: *
062: * @author Alexander Kandzior
063: *
064: * @version $Revision: 1.56 $
065: *
066: * @since 6.0.0
067: */
068: public class CmsLinkProcessor extends CmsHtmlParser {
069:
070: /** Name of the <code>href</code> attribute, processed for link and area tags. */
071: public static final String ATTRIBUTE_HREF = "href";
072:
073: /** Name of the <code>src</code> attribute, processed for image and embed tags. */
074: public static final String ATTRIBUTE_SRC = "src";
075:
076: /** Name of the <code>value</code> attribute, processed for param tags nested in an object tag. */
077: public static final String ATTRIBUTE_VALUE = "value";
078:
079: /** HTML end, the closing counterpart of {@link #HTML_START} (not referenced inside this class). */
080: public static final String HTML_END = "</body></html>";
081:
082: /** HTML start, the opening counterpart of {@link #HTML_END} (not referenced inside this class). */
083: public static final String HTML_START = "<html><body>";
084:
085: /** Upper case name of the area tag, compared against the tag name reported by the parser. */
086: public static final String TAG_AREA = "AREA";
087:
088: /** Upper case name of the embed tag, compared against the tag name reported by the parser. */
089: public static final String TAG_EMBED = "EMBED";
090:
091: /** Upper case name of the param tag, compared against the tag name reported by the parser. */
092: public static final String TAG_PARAM = "PARAM";
093:
094: /** List of attributes that may contain links for the embed tag, all of them are processed. */
095: private static final String[] EMBED_TAG_LINKED_ATTRIBS = new String[] {
096: ATTRIBUTE_SRC, "pluginurl", "pluginspage" };
097:
098: /** List of attributes that may contain links for the object tag ("codebase" has to be first, if it is set the others are skipped). */
099: private static final String[] OBJECT_TAG_LINKED_ATTRIBS = new String[] {
100: "codebase", "data", "datasrc" };
101:
102: /** Processing mode "process links": link macros are replaced with the links from the link table. */
103: private static final int PROCESS_LINKS = 1;
104:
105: /** Processing mode "replace links": links are added to the link table and replaced with macros. */
106: private static final int REPLACE_LINKS = 0;
107:
108: /** The current user's OpenCms context, containing the user's permission and site root context. */
109: private CmsObject m_cms;
110:
111: /** The selected encoding to use for parsing the HTML. */
112: private String m_encoding;
113:
114: /** The link table used for link macro replacements. */
115: private CmsLinkTable m_linkTable;
116:
117: /** Current processing mode, set to {@link #PROCESS_LINKS} or {@link #REPLACE_LINKS} before parsing starts. */
118: private int m_mode;
119:
120: /** The relative path for relative links, if not set, relative links are treated as external links. */
121: private String m_relativePath;
122:
123: /** Another OpenCms context based on the current user's OpenCms context, but with the site root set to '/' (<code>null</code> if it could not be created). */
124: private CmsObject m_rootCms;
125:
/**
 * Creates a new link processor.<p>
 *
 * Besides storing the given values, a second OpenCms context with the site root
 * set to <code>"/"</code> is initialized from the given context. This second
 * context stays <code>null</code> if no context was given or if it can not be created.<p>
 *
 * @param cms the current users OpenCms context
 * @param linkTable the link table to use
 * @param encoding the encoding to use for parsing the HTML content
 * @param relativePath additional path for links with relative path (only used in "replace" mode)
 */
public CmsLinkProcessor(CmsObject cms, CmsLinkTable linkTable, String encoding, String relativePath) {

    // the link processor requires the parser to run in echo mode
    super(true);

    m_linkTable = linkTable;
    m_encoding = encoding;
    m_relativePath = relativePath;
    m_cms = cms;

    if (cms == null) {
        // without a context there is nothing to derive the root context from
        return;
    }

    CmsObject rootCms;
    try {
        rootCms = OpenCms.initCmsObject(cms);
        rootCms.getRequestContext().setSiteRoot("/");
    } catch (CmsException e) {
        // this should not happen, continue without a root context
        rootCms = null;
    }
    m_rootCms = rootCms;
}
154:
155: /**
156: * Escapes all <code>&</code>, e.g. replaces them with a <code>&</code>.<p>
157: *
158: * @param source the String to escape
159: * @return the escaped String
160: */
161: public static String escapeLink(String source) {
162:
163: if (source == null) {
164: return null;
165: }
166: StringBuffer result = new StringBuffer(source.length() * 2);
167: int terminatorIndex;
168: for (int i = 0; i < source.length(); ++i) {
169: char ch = source.charAt(i);
170: switch (ch) {
171: case '&':
172: // don't escape already escaped &s;
173: terminatorIndex = source.indexOf(';', i);
174: if (terminatorIndex > 0) {
175: String substr = source.substring(i + 1,
176: terminatorIndex);
177: if ("amp".equals(substr)) {
178: result.append(ch);
179: } else {
180: result.append("&");
181: }
182: } else {
183: result.append("&");
184: }
185: break;
186: default:
187: result.append(ch);
188: }
189: }
190: return new String(result);
191: }
192:
193: /**
194: * Unescapes all <code>&amp;</code>, that is replaces them with a <code>&</code>.<p>
195: *
196: * @param source the String to unescape
197: * @return the unescaped String
198: */
199: public static String unescapeLink(String source) {
200:
201: if (source == null) {
202: return null;
203: }
204: return CmsStringUtil.substitute(source, "&", "&");
205:
206: }
207:
208: /**
209: * Returns the link table this link processor was initialized with.<p>
210: *
211: * @return the link table this link processor was initialized with
212: */
213: public CmsLinkTable getLinkTable() {
214:
215: return m_linkTable;
216: }
217:
218: /**
219: * Starts link processing for the given content in processing mode.<p>
220: *
221: * Macros are replaced by links.<p>
222: *
223: * @param content the content to process
224: * @return the processed content with replaced macros
225: *
226: * @throws ParserException if something goes wrong
227: */
228: public String processLinks(String content) throws ParserException {
229:
230: m_mode = PROCESS_LINKS;
231: return process(content, m_encoding);
232: }
233:
234: /**
235: * Starts link processing for the given content in replacement mode.<p>
236: *
237: * Links are replaced by macros.<p>
238: *
239: * @param content the content to process
240: * @return the processed content with replaced links
241: *
242: * @throws ParserException if something goes wrong
243: */
244: public String replaceLinks(String content) throws ParserException {
245:
246: m_mode = REPLACE_LINKS;
247: return process(content, m_encoding);
248: }
249:
250: /**
251: * Visitor method to process a tag (start).<p>
252: *
253: * @param tag the tag to process
254: */
255: public void visitTag(Tag tag) {
256:
257: if (tag instanceof LinkTag) {
258: processLinkTag((LinkTag) tag);
259: } else if (tag instanceof ImageTag) {
260: processImageTag((ImageTag) tag);
261: } else if (tag instanceof ObjectTag) {
262: processObjectTag((ObjectTag) tag);
263: } else {
264: // there are no specialized tag classes for these tags :(
265: if (TAG_EMBED.equals(tag.getTagName())) {
266: processEmbedTag(tag);
267: } else if (TAG_AREA.equals(tag.getTagName())) {
268: processAreaTag(tag);
269: }
270: }
271: // append text content of the tag (may have been changed by above methods)
272: super .visitTag(tag);
273: }
274:
275: /**
276: * Process an area tag.<p>
277: *
278: * @param tag the tag to process
279: */
280: protected void processAreaTag(Tag tag) {
281:
282: processLink(tag, ATTRIBUTE_HREF, CmsRelationType.HYPERLINK);
283: }
284:
285: /**
286: * Process an embed tag.<p>
287: *
288: * @param tag the tag to process
289: */
290: protected void processEmbedTag(Tag tag) {
291:
292: for (int i = 0; i < EMBED_TAG_LINKED_ATTRIBS.length; i++) {
293: String attr = EMBED_TAG_LINKED_ATTRIBS[i];
294: processLink(tag, attr, CmsRelationType.EMBEDDED_OBJECT);
295: }
296: }
297:
298: /**
299: * Process an image tag.<p>
300: *
301: * @param tag the tag to process
302: */
303: protected void processImageTag(ImageTag tag) {
304:
305: processLink(tag, ATTRIBUTE_SRC, CmsRelationType.valueOf(tag
306: .getTagName()));
307: }
308:
309: /**
310: * Process a tag having a link in the given attribute, considering the link as the given type.<p>
311: *
312: * @param tag the tag to process
313: * @param attr the attribute
314: * @param type the link type
315: */
316: protected void processLink(Tag tag, String attr,
317: CmsRelationType type) {
318:
319: if (tag.getAttribute(attr) == null) {
320: return;
321: }
322: CmsLink link = null;
323: switch (m_mode) {
324: case PROCESS_LINKS:
325: // macros are replaced with links
326: link = m_linkTable.getLink(CmsMacroResolver.stripMacro(tag
327: .getAttribute(attr)));
328: if (link != null) {
329: // link management check
330: String l = link.getLink(m_cms);
331: if (TAG_PARAM.equals(tag.getTagName())) {
332: // HACK: to distinguish link parameters the link itself has to end with '&' or '?'
333: // another solution should be a kind of macro...
334: if (!l.endsWith(CmsRequestUtil.URL_DELIMITER)
335: && !l
336: .endsWith(CmsRequestUtil.PARAMETER_DELIMITER)) {
337: if (l.indexOf(CmsRequestUtil.URL_DELIMITER) > 0) {
338: l += CmsRequestUtil.PARAMETER_DELIMITER;
339: } else {
340: l += CmsRequestUtil.URL_DELIMITER;
341: }
342: }
343: }
344: // set the real target
345: tag.setAttribute(attr, CmsEncoder.escapeXml(l));
346: }
347: break;
348: case REPLACE_LINKS:
349: // links are replaced with macros
350: String targetUri = tag.getAttribute(attr);
351: if (CmsStringUtil.isNotEmpty(targetUri)) {
352: String internalUri = null;
353: if (!CmsMacroResolver.isMacro(targetUri)) {
354: internalUri = OpenCms.getLinkManager().getRootPath(
355: m_cms, targetUri, m_relativePath);
356: }
357: // HACK: to distinguish link parameters the link itself has to end with '&' or '?'
358: // another solution should be a kind of macro...
359: if (!TAG_PARAM.equals(tag.getTagName())
360: || targetUri
361: .endsWith(CmsRequestUtil.URL_DELIMITER)
362: || targetUri
363: .endsWith(CmsRequestUtil.PARAMETER_DELIMITER)) {
364: if (internalUri != null) {
365: internalUri = rewriteUri(internalUri);
366: // this is an internal link
367: link = m_linkTable.addLink(type, internalUri,
368: true);
369: // link management check
370: link.checkConsistency(m_cms);
371:
372: if ("IMG".equals(tag.getTagName())
373: || TAG_AREA.equals(tag.getTagName())) {
374: // now ensure the image has the "alt" attribute set
375: setAltAttributeFromTitle(tag, internalUri);
376: }
377: } else {
378: // this is an external link
379: link = m_linkTable.addLink(type, targetUri,
380: false);
381: }
382: }
383: if (link != null) {
384: tag.setAttribute(attr, CmsMacroResolver
385: .formatMacro(link.getName()));
386: }
387: }
388: break;
389: default: // empty
390: }
391: }
392:
393: /**
394: * Process a link tag.<p>
395: *
396: * @param tag the tag to process
397: */
398: protected void processLinkTag(LinkTag tag) {
399:
400: processLink(tag, ATTRIBUTE_HREF, CmsRelationType.valueOf(tag
401: .getTagName()));
402: }
403:
404: /**
405: * Process an object tag.<p>
406: *
407: * @param tag the tag to process
408: */
409: protected void processObjectTag(ObjectTag tag) {
410:
411: CmsRelationType type = CmsRelationType
412: .valueOf(tag.getTagName());
413: for (int i = 0; i < OBJECT_TAG_LINKED_ATTRIBS.length; i++) {
414: String attr = OBJECT_TAG_LINKED_ATTRIBS[i];
415: processLink(tag, attr, type);
416: if ((i == 0) && (tag.getAttribute(attr) != null)) {
417: // if code base is available, the other attributes are relative to it, so do not process them
418: break;
419: }
420: }
421: SimpleNodeIterator itChildren = tag.children();
422: while (itChildren.hasMoreNodes()) {
423: Node node = itChildren.nextNode();
424: if (node instanceof Tag) {
425: Tag childTag = (Tag) node;
426: if (TAG_PARAM.equals(childTag.getTagName())) {
427: processLink(childTag, ATTRIBUTE_VALUE, type);
428: }
429: }
430: }
431: }
432:
433: /**
434: * Ensures that the given tag has the "alt" attribute set.<p>
435: *
436: * if not set, it will be set from the title of the given resource.<p>
437: *
438: * @param tag the tag to set the alt attribute for
439: * @param internalUri the internal URI to get the title from
440: */
441: protected void setAltAttributeFromTitle(Tag tag, String internalUri) {
442:
443: boolean hasAltAttrib = (tag.getAttribute("alt") != null);
444: if (!hasAltAttrib) {
445: String value = null;
446: if ((internalUri != null) && (m_rootCms != null)) {
447: // internal image: try to read the "alt" text from the "Title" property
448: try {
449: value = m_rootCms
450: .readPropertyObject(
451: internalUri,
452: CmsPropertyDefinition.PROPERTY_TITLE,
453: false).getValue();
454: } catch (CmsException e) {
455: // property can't be read, ignore
456: }
457: }
458: // some editors add a "/" at the end of the tag, we must make sure to insert before that
459: Vector attrs = tag.getAttributesEx();
460: // first element is always the tag name
461: attrs.add(1, new Attribute(" "));
462: attrs.add(2, new Attribute("alt", value == null ? ""
463: : value, '"'));
464: }
465: }
466:
467: /**
468: * Use the {@link org.opencms.file.wrapper.CmsObjectWrapper} to restore the link in the VFS.<p>
469: *
470: * @param internalUri the internal URI to restore
471: *
472: * @return the restored URI
473: */
474: private String rewriteUri(String internalUri) {
475:
476: // if an object wrapper is used, rewrite the uri
477: if (m_cms != null) {
478: Object obj = m_cms.getRequestContext().getAttribute(
479: CmsObjectWrapper.ATTRIBUTE_NAME);
480: if (obj != null) {
481: CmsObjectWrapper wrapper = (CmsObjectWrapper) obj;
482: return wrapper.restoreLink(internalUri);
483: }
484: }
485:
486: return internalUri;
487: }
488: }
|