001: /******************************************************************************
002: * JBoss, a division of Red Hat *
003: * Copyright 2006, Red Hat Middleware, LLC, and individual *
004: * contributors as indicated by the @authors tag. See the *
005: * copyright.txt in the distribution for a full listing of *
006: * individual contributors. *
007: * *
008: * This is free software; you can redistribute it and/or modify it *
009: * under the terms of the GNU Lesser General Public License as *
010: * published by the Free Software Foundation; either version 2.1 of *
011: * the License, or (at your option) any later version. *
012: * *
013: * This software is distributed in the hope that it will be useful, *
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of *
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
016: * Lesser General Public License for more details. *
017: * *
018: * You should have received a copy of the GNU Lesser General Public *
019: * License along with this software; if not, write to the Free *
020: * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA *
021: * 02110-1301 USA, or see the FSF site: http://www.fsf.org. *
022: ******************************************************************************/package org.jboss.portal.test.cms;
023:
import junit.framework.TestCase;
import org.jboss.portal.cms.util.FileUtil;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
031:
032: /**
033: * Tests for the CMS Regex when retrieving stored content.
034: *
035: * @author <a href="mailto:roy@jboss.org">Roy Russo</a>
036: */
037: public class TestRegEx extends TestCase {
038:
039: String HTMLHeaderFile = "resources/test/jcr/headerpage.html";
040: String HTMLHeaderFile_Good = "resources/test/jcr/headerpage_good.html";
041:
042: private static final String URI_schemeRegex = "[a-z][-+.0-9a-z]*:";
043:
044: /**
045: * For our purposes, ignore URIs that start with a scheme idicator, a slash (indicating an absolute path), or a hash
046: * sign (# = ASCII hex 23).
047: */
048: private static final String URI_ignoreRegex = "" + URI_schemeRegex
049: + "|/|\\x23";
050:
051: private static final String regex = "((?:href|src)\\s*=\\s*) # Capture preliminaries in $1. \n"
052: + "(?: # First look for URL in quotes. \n"
053: + " ([\"\']) # Capture open quote in $2. \n"
054: + " (?!"
055: + URI_ignoreRegex
056: + ") # If it isn't absolute... \n"
057: + " /?(.+?) # ...capture URL in $3 \n"
058: + " \\2 # Match the closing quote \n"
059: + " | # Look for non-quoted URL. \n"
060: + " (?![\"\']|"
061: + URI_ignoreRegex
062: + ") # If it isn't absolute... \n"
063: + " /?([^\\s>]+) # ...capture URL in $4 \n"
064: + ")";
065:
066: /** Removes header content, and leaves content between body tags */
067: private static final String HTMLStripperRegex = "(.*<body[^>]*>(.+)</body>.*)";
068: //private static final String HTMLStripperRegex = "(.*<body[^>]*>(.+)</body>.*')";
069:
070: private static final Pattern RELATIVE_URI_PATTERN = Pattern
071: .compile(regex, Pattern.MULTILINE
072: | Pattern.CASE_INSENSITIVE | Pattern.COMMENTS);
073:
074: private static final Pattern STRIP_TAGS_PATTERN = Pattern.compile(
075: HTMLStripperRegex, Pattern.DOTALL
076: | Pattern.CASE_INSENSITIVE);
077:
078: public void setUp() throws Exception {
079: super .setUp();
080: }
081:
082: /**
083: * Tests HTML rewriting of header content and links in CMSPortlet.
084: *
085: * @throws Exception
086: */
087: public void testHTMLPageHeaderRewrite() throws Exception {
088: String fileHTML = "";
089: StringBuffer fileData = new StringBuffer(1000);
090: BufferedReader reader = new BufferedReader(new FileReader(
091: HTMLHeaderFile));
092: char[] buffer = new char[1024];
093: int count = 0;
094: while ((count = reader.read(buffer)) != -1) {
095: String read = String.valueOf(buffer, 0, count);
096: fileData.append(read);
097: buffer = new char[1024];
098: }
099: reader.close();
100: fileHTML = fileData.toString();
101:
102: // begin cleaner
103: String CleanHTML = "";
104: Matcher h = STRIP_TAGS_PATTERN.matcher(fileHTML);
105: while (h.find()) {
106: CleanHTML = h.group(2);
107: }
108: // begin modifying links
109: StringBuffer stringbuffer = new StringBuffer();
110: Matcher m = RELATIVE_URI_PATTERN.matcher(CleanHTML);
111: while (m.find()) {
112: String relURI = m.group(3) != null ? m.group(3) : m
113: .group(4);
114: String absoluteURI = this .buildURL("/" + relURI);
115: m.appendReplacement(stringbuffer, "$1$2"
116: + FileUtil.cleanDoubleSlashes(absoluteURI) + "$2");
117: }
118: m.appendTail(stringbuffer);
119: CleanHTML = stringbuffer.toString();
120:
121: /* Assert */
122: String goodHTML = "";
123: fileData = new StringBuffer(1000);
124: reader = new BufferedReader(new FileReader(HTMLHeaderFile_Good));
125: buffer = new char[1024];
126: count = 0;
127: while ((count = reader.read(buffer)) != -1) {
128: String read = String.valueOf(buffer, 0, count);
129: fileData.append(read);
130: buffer = new char[1024];
131: }
132: reader.close();
133: goodHTML = fileData.toString();
134:
135: assertEquals("RegEx failed to match!", CleanHTML, goodHTML);
136: }
137:
138: protected void tearDown() throws Exception {
139: super .tearDown();
140: }
141:
142: /** Faking it. */
143: private String buildURL(String path) {
144: return "http://localhost:8080/portal/content" + path;
145: }
146: }
|