001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/util/CmsXsltUtil.java,v $
003: * Date : $Date: 2008-02-27 12:05:36 $
004: * Version: $Revision: 1.5 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.util;
033:
034: import org.opencms.file.CmsObject;
035: import org.opencms.main.CmsException;
036: import org.opencms.xml.CmsXmlException;
037:
038: import java.io.BufferedReader;
039: import java.io.IOException;
040: import java.io.StringReader;
041: import java.io.StringWriter;
042: import java.util.StringTokenizer;
043:
044: import javax.xml.transform.Source;
045: import javax.xml.transform.Transformer;
046: import javax.xml.transform.TransformerFactory;
047: import javax.xml.transform.stream.StreamResult;
048: import javax.xml.transform.stream.StreamSource;
049:
050: /**
051: * Provides utility functions for XSLT transformations.<p>
052: *
053: * TODO: This class is apparently customer specific and should probably be removed from the core!
054: *
055: * @author Carsten Weinholz
056: *
057: * @version $Revision: 1.5 $
058: *
059: * @since 6.2.1
060: */
061: public final class CmsXsltUtil {
062:
063: /** The delimiter to end a tag. */
064: public static final String TAG_END_DELIMITER = ">";
065:
066: /** The delimiter to start a tag. */
067: public static final String TAG_START_DELIMITER = "<";
068:
069: /** The delimiter to separate the text. */
070: public static final char TEXT_DELIMITER = '"';
071:
072: /** the delimiters, the csv data can be separated with.*/
073: static final String[] DELIMITERS = { ";", ",", "\t" };
074:
075: /**
076: * Hides the public constructor.<p>
077: */
078: private CmsXsltUtil() {
079:
080: // noop
081: }
082:
083: /**
084: * Returns the delimiter that most often occures in the CSV content and is therefore best applicable for the CSV data .<p>
085: *
086: * @param csvData the comma separated values
087: *
088: * @return the delimiter that is best applicable for the CSV data
089: */
090: public static String getPreferredDelimiter(String csvData) {
091:
092: String bestMatch = "";
093: int bestMatchCount = 0;
094: // find for each delimiter, how often it occures in the String csvData
095: for (int i = 0; i < DELIMITERS.length; i++) {
096: int currentCount = csvData.split(DELIMITERS[i]).length;
097: if (currentCount > bestMatchCount) {
098: bestMatch = DELIMITERS[i];
099: bestMatchCount = currentCount;
100: }
101: }
102: return bestMatch;
103: }
104:
105: /**
106: * Changes content from CSV to xml/html.<p>
107: *
108: * The method does not use DOM4J, because iso-8859-1 code ist not transformed correctly.
109: *
110: * @param cms the cms object
111: * @param xsltFile the XSLT transformation file
112: * @param csvContent the csv content to transform
113: * @param delimiter delimiter used to separate csv fields
114: *
115: * @return the transformed xml
116: *
117: * @throws CmsXmlException if something goes wrong
118: * @throws CmsException if something goes wrong
119: */
120: public static String transformCsvContent(CmsObject cms,
121: String xsltFile, String csvContent, String delimiter)
122: throws CmsException, CmsXmlException {
123:
124: String xmlContent = "";
125: try {
126: xmlContent = getTableHtml(csvContent, delimiter);
127: } catch (IOException e) {
128: throw new CmsXmlException(Messages.get().container(
129: Messages.ERR_CSV_XML_TRANSFORMATION_FAILED_0));
130: }
131:
132: // if xslt file parameter is set, transform the raw html and set the css stylesheet property
133: // of the converted file to that of the stylesheet
134: if (xsltFile != null) {
135: xmlContent = transformXmlContent(cms, xsltFile, xmlContent);
136: }
137:
138: return xmlContent;
139: }
140:
141: /**
142: * Applies a XSLT Transformation to the content.<p>
143: *
144: * The method does not use DOM4J, because iso-8859-1 code ist not transformed correctly.
145: *
146: * @param cms the cms object
147: * @param xsltFile the XSLT transformation file
148: * @param xmlContent the XML content to transform
149: *
150: * @return the transformed xml
151: *
152: * @throws CmsXmlException if something goes wrong
153: * @throws CmsException if something goes wrong
154: */
155: public static String transformXmlContent(CmsObject cms,
156: String xsltFile, String xmlContent) throws CmsException,
157: CmsXmlException {
158:
159: // JAXP reads data
160: Source xmlSource = new StreamSource(
161: new StringReader(xmlContent));
162: String xsltString = new String(cms.readFile(xsltFile)
163: .getContents());
164: Source xsltSource = new StreamSource(new StringReader(
165: xsltString));
166: String result = null;
167:
168: try {
169: TransformerFactory transFact = TransformerFactory
170: .newInstance();
171: Transformer trans = transFact.newTransformer(xsltSource);
172:
173: StringWriter writer = new StringWriter();
174: trans.transform(xmlSource, new StreamResult(writer));
175: result = writer.toString();
176: } catch (Exception exc) {
177: throw new CmsXmlException(Messages.get().container(
178: Messages.ERR_CSV_XML_TRANSFORMATION_FAILED_0));
179: }
180:
181: // cut of the prefacing declaration '<?xml version="1.0" encoding="UTF-8"?>'
182: if (result
183: .startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")) {
184: return result.substring(38);
185: } else {
186: return result;
187: }
188: }
189:
190: /**
191: * Converts a delimiter separated format string int o colgroup html fragment.<p>
192: *
193: * @param formatString the formatstring to convert
194: * @param delimiter the delimiter the formats (l,r or c) are delimited with
195: *
196: * @return the resulting colgroup HTML
197: */
198: private static String getColGroup(String formatString,
199: String delimiter) {
200:
201: StringBuffer colgroup = new StringBuffer(128);
202: String[] formatStrings = formatString.split(delimiter);
203: colgroup.append("<colgroup>");
204: for (int i = 0; i < formatStrings.length; i++) {
205: colgroup.append("<col align=\"");
206: char align = formatStrings[i].trim().charAt(0);
207: switch (align) {
208: case 'l':
209: colgroup.append("left");
210: break;
211: case 'c':
212: colgroup.append("center");
213: break;
214: case 'r':
215: colgroup.append("right");
216: break;
217: default:
218: throw new RuntimeException("invalid format option");
219: }
220: colgroup.append("\"/>");
221: }
222: return colgroup.append("</colgroup>").toString();
223: }
224:
225: /**
226: * Converts CSV data to xml.<p>
227: *
228: * @return a XML representation of the CSV data
229: *
230: * @param csvData the csv data to convert
231: * @param delimiter the delimiter to separate the values with
232: *
233: * @throws IOException if there is an IO problem
234: */
235: private static String getTableHtml(String csvData, String delimiter)
236: throws IOException {
237:
238: String lineSeparator = System.getProperty("line.separator");
239: String formatString = csvData.substring(0, csvData
240: .indexOf(lineSeparator));
241:
242: if (delimiter == null) {
243: delimiter = getPreferredDelimiter(csvData);
244: }
245:
246: StringBuffer xml = new StringBuffer("<table>");
247: if (isFormattingInformation(formatString, delimiter)) {
248: // transform formatting to HTML colgroup
249: xml.append(getColGroup(formatString, delimiter));
250: // cut of first line
251: csvData = csvData.substring(formatString.length()
252: + lineSeparator.length());
253: }
254:
255: String line;
256: BufferedReader br = new BufferedReader(
257: new StringReader(csvData));
258: while ((line = br.readLine()) != null) {
259: xml.append("<tr>\n");
260:
261: // must use tokenizer with delimiters include in order to handle empty cells appropriately
262: StringTokenizer t = new StringTokenizer(line, delimiter,
263: true);
264: boolean hasValue = false;
265: while (t.hasMoreElements()) {
266: String item = (String) t.nextElement();
267: if (!hasValue) {
268: xml.append("\t<td>");
269: hasValue = true;
270: }
271: if (!item.equals(delimiter)) {
272:
273: // remove enclosing delimiters
274: item = removeStringDelimiters(item);
275:
276: // in order to allow links, lines starting and ending with tag delimiters (< ...>) remains unescaped
277: if (item.startsWith(TAG_START_DELIMITER)
278: && item.endsWith(TAG_END_DELIMITER)) {
279: xml.append(item);
280: } else {
281: xml.append(CmsStringUtil.escapeHtml(item));
282: }
283: } else {
284: xml.append("</td>\n");
285: hasValue = false;
286: }
287: }
288: if (hasValue) {
289: xml.append("</td>\n");
290: } else {
291: xml.append("<td></td>\n");
292: }
293:
294: xml.append("</tr>\n");
295: }
296:
297: return xml.append("</table>").toString();
298: }
299:
300: /**
301: * Tests if the given string is a <code>delimiter</code> separated list of formatting information.<p>
302: *
303: * @param formatString the string to check
304: * @param delimiter the list separators
305: *
306: * @return true if the string is a <code>delimiter</code> separated list of Formatting Information
307: */
308: private static boolean isFormattingInformation(String formatString,
309: String delimiter) {
310:
311: String[] formatStrings = formatString.split(delimiter);
312: for (int i = 0; i < formatStrings.length; i++) {
313: if (!formatStrings[i].trim().matches("[lcr]")) {
314: return false;
315: }
316: }
317: return true;
318: }
319:
320: /**
321: * Removes the string delimiters from a key (as well as any white space
322: * outside the delimiters).<p>
323: *
324: * @param key the key (including delimiters)
325: *
326: * @return the key without delimiters
327: */
328: private static String removeStringDelimiters(String key) {
329:
330: String k = key.trim();
331: if (CmsStringUtil.isNotEmpty(k)) {
332: if (k.charAt(0) == TEXT_DELIMITER) {
333: k = k.substring(1);
334: }
335: if (k.charAt(k.length() - 1) == TEXT_DELIMITER) {
336: k = k.substring(0, k.length() - 1);
337: }
338: }
339: // replace excel protected quotations marks ("") by single quotation marks
340: k = CmsStringUtil.substitute(k, "\"\"", "\"");
341: return k;
342: }
343: }
|