001: /*
002: JSPWiki - a JSP-based WikiWiki clone.
003:
004: Copyright (C) 2001-2002 Janne Jalkanen (Janne.Jalkanen@iki.fi)
005:
006: This program is free software; you can redistribute it and/or modify
007: it under the terms of the GNU Lesser General Public License as published by
008: the Free Software Foundation; either version 2.1 of the License, or
009: (at your option) any later version.
010:
011: This program is distributed in the hope that it will be useful,
012: but WITHOUT ANY WARRANTY; without even the implied warranty of
013: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: GNU Lesser General Public License for more details.
015:
016: You should have received a copy of the GNU Lesser General Public License
017: along with this program; if not, write to the Free Software
018: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
019: */
020: package com.ecyrd.jspwiki.parser;
021:
022: import java.io.UnsupportedEncodingException;
023: import java.security.MessageDigest;
024: import java.security.NoSuchAlgorithmException;
025: import java.text.SimpleDateFormat;
026: import java.util.ArrayList;
027: import java.util.Calendar;
028: import java.util.HashMap;
029: import java.util.Map;
030: import java.util.Properties;
031: import java.util.Set;
032: import java.util.regex.Matcher;
033: import java.util.regex.Pattern;
034:
035: /**
036: * <p>Translates Creole markp to JSPWiki markup. Simple translator uses regular expressions.
037: * See http://www.wikicreole.org for the WikiCreole spec.</p>
038: *
039: * <p>This translator can be configured through properties defined in
040: * jspwiki.properties starting with "creole.*". See the
041: * jspwiki.properties file for an explanation of the properties</p>
042: *
043: * <p><b>WARNING</b>: This is an experimental feature, and known to be
044: * broken. Use at your own risk.</o>
045: *
046: * @author Steffen Schramm
047: * @author Hanno Eichelberger
048: * @author Christoph Sauer
049: *
050: * @see <a href="http://www.wikicreole.org/">Wiki Creole Spec</a>
051: */
052: public class CreoleToJSPWikiTranslator {
053:
054: // These variables are expanded so that admins
055: // can display information about the current installed
056: // pagefilter
057: //
058: // The syntax is the same as a wiki var. Unlike a wiki
059: // war though, the CreoleTranslator itself
060: //
061: // [{$creolepagefilter.version}]
062: // [{$creolepagefilter.creoleversion}]
063: // [{$creolepagefilter.linebreak}] -> bloglike/wikilike
064:
065: public static String VAR_VERSION = "1.0.3";
066:
067: public static String VAR_CREOLE_VERSION = "1.0";
068:
069: public static String VAR_LINEBREAK_BLOGLIKE = "bloglike";
070:
071: public static String VAR_LINEBREAK_C2LIKE = "c2like";
072:
073: private static final String CREOLE_BOLD = "\\*\\*((?s:.)*?)(\\*\\*|(\n\n|\r\r|\r\n\r\n))";
074:
075: private static final String JSPWIKI_BOLD = "__$1__$3";
076:
077: private static final String CREOLE_ITALIC = "//((?s:.)*?)(//|(\n\n|\r\r|\r\n\r\n))";
078:
079: private static final String JSPWIKI_ITALIC = "''$1''$3";
080:
081: private static final String CREOLE_SIMPLELINK = "\\[\\[([^\\]]*?)\\]\\]";
082:
083: private static final String JSPWIKI_SIMPLELINK = "[$1]";
084:
085: private static final String CREOLE_LINK = "\\[\\[([^\\]]*?)\\|([^\\[\\]]*?)\\]\\]";
086:
087: private static final String JSPWIKI_LINK = "[$2|$1]";
088:
089: private static final String CREOLE_HEADER_0 = "(\n|\r|\r\n|^)=([^=\\r\\n]*)={0,2}";
090:
091: private static final String JSPWIKI_HEADER_0 = "$1!!!$2";
092:
093: private static final String CREOLE_HEADER_1 = "(\n|\r|\r\n|^)==([^=\\r\\n]*)={0,2}";
094:
095: private static final String JSPWIKI_HEADER_1 = "$1!!!$2";
096:
097: private static final String CREOLE_HEADER_2 = "(\n|\r|\r\n|^)===([^=\\r\\n]*)={0,3}";
098:
099: private static final String JSPWIKI_HEADER_2 = "$1!!$2";
100:
101: private static final String CREOLE_HEADER_3 = "(\n|\r|\r\n|^)====([^=\\r\\n]*)={0,4}";
102:
103: private static final String JSPWIKI_HEADER_3 = "$1!$2";
104:
105: private static final String CREOLE_HEADER_4 = "(\n|\r|\r\n|^)=====([^=\\r\\n]*)={0,5}";
106:
107: private static final String JSPWIKI_HEADER_4 = "$1__$2__";
108:
109: private static final String CREOLE_SIMPLEIMAGE = "\\{\\{([^\\}]*?)\\}\\}";
110:
111: private static final String JSPWIKI_SIMPLEIMAGE = "[{Image src='$1'}]";
112:
113: private static final String CREOLE_IMAGE = "\\{\\{([^\\}]*?)\\|([^\\}]*?)\\}\\}";
114:
115: private static final String JSPWIKI_IMAGE = "[{Image src='$1' caption='$2'}]";
116:
117: private static final String CREOLE_IMAGE_LINK = "\\[\\[(.*?)\\|\\{\\{(.*?)\\}\\}\\]\\]";
118:
119: private static final String JSPWIKI_IMAGE_LINK = "[{Image src='$2' link='$1'}]";
120:
121: private static final String CREOLE_IMAGE_LINK_DESC = "\\[\\[(.*?)\\|\\{\\{(.*?)\\|(.*?)\\}\\}\\]\\]";
122:
123: private static final String JSPWIKI_IMAGE_LINK_DESC = "[{Image src='$2' link='$1' caption='$3'}]";
124:
125: private static final String PREFORMATTED_PROTECTED = "\\Q{{{\\E.*?\\Q}}}\\E";
126:
127: //private static final String CREOLE_LINEBREAKS = "([^\\s\\\\])(\r\n|\r|\n)+(?=[^\\s\\*#])";
128:
129: //private static final String JSPWIKI_LINEBREAKS = "$1\\\\\\\\$2";
130:
131: private static final String CREOLE_TABLE = "(\n|\r|\r\n|^)(\\|[^\n\r]*)\\|(\\t| )*(\n|\r|\r\n|$)";
132:
133: private static final String CREOLE_PLUGIN = "\\<\\<((?s:.)*?)\\>\\>";
134:
135: private static final String JSPWIKI_PLUGIN = "[{$1}]";
136:
137: private static final String WWW_URL = "(\\[\\[)\\s*(www\\..*?)(\\]\\])";
138:
139: private static final String HTTP_URL = "$1http://$2$3";
140:
141: private static final String CREOLE_IMAGE_X = "\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}";
142:
143: private static final String JSPWIKI_IMAGE_X = "[{\u2016 src='$1' caption='$4' \u2015}]";
144:
145: private static final String CREOLE_LINK_IMAG_X = "\\[\\[(.*?)\\|\\{\\{(.*?)((\\|)(.*?)){0,1}((\\|)(.*?)){0,1}\\}\\}\\]\\]";
146:
147: private static final String JSPWIKI_LINK_IMAGE_X = "[{\u2016 src='$2' link='$1' caption='$5' \u2015}]";
148:
149: private static final String JSPWIKI_TABLE = "$1$2$4";
150:
151: /* TODO Is it possible to use just protect :// ? */
152: private static final String URL_PROTECTED = "http://|ftp://|https://";
153:
154: private static final String TABLE_HEADER_PROTECTED = "((\n|\r|\r\n|^)(\\|.*?)(\n|\r|\r\n|$))";
155:
156: private static final String SIGNATURE = "--~~~";
157:
158: private static final String SIGNATURE_AND_DATE = "--~~~~";
159:
160: private static final String DEFAULT_DATEFORMAT = "yyyy-MM-dd";
161:
162: private static final String ESCAPE_PROTECTED = "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)";
163:
164: private static Map c_protectionMap = new HashMap();
165:
166: private ArrayList m_hashList = new ArrayList();
167:
168: public String translateSignature(Properties wikiProps,
169: final String content, String username) {
170:
171: String dateFormat = wikiProps.getProperty("creole.dateFormat");
172:
173: if (dateFormat == null) {
174: dateFormat = DEFAULT_DATEFORMAT;
175: }
176:
177: SimpleDateFormat df = null;
178: try {
179: df = new SimpleDateFormat(dateFormat);
180: } catch (Exception e) {
181: e.printStackTrace();
182: df = new SimpleDateFormat(DEFAULT_DATEFORMAT);
183: }
184:
185: String result = content;
186: result = protectMarkup(result, PREFORMATTED_PROTECTED, "", "");
187: result = protectMarkup(result, URL_PROTECTED, "", "");
188:
189: Calendar cal = Calendar.getInstance();
190: result = translateElement(result, SIGNATURE_AND_DATE, "-- [["
191: + username + "]], " + df.format(cal.getTime()));
192: result = translateElement(result, SIGNATURE, "-- [[" + username
193: + "]]");
194: result = unprotectMarkup(result, false);
195: return result;
196: }
197:
198: /** Translates Creole markup to JSPWiki markup */
199: public String translate(Properties wikiProps, final String content) {
200: boolean blogLineBreaks = false;
201: /*
202: // BROKEN, breaks on different platforms.
203: String tmp = wikiProps.getProperty("creole.blogLineBreaks");
204: if (tmp != null)
205: {
206: if (tmp.trim().equals("true"))
207: blogLineBreaks = true;
208: }
209: */
210: String imagePlugin = wikiProps
211: .getProperty("creole.imagePlugin.name");
212:
213: String result = content;
214: //
215: // Breaks on OSX. It is never a good idea to tamper with the linebreaks. JSPWiki always
216: // stores linebreaks as \r\n, regardless of the platform.
217: //result = result.replace("\r\n", "\n");
218: //result = result.replace("\r", "\n");
219:
220: /* Now protect the rest */
221: result = protectMarkup(result);
222: result = translateLists(result, "*", "-", "Nothing");
223: result = translateElement(result, CREOLE_BOLD, JSPWIKI_BOLD);
224: result = translateElement(result, CREOLE_ITALIC, JSPWIKI_ITALIC);
225: result = translateElement(result, WWW_URL, HTTP_URL);
226:
227: if (imagePlugin != null && !imagePlugin.equals("")) {
228: result = this .replaceImageArea(wikiProps, result,
229: CREOLE_LINK_IMAG_X, JSPWIKI_LINK_IMAGE_X, 6,
230: imagePlugin);
231: result = this .replaceImageArea(wikiProps, result,
232: CREOLE_IMAGE_X, JSPWIKI_IMAGE_X, 5, imagePlugin);
233: }
234: result = translateElement(result, CREOLE_IMAGE_LINK_DESC,
235: JSPWIKI_IMAGE_LINK_DESC);
236: result = translateElement(result, CREOLE_IMAGE_LINK,
237: JSPWIKI_IMAGE_LINK);
238: result = translateElement(result, CREOLE_LINK, JSPWIKI_LINK);
239: result = translateElement(result, CREOLE_SIMPLELINK,
240: JSPWIKI_SIMPLELINK);
241: result = translateElement(result, CREOLE_HEADER_4,
242: JSPWIKI_HEADER_4);
243: result = translateElement(result, CREOLE_HEADER_3,
244: JSPWIKI_HEADER_3);
245: result = translateElement(result, CREOLE_HEADER_2,
246: JSPWIKI_HEADER_2);
247: result = translateElement(result, CREOLE_HEADER_1,
248: JSPWIKI_HEADER_1);
249: result = translateElement(result, CREOLE_HEADER_0,
250: JSPWIKI_HEADER_0);
251: result = translateElement(result, CREOLE_IMAGE, JSPWIKI_IMAGE);
252: result = translateLists(result, "-", "*", "#");
253: result = translateElement(result, CREOLE_SIMPLEIMAGE,
254: JSPWIKI_SIMPLEIMAGE);
255: result = translateElement(result, CREOLE_TABLE, JSPWIKI_TABLE);
256: result = replaceArea(result, TABLE_HEADER_PROTECTED,
257: "\\|=([^\\|]*)=|\\|=([^\\|]*)", "||$1$2");
258:
259: /*
260: if (blogLineBreaks)
261: {
262: result = translateElement(result, CREOLE_LINEBREAKS, JSPWIKI_LINEBREAKS);
263: }
264: */
265: result = unprotectMarkup(result, true);
266:
267: result = translateVariables(result, blogLineBreaks);
268: //result = result.replace("\n", System.getProperty("line.separator"));
269: return result;
270: }
271:
272: /** Translates lists. */
273: private static String translateLists(String content,
274: String sourceSymbol, String targetSymbol,
275: String sourceSymbol2) {
276: String[] lines = content.split("\n");
277: StringBuffer result = new StringBuffer();
278: int counter = 0;
279: int inList = -1;
280: for (int i = 0; i < lines.length; i++) {
281: String line = lines[i];
282: String actSourceSymbol = "";
283: while ((line.startsWith(sourceSymbol) || line
284: .startsWith(sourceSymbol2))
285: && (actSourceSymbol.equals("") || line.substring(0,
286: 1).equals(actSourceSymbol))) {
287: actSourceSymbol = line.substring(0, 1);
288: line = line.substring(1, line.length());
289: counter++;
290: }
291: if ((inList == -1 && counter != 1)
292: || (inList != -1 && inList + 1 < counter)) {
293: for (int c = 0; c < counter; c++) {
294: result.append(actSourceSymbol);
295: }
296: inList = -1;
297: } else {
298: for (int c = 0; c < counter; c++) {
299: if (actSourceSymbol.equals(sourceSymbol2)) {
300: result.append(sourceSymbol2);
301: } else {
302: result.append(targetSymbol);
303: }
304: }
305: inList = counter;
306: }
307: result.append(line);
308: if (i < lines.length - 1) {
309: result.append("\n");
310: }
311: counter = 0;
312: }
313: return result.toString();
314: }
315:
316: private String translateVariables(String result,
317: boolean blogLineBreaks) {
318: result = result.replace("[{$creolepagefilter.version}]",
319: VAR_VERSION);
320: result = result.replace("[{$creolepagefilter.creoleversion}]",
321: VAR_CREOLE_VERSION);
322: String linebreaks = blogLineBreaks ? VAR_LINEBREAK_BLOGLIKE
323: : VAR_LINEBREAK_C2LIKE;
324: result = result.replace("[{$creolepagefilter.linebreak}]",
325: linebreaks);
326: return result;
327: }
328:
329: /**
330: * Undoes the protection. This is done by replacing the md5 hashes by the
331: * original markup.
332: *
333: * @see #protectMarkup(String)
334: */
335: private String unprotectMarkup(String content,
336: boolean replacePlugins) {
337: Object[] it = this .m_hashList.toArray();
338:
339: for (int i = it.length - 1; i >= 0; i--) {
340: String hash = (String) it[i];
341: String protectedMarkup = (String) c_protectionMap.get(hash);
342: content = content.replace(hash, protectedMarkup);
343: if ((protectedMarkup.length() < 3 || (protectedMarkup
344: .length() > 2 && !protectedMarkup.substring(0, 3)
345: .equals("{{{")))
346: && replacePlugins)
347: content = translateElement(content, CREOLE_PLUGIN,
348: JSPWIKI_PLUGIN);
349:
350: }
351: return content;
352: }
353:
354: /**
355: * Protects markup that should not be processed. For now this includes:
356: * <ul>
357: * <li>Preformatted sections, they should be ignored</li>
358: * </li>
359: * <li>Protocol strings like <code>http://</code>, they cause problems
360: * because of the <code>//</code> which is interpreted as italic</li>
361: * </ul>
362: * This protection is a simple method to keep the regular expressions for
363: * the other markup simple. Internally the protection is done by replacing
364: * the protected markup with the the md5 hash of the markup.
365: *
366: * @param content
367: * @return
368: */
369: private String protectMarkup(String content) {
370: c_protectionMap.clear();
371: this .m_hashList = new ArrayList();
372: content = protectMarkup(content, PREFORMATTED_PROTECTED, "", "");
373: content = protectMarkup(content, URL_PROTECTED, "", "");
374: content = protectMarkup(content, ESCAPE_PROTECTED, "", "");
375: content = protectMarkup(content, CREOLE_PLUGIN, "", "");
376:
377: // content = protectMarkup(content, LINE_PROTECTED);
378: // content = protectMarkup(content, SIGNATURE_PROTECTED);
379: return content;
380: }
381:
382: private ArrayList readPlaceholderProperties(Properties wikiProps) {
383: Set keySet = wikiProps.keySet();
384: Object[] keys = keySet.toArray();
385: ArrayList result = new ArrayList();
386:
387: for (int i = 0; i < keys.length; i++) {
388: String key = keys[i] + "";
389: String value = wikiProps.getProperty(keys[i] + "");
390: if ((key).indexOf("creole.imagePlugin.para.%") > -1) {
391: String[] pair = new String[2];
392: pair[0] = key.replaceAll("creole.imagePlugin.para.%",
393: "");
394: pair[1] = value;
395: result.add(pair);
396: }
397: }
398: return result;
399: }
400:
401: private String replaceImageArea(Properties wikiProps,
402: String content, String markupRegex, String replaceContent,
403: int groupPos, String imagePlugin) {
404: Matcher matcher = Pattern.compile(markupRegex,
405: Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
406: String contentCopy = content;
407:
408: ArrayList plProperties = readPlaceholderProperties(wikiProps);
409:
410: while (matcher.find()) {
411: String protectedMarkup = matcher.group(0);
412: String paramsField = matcher.group(groupPos);
413: String paramsString = "";
414:
415: if (paramsField != null) {
416: String[] params = paramsField.split(",");
417:
418: for (int i = 0; i < params.length; i++) {
419: String param = params[i].replaceAll("\\||\\s", "")
420: .toUpperCase();
421:
422: // Replace placeholder params
423: for (int j = 0; j < plProperties.size(); j++) {
424: String[] pair = (String[]) plProperties.get(j);
425: String key = pair[0];
426: String value = pair[1];
427: String code = param.replaceAll("(?i)([0-9]+)"
428: + key, value + "<check>" + "$1"
429: + "</check>");
430: code = code.replaceAll(
431: "(.*?)%(.*?)<check>(.*?)</check>",
432: "$1$3$2");
433: if (!code.equals(param))
434: paramsString += code;
435: }
436:
437: // Check if it is a number
438: try {
439: Integer.parseInt(param);
440: paramsString += " width='" + param + "px'";
441: } catch (Exception e) {
442:
443: if (wikiProps
444: .getProperty("creole.imagePlugin.para."
445: + param) != null)
446: paramsString += " "
447: + wikiProps
448: .getProperty(
449: "creole.imagePlugin.para."
450: + param)
451: .replaceAll(
452: "^(\"|')(.*)(\"|')$",
453: "$2");
454: }
455: }
456: }
457: String temp = protectedMarkup;
458:
459: protectedMarkup = translateElement(protectedMarkup,
460: markupRegex, replaceContent);
461: protectedMarkup = protectedMarkup.replaceAll("\u2015",
462: paramsString);
463: protectedMarkup = protectedMarkup.replaceAll("\u2016",
464: imagePlugin);
465: protectedMarkup = protectedMarkup.replaceAll("caption=''",
466: "");
467: protectedMarkup = protectedMarkup.replaceAll("\\s+", " ");
468:
469: int pos = contentCopy.indexOf(temp);
470: contentCopy = contentCopy.substring(0, pos)
471: + protectedMarkup
472: + contentCopy.substring(pos + temp.length(),
473: contentCopy.length());
474: }
475: return contentCopy;
476: }
477:
478: private String replaceArea(String content, String markupRegex,
479: String replaceSource, String replaceTarget) {
480: Matcher matcher = Pattern.compile(markupRegex,
481: Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
482: String contentCopy = content;
483:
484: while (matcher.find()) {
485: String protectedMarkup = matcher.group(0);
486: String temp = protectedMarkup;
487: protectedMarkup = protectedMarkup.replaceAll(replaceSource,
488: replaceTarget);
489: int pos = contentCopy.indexOf(temp);
490: contentCopy = contentCopy.substring(0, pos)
491: + protectedMarkup
492: + contentCopy.substring(pos + temp.length(),
493: contentCopy.length());
494: }
495: return contentCopy;
496: }
497:
498: /**
499: * Protects a specific markup
500: *
501: * @see #protectMarkup(String)
502: */
503: private String protectMarkup(String content, String markupRegex,
504: String replaceSource, String replaceTarget) {
505: Matcher matcher = Pattern.compile(markupRegex,
506: Pattern.MULTILINE | Pattern.DOTALL).matcher(content);
507: StringBuffer result = new StringBuffer();
508: while (matcher.find()) {
509: String protectedMarkup = matcher.group();
510: protectedMarkup = protectedMarkup.replaceAll(replaceSource,
511: replaceTarget);
512: try {
513: MessageDigest digest = MessageDigest.getInstance("MD5");
514: digest.reset();
515: digest.update(protectedMarkup.getBytes("UTF-8"));
516: String hash = bytesToHash(digest.digest());
517: matcher.appendReplacement(result, hash);
518: c_protectionMap.put(hash, protectedMarkup);
519: this .m_hashList.add(hash);
520: } catch (NoSuchAlgorithmException e) {
521: // FIXME: Should log properly
522: e.printStackTrace();
523: } catch (UnsupportedEncodingException e) {
524: // FIXME: Auto-generated catch block
525: e.printStackTrace();
526: }
527: }
528: matcher.appendTail(result);
529: return result.toString();
530: }
531:
532: private String bytesToHash(byte[] b) {
533: String hash = "";
534: for (int i = 0; i < b.length; i++) {
535: hash += Integer.toString((b[i] & 0xff) + 0x100, 16)
536: .substring(1);
537: }
538: return hash;
539: }
540:
541: private String translateElement(String content, String fromMarkup,
542: String toMarkup) {
543: Matcher matcher = Pattern
544: .compile(fromMarkup, Pattern.MULTILINE)
545: .matcher(content);
546: StringBuffer result = new StringBuffer();
547:
548: while (matcher.find()) {
549: matcher.appendReplacement(result, toMarkup);
550: }
551: matcher.appendTail(result);
552: return result.toString();
553: }
554: }
|