001: package com.technoetic.xplanner.wiki;
002:
003: import com.technoetic.xplanner.XPlannerProperties;
004: import org.apache.log4j.Logger;
005: import org.apache.oro.text.perl.MalformedPerl5PatternException;
006: import org.apache.oro.text.perl.Perl5Util;
007: import org.apache.oro.text.regex.PatternMatcherInput;
008:
009: import java.io.BufferedReader;
010: import java.io.StringReader;
011: import java.util.ArrayList;
012: import java.util.HashMap;
013: import java.util.Map;
014: import java.util.Properties;
015:
016: public class TwikiFormat implements WikiFormat {
017: private final Logger log = Logger.getLogger(getClass());
018: private Perl5Util perl = new Perl5Util();
019: private ArrayList codeStack = new ArrayList();
020: private static final String mailSubstitution = "s/([\\s\\(])(?:mailto\\:)*([a-zA-Z0-9\\-\\_\\.\\+]+)\\@"
021: + "([a-zA-Z0-9\\-\\_\\.]+)\\.([a-zA-Z0-9\\-\\_]+)(?=[\\s\\.\\,\\;\\:\\!\\?\\)])/"
022: + "$1<a href=\"mailto:$2@$3.$4\">$2@$3.$4<\\/a>/go";
023: private static final String fancyHr = "s@^([a-zA-Z0-9]+)----*@<table width=\"100%\"><tr><td valign=\"bottom\"><h2>$1</h2></td>"
024: + "<td width=\"98%\" valign=\"middle\"><hr /></td></tr></table>@o";
025: private static final String escapeRegexp = "s@([\\*\\?\\.\\[\\](\\)])@\\\\$1@g";
026: private static final String urlPattern = "m@(^|[-*\\W])((\\w+):([\\w\\$\\-_\\@\\&\\;\\.&\\+\\?/:#%~=]+))(\\[([^\\]]+)\\]|)@";
027: private static final String headerPatternDa = "^---+(\\++|\\#+)\\s+(.+)\\s*$"; // '---++ Header', '---## Header'
028: private static final String headerPatternSp = "^\\t(\\++|\\#+)\\s+(.+)\\s*$"; // ' ++ Header', ' + Header'
029: private static final String headerPatternHt = "^<h([1-6])>\\s*(.+?)\\s*</h[1-6]>"; // '<h6>Header</h6>
030: private static final String wikiWordPattern = "(^|[^\\w:/])(\\w+\\.|)([A-Z][a-z]\\w*[A-Z][a-z]\\w*)(\\b|$)";
031: private static final String wikiWordMatch = "m/" + wikiWordPattern
032: + "/";
033: private static Map schemeHandlers;
034: private ExternalWikiAdapter externalWikiAdapter = null;
035: private MalformedPerl5PatternException malformedPattern = null;
036: private Properties properties = XPlannerProperties.getProperties();
037:
/**
 * Creates a formatter that starts with an empty scheme-handler map.
 */
public TwikiFormat() {
    this(new HashMap());
}
041:
042: public TwikiFormat(Map schemeTranslations) {
043: this .schemeHandlers = schemeTranslations;
044: if (properties.getProperty("twiki.wikiadapter") != null) {
045: try {
046: externalWikiAdapter = (ExternalWikiAdapter) Class
047: .forName(
048: properties
049: .getProperty("twiki.wikiadapter"))
050: .newInstance();
051: } catch (Exception e) {
052: throw new RuntimeException(e);
053: }
054: }
055: }
056:
057: public String format(String text) {
058: boolean inPreformattedSection = false;
059: boolean inVerbatimSection = false;
060: boolean inList = false;
061: boolean inTable = false;
062: PatternMatcherInput patternMatcherInput = new PatternMatcherInput(
063: "");
064: BufferedReader reader = new BufferedReader(new StringReader(
065: text));
066: StringBuffer outputText = new StringBuffer();
067: try {
068: String line = reader.readLine();
069: while (line != null) {
070: try {
071: if (perl.match("m|<pre>|i", line)) {
072: inPreformattedSection = true;
073: }
074: if (perl.match("m|</pre>|i", line)) {
075: inPreformattedSection = false;
076: }
077: if (perl.match("m|<verbatim>|", line)) {
078: line = perl.substitute(
079: "s|<verbatim>|<pre>|goi", line);
080: inVerbatimSection = true;
081:
082: }
083: if (perl.match("m|</verbatim>|", line)) {
084: line = perl.substitute(
085: "s|</verbatim>|</pre>|goi", line);
086: inVerbatimSection = false;
087: }
088: boolean escapeBrackets = (new Boolean(properties
089: .getProperty(
090: WikiFormat.ESCAPE_BRACKETS_KEY,
091: "true"))).booleanValue();
092: if (inPreformattedSection || inVerbatimSection) {
093: if (inVerbatimSection) {
094: line = perl
095: .substitute("s/&/&/go", line);
096: if (escapeBrackets) {
097: line = perl.substitute("s/</</go",
098: line);
099: line = perl.substitute("s/>/>/go",
100: line);
101: }
102: line = perl.substitute(
103: "s/<pre>/<pre>/go", line);
104: }
105: } else {
106: // Blockquote
107: line = perl
108: .substitute(
109: "s|^>(.*?)$|> <cite> $1 </cite><br>|go",
110: line);
111:
112: // Embedded HTML - \263 is a special translation token
113: // -- Allow standalone "<!--"
114: line = perl.substitute("s/<(!--)/\\\\263$1/go",
115: line);
116: // -- Allow standalone "-->"
117: line = perl.substitute("s/(--)>/$1\\\\263/go",
118: line);
119: line = perl.substitute(
120: "s/<(\\S.*?)>/\\\\263$1$\\\\263/g",
121: line);
122: if (escapeBrackets) {
123: line = perl.substitute("s/</</go", line);
124: line = perl.substitute("s/>/>/go", line);
125: }
126: line = perl
127: .substitute(
128: "s/\\\\263(\\S.*?)\\\\263/<$1>/g",
129: line);
130: line = perl.substitute("s/(--)\\\\263/$1>/go",
131: line);
132: line = perl.substitute("s/\\\\263(!--)/<$1/go",
133: line);
134:
135: // Entities
136: line = perl.substitute(
137: "s/&(\\w+?);/\\\\236$1;/g", line); // "&abc;"
138: line = perl.substitute(
139: "s/&(#[0-9]+);/\\\\236$1;/g", line); // "{"
140: line = perl.substitute("s/&/&/go", line); // escape standalone "&"
141: line = perl.substitute("s/\\\\236/&/go", line);
142:
143: // Headings
144: // -- '<h6>...</h6>' HTML rule
145: patternMatcherInput.setInput(line);
146: while (perl.match("m|" + headerPatternHt + "|",
147: patternMatcherInput)) {
148: line = perl.substitute("s@"
149: + headerPatternHt
150: + "@"
151: + makeAnchorHeading(perl.group(2),
152: Integer.parseInt(perl
153: .group(1)))
154: + "@goi", line);
155: }
156: // -- '\t+++++++' rule
157: patternMatcherInput.setInput(line);
158: while (perl.match("m|" + headerPatternSp + "|",
159: patternMatcherInput)) {
160: line = perl.substitute("s@"
161: + headerPatternSp
162: + "@"
163: + makeAnchorHeading(perl.group(2),
164: perl.group(1).length())
165: + "@goi", line);
166: }
167: // -- '---+++++++' rule
168: patternMatcherInput.setInput(line);
169: while (perl.match("m|" + headerPatternDa + "|",
170: patternMatcherInput)) {
171: line = perl.substitute("s@"
172: + headerPatternDa
173: + "@"
174: + makeAnchorHeading(perl.group(2),
175: perl.group(1).length())
176: + "@goi", line);
177: }
178:
179: // Lists etc.
180: // -- TWiki seems to be looking for tabs at the beginning of the line.
181: // However, the formatting clearly uses 3 spaces, so ???.
182: // The following substitutes tabs for groups of 3 spaces at the begininng of
183: // the line.
184: while (perl.match("m/^(\t*) /", line)) {
185: line = perl.substitute(
186: "s/^(\t*) /$1\t/o", line);
187: }
188: if (perl.match("m/^\\s*$/", line)) {
189: line = perl.substitute("s/^\\s*$/<p\\/>/o",
190: line);
191: inList = false;
192: }
193: if (perl.match("m/^(\\S+?)/", line)) {
194: inList = false;
195: }
196: if (perl.match("m/^(\\t+)(\\S+?):\\s/", line)) {
197: line = perl
198: .substitute(
199: "s/^(\\t+)(\\S+?):\\s/<dt> $2<dd> /o",
200: line);
201: emitCode(outputText, "dl", perl.group(1)
202: .length());
203: inList = true;
204: }
205: if (perl.match("m/^(\\t+)\\* /", line)) {
206: line = perl.substitute(
207: "s/^(\\t+)\\* /<li> /o", line);
208: emitCode(outputText, "ul", perl.group(1)
209: .length());
210: inList = true;
211: }
212: if (perl.match("m/^(\\t+)\\d+\\.?/", line)) {
213: line = perl.substitute(
214: "s/^(\\t+)\\d+\\.? /<li> /o", line);
215: emitCode(outputText, "ol", perl.group(1)
216: .length());
217: inList = true;
218: }
219:
220: if (inList == false) {
221: emitCode(outputText, "", 0);
222: }
223:
224: // Table
225: if (perl.match("m/^(\\s*)\\|(.*)/", line)) {
226: line = perl.substitute("s/^(\\s*)\\|(.*)/"
227: + emitTableRow("", perl.group(2),
228: inTable) + "/", line);
229: inTable = true;
230: } else if (inTable) {
231: outputText.append("</table>");
232: inTable = false;
233: }
234:
235: // Emphasizing
236: if (perl
237: .match(
238: "m/([\\s\\(]*)==([^\\s]+?|[^\\s].*?[^\\s])==([\\s,.;:!?<)]|$)/",
239: line)) {
240: line = perl
241: .substitute(
242: "s/([\\s\\(]*)==([^\\s]+?|[^\\s].*?[^\\s])==([\\s,.;:!?)<]|$)/"
243: + "$1<code><b>$2<\\/b><\\/code>$3/g",
244: line);
245: }
246:
247: if (perl
248: .match(
249: "m/([\\s\\(]*)__([^\\s]+?|[^\\s].*?[^\\s])__([\\s,.;:!?)<]|$)/",
250: line)) {
251: line = perl
252: .substitute(
253: "s/([\\s\\(]*)__([^\\s]+?|[^\\s].*?[^\\s])__([\\s,.;:!?)<]|$)/"
254: + "$1<strong><em>$2<\\/em><\\/strong>$3/g",
255: line);
256: }
257:
258: if (perl
259: .match(
260: "m/([\\s\\(]*)\\*([^\\s]+?|[^\\s].*?[^\\s])\\*([\\s,.;:!?)<]|$)/",
261: line)) {
262: line = perl
263: .substitute(
264: "s/([\\s\\(]*)\\*([^\\s]+?|[^\\s].*?[^\\s])\\*([\\s,.;:!?)<]|$)/"
265: + "$1<strong>$2<\\/strong>$3/g",
266: line);
267: }
268:
269: if (perl
270: .match(
271: "m/([\\s\\(]*)_([^\\s]+?|[^\\s].*?[^\\s])_([\\s,.;:!?)<]|$)/",
272: line)) {
273: line = perl.substitute(
274: "s/([\\s\\(]*)_([^\\s]+?|[^\\s].*?[^\\s])_([\\s,.;:!?)<]|$)/"
275: + "$1<em>$2<\\/em>$3/g",
276: line);
277: }
278:
279: if (perl
280: .match(
281: "m/([\\s\\(]*)=([^\\s]+?|[^\\s].*?[^\\s])=([\\s,.;:!?)<]|$)/",
282: line)) {
283: line = perl
284: .substitute(
285: "s/([\\s\\(]*)=([^\\s]+?|[^\\s].*?[^\\s])=([\\s,.;:!?)<]|$)/"
286: + "$1<code>$2<\\/code>$3/g",
287: line);
288: }
289:
290: // Mailto
291: line = perl.substitute(mailSubstitution, line);
292:
293: //# Horizontal rule
294: line = perl.substitute("s/^---+/<hr\\/>/o",
295: line);
296: line = perl.substitute(fancyHr, line);
297:
298: // patternMatcherInput.setInput(line);
299: // while (perl.match("m|" + headerPatternHt + "|", patternMatcherInput)) {
300: // line = perl.substitute("s@" + headerPatternHt + "@" +
301: // makeAnchorHeading(perl.group(2),
302: // Integer.parseInt(perl.group(1))) + "@goi", line);
303: // }
304:
305: // WikiWord
306: if (externalWikiAdapter != null) {
307: patternMatcherInput.setInput(line);
308: while (perl.match(wikiWordMatch,
309: patternMatcherInput)) {
310: String wikiWord = perl.group(2)
311: + perl.group(3);
312: line = perl
313: .substitute(
314: "s\0"
315: + wikiWord
316: + "\0"
317: +
318: /*perl.group(1)+*/
319: externalWikiAdapter
320: .formatWikiWord(wikiWord)
321: + perl.group(4)
322: + "\0", line);
323: }
324: }
325:
326: // Handle embedded URLs
327: patternMatcherInput.setInput(line);
328: while (perl.match(urlPattern,
329: patternMatcherInput)) {
330: String link = perl.group(0);
331: String previousText = perl.group(1);
332: String scheme = perl.group(3);
333: String location = perl.group(4);
334: String linkText = perl.group(6);
335: String formattedLink = formatLink(
336: previousText, scheme, location,
337: linkText);
338: if (formattedLink != null) {
339: link = perl.substitute(escapeRegexp,
340: link);
341: formattedLink = formattedLink
342: .replaceAll("@", "\\\\@");
343: line = perl.substitute("s@" + link
344: + "@" + formattedLink + "@go",
345: line);
346: }
347: }
348:
349: }
350: } catch (MalformedPerl5PatternException ex) {
351: // just continue, set flag for testing purposes
352: malformedPattern = ex;
353: }
354: outputText.append(line);
355: outputText.append("\n");
356: line = reader.readLine();
357: }
358: emitCode(outputText, "", 0);
359: if (inTable) {
360: outputText.append("</table>");
361: }
362: if (inPreformattedSection || inVerbatimSection) {
363: outputText.append("</pre>");
364: }
365: } catch (Exception ex) {
366: log.error("error during formatting", ex);
367: outputText.setLength(0);
368: outputText.append("[Error during formatting]");
369: }
370: return outputText.toString();
371: }
372:
373: public void setProperties(Properties properties) {
374: this .properties = properties;
375: }
376:
377: private String formatLink(String previousText, String scheme,
378: String location, String linkText) {
379: if (scheme.equals("mailto")) {
380: return null;
381: }
382:
383: SchemeHandler handler = (SchemeHandler) schemeHandlers
384: .get(scheme);
385: if (handler != null) {
386: return previousText
387: + handler.translate(properties, scheme, location,
388: linkText);
389: }
390: String url = scheme + ":" + location;
391: if (perl.match("m/http|ftp|gopher|news|file|https/", scheme)) {
392: if (linkText == null) {
393: linkText = url;
394: }
395: if (perl.match("m|\\.(gif|jpg|jpeg|png)(#|$)|i", url)) {
396: return previousText + "<img border=\"0\" src=\"" + url
397: + "\"/>";
398: } else {
399: return previousText + "<a href=\"" + url
400: + "\" target=\"_top\">" + linkText + "</a>";
401: }
402: }
403: String returnFormatLink = "";
404: if (linkText != null) {
405: returnFormatLink = previousText + url + "[" + linkText
406: + "]";
407: } else {
408: returnFormatLink = previousText + url;
409: }
410:
411: return returnFormatLink;
412: }
413:
414: private String makeAnchorName(String text) {
415: text = perl.substitute("s/^[\\s\\#\\_]*//o", text); //no leading space nor '#', '_'
416: text = perl.substitute("s/[\\s\\_]*$//o", text); // no trailing space, nor '_'
417: text = perl.substitute("s/<\\w[^>]*>//goi", text); //remove HTML tags
418: text = perl.substitute("s/[^a-zA-Z0-9]/_/go", text); // only allowed chars
419: text = perl.substitute("s/__+/_/go", text); // remove excessive '_'
420: text = perl.substitute("s/^(.{32})(.*)$/$1/o", text); // limit to 32 chars
421: return text;
422: }
423:
424: private String makeAnchorHeading(String text, int level) {
425: // - Need to build '<nop><h1><a name="atext"> text </a></h1>'
426: // type markup.
427: // - Initial '<nop>' is needed to prevent subsequent matches.
428: // - Need to make sure that <a> tags are not nested, i.e. in
429: // case heading has a WikiName that gets linked
430: String anchorName = makeAnchorName(text);
431: boolean hasAnchor = perl.match("m/<a /i", text)
432: || perl.match("m/\\[\\[/", text)
433: || perl.match("m/(^|[\\*\\s][\\-\\*\\s]*)([A-Z]{3,})/",
434: text)
435: || perl
436: .match(
437: "m/(^|[\\*\\s][\\(\\-\\*\\s]*)([A-Z]+[a-z0-9]*)\\.([A-Z]+[a-z]+[A-Z]+[a-zA-Z0-9]*)/",
438: text)
439: || perl
440: .match(
441: "m/(^|[\\*\\s][\\(\\-\\*\\s]*)([A-Z]+[a-z]+[A-Z]+[a-zA-Z0-9]*)/",
442: text);
443: if (hasAnchor) {
444: // # (From TWiki) FIXME: '<h1><a name="atext"></a></h1> WikiName' has an
445: // empty <a> tag, which is not HTML conform
446: text = "<nop><h" + level + "><a name=\"" + anchorName
447: + "\"> </a> " + text + "</h" + level + ">";
448: } else {
449: text = "<nop><h" + level + "><a name=\"" + anchorName
450: + "\"> " + text + " </a></h" + level + ">";
451: }
452: return text;
453: }
454:
455: public void emitCode(StringBuffer result, String code, int depth) {
456: while (codeStack.size() > depth) {
457: String c = (String) codeStack.remove(codeStack.size() - 1);
458: result.append("</").append(c).append(">\n");
459: }
460: while (codeStack.size() < depth) {
461: codeStack.add(code);
462: result.append("<").append(code).append(">\n");
463: }
464:
465: //if( ( $#code > -1 ) && ( $code[$#code] ne $code ) ) {
466: if (!codeStack.isEmpty()
467: && !codeStack.get(codeStack.size() - 1).equals(code)) {
468: result.append("</").append(
469: codeStack.get(codeStack.size() - 1)).append("><")
470: .append(code).append(">\n");
471: codeStack.set(codeStack.size() - 1, code);
472: }
473: }
474:
475: public String emitTableRow(String previousText, String row,
476: boolean inTable) {
477: StringBuffer result = new StringBuffer();
478: if (inTable) {
479: result.append(previousText).append("<tr class=\"twiki\">");
480: } else {
481: result.append(previousText);
482: result
483: .append("<table class=\"twiki\" border=\"1\" cellspacing=\"0\" cellpadding=\"1\">");
484: result.append("<tr class=\"twiki\">");
485: }
486: row = perl.substitute("s/\\t/ /go", row); // change tab to spaces
487: row = perl.substitute("s/\\s*$//o", row); // remove trailing white space
488: while (perl.match("m/(\\|\\|+)/", row)) {
489: // calc COLSPAN
490: row = perl.substitute("s/(\\|\\|+)/\\\\236"
491: + perl.group(1).length() + "\\|/go", row);
492: }
493:
494: ArrayList cells = new ArrayList();
495: perl.split(cells, "/\\|/", row);
496: for (int i = 0, n = cells.size() - 1; i < n; i++) {
497: String cell = (String) cells.get(i);
498: //TODO 3/21/05 JM Added during merge from the Sabre codebase. Verify it was added in the Sabre codebase otherwise remove it
499: cell = perl.substitute("s/\\//-/go", cell);
500: String attribute = "";
501: if (perl.match("m/\\\\236([0-9]+)/", cell)) {
502: cell = perl.substitute("s/\\\\236([0-9]+)//", cell);
503: attribute = " colspan=\""
504: + Integer.parseInt(perl.group(1)) + "\"";
505: }
506: cell = perl.substitute("s/^\\s+$/ /o", cell);
507: perl.match("m/^(\\s*).*?(\\s*)$/", cell);
508: String left = perl.group(1);
509: String right = perl.group(2);
510: if (left.length() > right.length()) {
511: if (right.length() <= 1) {
512: attribute += " align=\"right\"";
513: } else {
514: attribute += " align=\"center\"";
515: }
516: }
517: if (perl.match("m/^\\s*(\\*.*\\*)\\s*$/", cell)) {
518: result.append("<th").append(attribute).append(
519: " class=\"twiki\" bgcolor=\"#99CCCC\">")
520: .append(perl.group(1)).append("<\\/th>");
521: } else {
522: result.append("<td").append(attribute).append(
523: " class=\"twiki\">").append(cell).append(
524: "<\\/td>");
525: }
526: }
527: result.append("<\\/tr>");
528: return result.toString();
529: }
530:
531: public void setExternalWikiAdapter(
532: ExternalWikiAdapter wikiWordFormatter) {
533: this .externalWikiAdapter = wikiWordFormatter;
534: }
535:
536: public MalformedPerl5PatternException getMalformedPatternException() {
537: return malformedPattern;
538: }
539: }
|