0001: /*
0002: JSPWiki - a JSP-based WikiWiki clone.
0003:
0004: Copyright (C) 2001-2005 Janne Jalkanen (Janne.Jalkanen@iki.fi)
0005:
0006: This program is free software; you can redistribute it and/or modify
0007: it under the terms of the GNU Lesser General Public License as published by
0008: the Free Software Foundation; either version 2.1 of the License, or
0009: (at your option) any later version.
0010:
0011: This program is distributed in the hope that it will be useful,
0012: but WITHOUT ANY WARRANTY; without even the implied warranty of
0013: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0014: GNU Lesser General Public License for more details.
0015:
0016: You should have received a copy of the GNU Lesser General Public License
0017: along with this program; if not, write to the Free Software
0018: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0019: */
0020: package com.ecyrd.jspwiki.parser;
0021:
0022: import java.io.IOException;
0023: import java.io.Reader;
0024: import java.io.StringReader;
0025: import java.text.MessageFormat;
0026: import java.util.*;
0027:
0028: import javax.xml.transform.Result;
0029:
0030: import org.apache.commons.lang.StringEscapeUtils;
0031: import org.apache.commons.lang.StringUtils;
0032: import org.apache.log4j.Logger;
0033: import org.apache.oro.text.GlobCompiler;
0034: import org.apache.oro.text.regex.*;
0035: import org.jdom.*;
0036:
0037: import com.ecyrd.jspwiki.*;
0038: import com.ecyrd.jspwiki.attachment.Attachment;
0039: import com.ecyrd.jspwiki.attachment.AttachmentManager;
0040: import com.ecyrd.jspwiki.auth.WikiSecurityException;
0041: import com.ecyrd.jspwiki.auth.acl.Acl;
0042: import com.ecyrd.jspwiki.i18n.InternationalizationManager;
0043: import com.ecyrd.jspwiki.plugin.PluginException;
0044: import com.ecyrd.jspwiki.plugin.PluginManager;
0045: import com.ecyrd.jspwiki.plugin.WikiPlugin;
0046: import com.ecyrd.jspwiki.providers.ProviderException;
0047: import com.ecyrd.jspwiki.render.CleanTextRenderer;
0048: import com.ecyrd.jspwiki.render.RenderingManager;
0049:
0050: /**
0051: * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the
0052: * heart and soul of JSPWiki : make sure you test properly anything that is added,
0053: * or else it breaks down horribly.
0054: *
0055: * @author Janne Jalkanen
0056: * @since 2.4
0057: */
0058: public class JSPWikiMarkupParser extends MarkupParser {
0059: /** Name of the outlink image; relative path to the JSPWiki directory. */
0060: private static final String OUTLINK_IMAGE = "images/out.png";
0061:
0062: /** The value for anchor element <tt>class</tt> attributes when used
0063: * for wiki page (normal) links. The value is "wikipage". */
0064: public static final String CLASS_WIKIPAGE = "wikipage";
0065:
0066: /** The value for anchor element <tt>class</tt> attributes when used
0067: * for edit page links. The value is "createpage". */
0068: public static final String CLASS_EDITPAGE = "createpage";
0069:
0070: /** The value for anchor element <tt>class</tt> attributes when used
0071: * for interwiki page links. The value is "interwiki". */
0072: public static final String CLASS_INTERWIKI = "interwiki";
0073:
0074: protected static final int READ = 0;
0075: protected static final int EDIT = 1;
0076: protected static final int EMPTY = 2; // Empty message
0077: protected static final int LOCAL = 3;
0078: protected static final int LOCALREF = 4;
0079: protected static final int IMAGE = 5;
0080: protected static final int EXTERNAL = 6;
0081: protected static final int INTERWIKI = 7;
0082: protected static final int IMAGELINK = 8;
0083: protected static final int IMAGEWIKILINK = 9;
0084: protected static final int ATTACHMENT = 10;
0085:
0086: private static Logger log = Logger
0087: .getLogger(JSPWikiMarkupParser.class);
0088:
0089: private boolean m_isbold = false;
0090: private boolean m_isitalic = false;
0091: private boolean m_istable = false;
0092: private boolean m_isPre = false;
0093: private boolean m_isEscaping = false;
0094: private boolean m_isdefinition = false;
0095: private boolean m_isPreBlock = false;
0096:
0097: /** Contains style information, in multiple forms. */
0098: private Stack m_styleStack = new Stack();
0099:
0100: // general list handling
0101: private int m_genlistlevel = 0;
0102: private StringBuffer m_genlistBulletBuffer = new StringBuffer(10); // stores the # and * pattern
0103: private boolean m_allowPHPWikiStyleLists = true;
0104:
0105: private boolean m_isOpenParagraph = false;
0106:
0107: /** Keeps image regexp Patterns */
0108: private List m_inlineImagePatterns;
0109:
0110: /** Parser for extended link functionality. */
0111: private LinkParser m_linkParser = new LinkParser();
0112:
0113: private PatternMatcher m_inlineMatcher = new Perl5Matcher();
0114:
0115: /** Keeps track of any plain text that gets put in the Text nodes */
0116: private StringBuffer m_plainTextBuf = new StringBuffer(20);
0117:
0118: private Element m_currentElement;
0119:
0120: /**
0121: * This property defines the inline image pattern. It's current value
0122: * is jspwiki.translatorReader.inlinePattern
0123: */
0124: public static final String PROP_INLINEIMAGEPTRN = "jspwiki.translatorReader.inlinePattern";
0125:
0126: /** If true, consider CamelCase hyperlinks as well. */
0127: public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";
0128:
0129: /** If true, all hyperlinks are translated as well, regardless whether they
0130: are surrounded by brackets. */
0131: public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";
0132:
0133: /** If true, all outward links (external links) have a small link image appended. */
0134: public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";
0135:
0136: /** If true, all outward attachment info links have a small link image appended. */
0137: public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";
0138:
0139: /** If set to "true", all external links are tagged with 'rel="nofollow"' */
0140: public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";
0141:
0142: /** If true, then considers CamelCase links as well. */
0143: private boolean m_camelCaseLinks = false;
0144:
0145: /** If true, then generate special output for wysiwyg editing in certain cases */
0146: private boolean m_wysiwygEditorMode = false;
0147:
0148: /** If true, consider URIs that have no brackets as well. */
0149: // FIXME: Currently reserved, but not used.
0150: private boolean m_plainUris = false;
0151:
0152: /** If true, all outward links use a small link image. */
0153: private boolean m_useOutlinkImage = true;
0154:
0155: private boolean m_useAttachmentImage = true;
0156:
0157: /** If true, allows raw HTML. */
0158: private boolean m_allowHTML = false;
0159:
0160: private boolean m_useRelNofollow = false;
0161:
0162: private PatternCompiler m_compiler = new Perl5Compiler();
0163:
0164: static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";
0165:
0166: private PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
0167: private Pattern m_camelCasePattern;
0168:
0169: private int m_rowNum = 1;
0170:
/**
 * The inlining pattern used when none is configured. Currently "*.png"
 */
public static final String DEFAULT_INLINEPATTERN = "*.png";

/**
 * All IANA registered URI protocol types as of September 2004, plus a
 * few well-known extra types. JSPWiki recognises all of them as
 * external links.
 *
 * The static initializer below sorts this array at class load, so new
 * entries may be added in any order.
 *
 * NOTE(review): "z39.50s", "z39.50r", "soap.beeps", "xmlrpc.beep",
 * "xmlrpc.beeps" and "mtqp" carry no trailing colon, unlike the other
 * entries - confirm whether that is intended.
 */
static final String[] c_externalLinks = {
    "http:", "ftp:", "https:", "mailto:", "news:", "file:", "rtsp:",
    "mms:", "ldap:", "gopher:", "nntp:", "telnet:", "wais:",
    "prospero:", "z39.50s", "z39.50r", "vemmi:", "imap:", "nfs:",
    "acap:", "tip:", "pop:", "dav:", "opaquelocktoken:", "sip:",
    "sips:", "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",
    "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:", "h323:", "ipp:",
    "tftp:", "mupdate:", "pres:", "im:", "mtqp", "smb:"
};

/** Engine attribute under which the compiled inline image patterns are cached. */
private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";

/** Engine attribute under which the compiled CamelCase pattern is cached. */
private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";

/**
 * Anchor <tt>class</tt> values, indexed by link type (READ, EDIT, ...,
 * ATTACHMENT); createAnchor() looks the value up by type.
 */
private static final String[] CLASS_TYPES = {
    CLASS_WIKIPAGE,  // READ
    CLASS_EDITPAGE,  // EDIT
    "",              // EMPTY
    "footnote",      // LOCAL
    "footnoteref",   // LOCALREF
    "",              // IMAGE
    "external",      // EXTERNAL
    CLASS_INTERWIKI, // INTERWIKI
    "external",      // IMAGELINK
    CLASS_WIKIPAGE,  // IMAGEWIKILINK
    "attachment"     // ATTACHMENT
};

/**
 * Comparator used by isExternalLink() to look a link up in
 * c_externalLinks; it checks whether the link starts with an entry.
 */
private static Comparator c_startingComparator = new StartingComparator();

static {
    // The binary search in isExternalLink() needs sorted input.
    Arrays.sort(c_externalLinks);
}
0213:
/**
 * Creates a markup parser and configures it from the engine and the
 * context (see initialize()).
 *
 * @param context the WikiContext passed on to the MarkupParser superclass
 * @param in the Reader passed on to the MarkupParser superclass
 */
public JSPWikiMarkupParser(WikiContext context, Reader in) {
    super(context, in);

    initialize();
}
0221:
0222: /**
0223: * @param m_engine The WikiEngine this reader is attached to. Is
0224: * used to figure out of a page exits.
0225: */
0226:
0227: // FIXME: parsers should be pooled for better performance.
0228: private void initialize() {
0229: PatternCompiler compiler = new GlobCompiler();
0230: List compiledpatterns;
0231:
0232: //
0233: // We cache compiled patterns in the engine, since their creation is
0234: // really expensive
0235: //
0236: compiledpatterns = (List) m_engine
0237: .getAttribute(INLINE_IMAGE_PATTERNS);
0238:
0239: if (compiledpatterns == null) {
0240: compiledpatterns = new ArrayList(20);
0241: Collection ptrns = getImagePatterns(m_engine);
0242:
0243: //
0244: // Make them into Regexp Patterns. Unknown patterns
0245: // are ignored.
0246: //
0247: for (Iterator i = ptrns.iterator(); i.hasNext();) {
0248: try {
0249: compiledpatterns.add(compiler.compile((String) i
0250: .next(), GlobCompiler.DEFAULT_MASK
0251: | GlobCompiler.READ_ONLY_MASK));
0252: } catch (MalformedPatternException e) {
0253: log.error("Malformed pattern in properties: ", e);
0254: }
0255: }
0256:
0257: m_engine.setAttribute(INLINE_IMAGE_PATTERNS,
0258: compiledpatterns);
0259: }
0260:
0261: m_inlineImagePatterns = Collections
0262: .unmodifiableList(compiledpatterns);
0263:
0264: m_camelCasePattern = (Pattern) m_engine
0265: .getAttribute(CAMELCASE_PATTERN);
0266: if (m_camelCasePattern == null) {
0267: try {
0268: m_camelCasePattern = m_compiler.compile(WIKIWORD_REGEX,
0269: Perl5Compiler.DEFAULT_MASK
0270: | Perl5Compiler.READ_ONLY_MASK);
0271: } catch (MalformedPatternException e) {
0272: log
0273: .fatal(
0274: "Internal error: Someone put in a faulty pattern.",
0275: e);
0276: throw new InternalWikiException(
0277: "Faulty camelcasepattern in TranslatorReader");
0278: }
0279: m_engine
0280: .setAttribute(CAMELCASE_PATTERN, m_camelCasePattern);
0281: }
0282: //
0283: // Set the properties.
0284: //
0285: Properties props = m_engine.getWikiProperties();
0286:
0287: String cclinks = (String) m_context.getPage().getAttribute(
0288: PROP_CAMELCASELINKS);
0289:
0290: if (cclinks != null) {
0291: m_camelCaseLinks = TextUtil.isPositive(cclinks);
0292: } else {
0293: m_camelCaseLinks = TextUtil.getBooleanProperty(props,
0294: PROP_CAMELCASELINKS, m_camelCaseLinks);
0295: }
0296:
0297: Boolean wysiwygVariable = (Boolean) m_context
0298: .getVariable(RenderingManager.WYSIWYG_EDITOR_MODE);
0299: if (wysiwygVariable != null) {
0300: m_wysiwygEditorMode = wysiwygVariable.booleanValue();
0301: }
0302:
0303: m_plainUris = getLocalBooleanProperty(m_context, props,
0304: PROP_PLAINURIS, m_plainUris);
0305: m_useOutlinkImage = getLocalBooleanProperty(m_context, props,
0306: PROP_USEOUTLINKIMAGE, m_useOutlinkImage);
0307: m_useAttachmentImage = getLocalBooleanProperty(m_context,
0308: props, PROP_USEATTACHMENTIMAGE, m_useAttachmentImage);
0309: m_allowHTML = getLocalBooleanProperty(m_context, props,
0310: MarkupParser.PROP_ALLOWHTML, m_allowHTML);
0311:
0312: m_useRelNofollow = getLocalBooleanProperty(m_context, props,
0313: PROP_USERELNOFOLLOW, m_useRelNofollow);
0314:
0315: if (m_engine.getUserManager().getUserDatabase() == null
0316: || m_engine.getAuthorizationManager() == null) {
0317: disableAccessRules();
0318: }
0319:
0320: m_context.getPage().setHasMetadata();
0321: }
0322:
0323: /**
0324: * This is just a simple helper method which will first check the context
0325: * if there is already an override in place, and if there is not,
0326: * it will then check the given properties.
0327: *
0328: * @param context WikiContext to check first
0329: * @param props Properties to check next
0330: * @param key What key are we searching for?
0331: * @param defValue Default value for the boolean
0332: * @return True or false
0333: */
0334: private static boolean getLocalBooleanProperty(WikiContext context,
0335: Properties props, String key, boolean defValue) {
0336: Object bool = context.getVariable(key);
0337:
0338: if (bool != null) {
0339: return TextUtil.isPositive((String) bool);
0340: }
0341:
0342: return TextUtil.getBooleanProperty(props, key, defValue);
0343: }
0344:
0345: /**
0346: * Figure out which image suffixes should be inlined.
0347: * @return Collection of Strings with patterns.
0348: */
0349:
0350: // FIXME: Does not belong here; should be elsewhere
0351: public static Collection getImagePatterns(WikiEngine engine) {
0352: Properties props = engine.getWikiProperties();
0353: ArrayList ptrnlist = new ArrayList();
0354:
0355: for (Enumeration e = props.propertyNames(); e.hasMoreElements();) {
0356: String name = (String) e.nextElement();
0357:
0358: if (name.startsWith(PROP_INLINEIMAGEPTRN)) {
0359: String ptrn = TextUtil.getStringProperty(props, name,
0360: null);
0361:
0362: ptrnlist.add(ptrn);
0363: }
0364: }
0365:
0366: if (ptrnlist.size() == 0) {
0367: ptrnlist.add(DEFAULT_INLINEPATTERN);
0368: }
0369:
0370: return ptrnlist;
0371: }
0372:
0373: /**
0374: * Returns link name, if it exists; otherwise it returns null.
0375: */
0376: private String linkExists(String page) {
0377: try {
0378: if (page == null || page.length() == 0)
0379: return null;
0380:
0381: return m_engine.getFinalPageName(page);
0382: } catch (ProviderException e) {
0383: log.warn("TranslatorReader got a faulty page name!", e);
0384:
0385: return page; // FIXME: What would be the correct way to go back?
0386: }
0387: }
0388:
0389: /**
0390: * Calls a transmutator chain.
0391: *
0392: * @param list Chain to call
0393: * @param text Text that should be passed to the mutate() method
0394: * of each of the mutators in the chain.
0395: * @return The result of the mutation.
0396: */
0397:
0398: protected String callMutatorChain(Collection list, String text) {
0399: if (list == null || list.size() == 0) {
0400: return text;
0401: }
0402:
0403: for (Iterator i = list.iterator(); i.hasNext();) {
0404: StringTransmutator m = (StringTransmutator) i.next();
0405:
0406: text = m.mutate(m_context, text);
0407: }
0408:
0409: return text;
0410: }
0411:
0412: /**
0413: * Calls the heading listeners.
0414: *
0415: * @param param A Heading object.
0416: */
0417: protected void callHeadingListenerChain(Heading param) {
0418: List list = m_headingListenerChain;
0419:
0420: for (Iterator i = list.iterator(); i.hasNext();) {
0421: HeadingListener h = (HeadingListener) i.next();
0422:
0423: h.headingAdded(m_context, param);
0424: }
0425: }
0426:
0427: /**
0428: * Creates a JDOM anchor element. Can be overridden to change the URL creation,
0429: * if you really know what you are doing.
0430: *
0431: * @param type One of the types above
0432: * @param link URL to which to link to
0433: * @param text Link text
0434: * @param section If a particular section identifier is required.
0435: * @return An A element.
0436: * @since 2.4.78
0437: */
0438: protected Element createAnchor(int type, String link, String text,
0439: String section) {
0440: text = escapeHTMLEntities(text);
0441: section = escapeHTMLEntities(section);
0442: Element el = new Element("a");
0443: el.setAttribute("class", CLASS_TYPES[type]);
0444: el.setAttribute("href", link + section);
0445: el.addContent(text);
0446: return el;
0447: }
0448:
0449: private Element makeLink(int type, String link, String text,
0450: String section, Iterator attributes) {
0451: Element el = null;
0452:
0453: if (text == null)
0454: text = link;
0455:
0456: text = callMutatorChain(m_linkMutators, text);
0457:
0458: section = (section != null) ? ("#" + section) : "";
0459:
0460: // Make sure we make a link name that can be accepted
0461: // as a valid URL.
0462:
0463: if (link.length() == 0) {
0464: type = EMPTY;
0465: }
0466: ResourceBundle rb = m_context
0467: .getBundle(InternationalizationManager.CORE_BUNDLE);
0468: Object[] args = { link };
0469:
0470: switch (type) {
0471: case READ:
0472: el = createAnchor(READ, m_context.getURL(WikiContext.VIEW,
0473: link), text, section);
0474: break;
0475:
0476: case EDIT:
0477: el = createAnchor(EDIT, m_context.getURL(WikiContext.EDIT,
0478: link), text, "");
0479: el.setAttribute("title", MessageFormat.format(rb
0480: .getString("markupparser.link.create"), args));
0481: break;
0482:
0483: case EMPTY:
0484: el = new Element("u").addContent(text);
0485: break;
0486:
0487: //
0488: // These two are for local references - footnotes and
0489: // references to footnotes.
0490: // We embed the page name (or whatever WikiContext gives us)
0491: // to make sure the links are unique across Wiki.
0492: //
0493: case LOCALREF:
0494: el = createAnchor(LOCALREF, "#ref-" + m_context.getName()
0495: + "-" + link, "[" + text + "]", "");
0496: break;
0497:
0498: case LOCAL:
0499: el = new Element("a").setAttribute("class", "footnote");
0500: el.setAttribute("name", "ref-" + m_context.getName() + "-"
0501: + link.substring(1));
0502: el.addContent("[" + text + "]");
0503: break;
0504:
0505: //
0506: // With the image, external and interwiki types we need to
0507: // make sure nobody can put in Javascript or something else
0508: // annoying into the links themselves. We do this by preventing
0509: // a haxor from stopping the link name short with quotes in
0510: // fillBuffer().
0511: //
0512: case IMAGE:
0513: el = new Element("img").setAttribute("class", "inline");
0514: el.setAttribute("src", link);
0515: el.setAttribute("alt", text);
0516: break;
0517:
0518: case IMAGELINK:
0519: el = new Element("img").setAttribute("class", "inline");
0520: el.setAttribute("src", link);
0521: el.setAttribute("alt", text);
0522: el = createAnchor(IMAGELINK, text, "", "").addContent(el);
0523: break;
0524:
0525: case IMAGEWIKILINK:
0526: String pagelink = m_context.getURL(WikiContext.VIEW, text);
0527: el = new Element("img").setAttribute("class", "inline");
0528: el.setAttribute("src", link);
0529: el.setAttribute("alt", text);
0530: el = createAnchor(IMAGEWIKILINK, pagelink, "", "")
0531: .addContent(el);
0532: break;
0533:
0534: case EXTERNAL:
0535: el = createAnchor(EXTERNAL, link, text, section);
0536: if (m_useRelNofollow)
0537: el.setAttribute("rel", "nofollow");
0538: break;
0539:
0540: case INTERWIKI:
0541: el = createAnchor(INTERWIKI, link, text, section);
0542: break;
0543:
0544: case ATTACHMENT:
0545: String attlink = m_context.getURL(WikiContext.ATTACH, link);
0546:
0547: String infolink = m_context.getURL(WikiContext.INFO, link);
0548:
0549: String imglink = m_context.getURL(WikiContext.NONE,
0550: "images/attachment_small.png");
0551:
0552: el = createAnchor(ATTACHMENT, attlink, text, "");
0553:
0554: pushElement(el);
0555: popElement(el.getName());
0556:
0557: if (m_useAttachmentImage) {
0558: el = new Element("img").setAttribute("src", imglink);
0559: el.setAttribute("border", "0");
0560: el.setAttribute("alt", "(info)");
0561:
0562: el = new Element("a").setAttribute("href", infolink)
0563: .addContent(el);
0564: } else {
0565: el = null;
0566: }
0567: break;
0568:
0569: default:
0570: break;
0571: }
0572:
0573: if (el != null && attributes != null) {
0574: while (attributes.hasNext()) {
0575: Attribute attr = (Attribute) attributes.next();
0576: if (attr != null) {
0577: el.setAttribute(attr);
0578: }
0579: }
0580: }
0581:
0582: if (el != null) {
0583: flushPlainText();
0584: m_currentElement.addContent(el);
0585: }
0586: return el;
0587: }
0588:
0589: /**
0590: * Figures out if a link is an off-site link. This recognizes
0591: * the most common protocols by checking how it starts.
0592: *
0593: * @since 2.4
0594: */
0595:
0596: public static boolean isExternalLink(String link) {
0597: int idx = Arrays.binarySearch(c_externalLinks, link,
0598: c_startingComparator);
0599:
0600: //
0601: // We need to check here once again; otherwise we might
0602: // get a match for something like "h".
0603: //
0604: if (idx >= 0 && link.startsWith(c_externalLinks[idx]))
0605: return true;
0606:
0607: return false;
0608: }
0609:
0610: /**
0611: * Returns true, if the link in question is an access
0612: * rule.
0613: */
0614: private static boolean isAccessRule(String link) {
0615: return link.startsWith("{ALLOW") || link.startsWith("{DENY");
0616: }
0617:
0618: /**
0619: * Matches the given link to the list of image name patterns
0620: * to determine whether it should be treated as an inline image
0621: * or not.
0622: */
0623: private boolean isImageLink(String link) {
0624: if (m_inlineImages) {
0625: link = link.toLowerCase();
0626:
0627: for (Iterator i = m_inlineImagePatterns.iterator(); i
0628: .hasNext();) {
0629: if (m_inlineMatcher.matches(link, (Pattern) i.next()))
0630: return true;
0631: }
0632: }
0633:
0634: return false;
0635: }
0636:
0637: private static boolean isMetadata(String link) {
0638: return link.startsWith("{SET");
0639: }
0640:
0641: /**
0642: * These are all of the HTML 4.01 block-level elements.
0643: */
0644: private static final String[] BLOCK_ELEMENTS = { "address",
0645: "blockquote", "div", "dl", "fieldset", "form", "h1", "h2",
0646: "h3", "h4", "h5", "h6", "hr", "noscript", "ol", "p", "pre",
0647: "table", "ul" };
0648:
0649: private static final boolean isBlockLevel(String name) {
0650: return Arrays.binarySearch(BLOCK_ELEMENTS, name) >= 0;
0651: }
0652:
0653: /**
0654: * This method peeks ahead in the stream until EOL and returns the result.
0655: * It will keep the buffers untouched.
0656: *
0657: * @return The string from the current position to the end of line.
0658: */
0659:
0660: // FIXME: Always returns an empty line, even if the stream is full.
0661: private String peekAheadLine() throws IOException {
0662: String s = readUntilEOL().toString();
0663:
0664: if (s.length() > PUSHBACK_BUFFER_SIZE) {
0665: log.warn("Line is longer than maximum allowed size ("
0666: + PUSHBACK_BUFFER_SIZE
0667: + " characters. Attempting to recover...");
0668: pushBack(s.substring(0, PUSHBACK_BUFFER_SIZE - 1));
0669: } else {
0670: try {
0671: pushBack(s);
0672: } catch (IOException e) {
0673: log
0674: .warn("Pushback failed: the line is probably too long. Attempting to recover.");
0675: }
0676: }
0677: return s;
0678: }
0679:
0680: /**
0681: * Writes HTML for error message.
0682: */
0683:
0684: public static Element makeError(String error) {
0685: return new Element("span").setAttribute("class", "error")
0686: .addContent(error);
0687: }
0688:
0689: private int flushPlainText() {
0690: int numChars = m_plainTextBuf.length();
0691:
0692: if (numChars > 0) {
0693: String buf;
0694:
0695: if (!m_allowHTML) {
0696: buf = escapeHTMLEntities(m_plainTextBuf.toString());
0697: } else {
0698: buf = m_plainTextBuf.toString();
0699: }
0700: //
0701: // We must first empty the buffer because the side effect of
0702: // calling makeCamelCaseLink() is to call this routine.
0703: //
0704:
0705: m_plainTextBuf = new StringBuffer(20);
0706:
0707: try {
0708: //
0709: // This is the heaviest part of parsing, and therefore we can
0710: // do some optimization here.
0711: //
0712: // 1) Only when the length of the buffer is big enough, we try to do the match
0713: //
0714:
0715: if (m_camelCaseLinks && !m_isEscaping
0716: && buf.length() > 3) {
0717: // System.out.println("Buffer="+buf);
0718:
0719: while (m_camelCaseMatcher.contains(buf,
0720: m_camelCasePattern)) {
0721: MatchResult result = m_camelCaseMatcher
0722: .getMatch();
0723:
0724: String firstPart = buf.substring(0, result
0725: .beginOffset(0));
0726: String prefix = result.group(1);
0727:
0728: if (prefix == null)
0729: prefix = "";
0730:
0731: String camelCase = result.group(2);
0732: String protocol = result.group(3);
0733: String uri = protocol + result.group(4);
0734: buf = buf.substring(result.endOffset(0));
0735:
0736: m_currentElement.addContent(firstPart);
0737:
0738: //
0739: // Check if the user does not wish to do URL or WikiWord expansion
0740: //
0741: if (prefix.endsWith("~")
0742: || prefix.indexOf('[') != -1) {
0743: if (prefix.endsWith("~")) {
0744: if (m_wysiwygEditorMode) {
0745: m_currentElement.addContent("~");
0746: }
0747: prefix = prefix.substring(0, prefix
0748: .length() - 1);
0749: }
0750: if (camelCase != null) {
0751: m_currentElement.addContent(prefix
0752: + camelCase);
0753: } else if (protocol != null) {
0754: m_currentElement.addContent(prefix
0755: + uri);
0756: }
0757: continue;
0758: }
0759:
0760: //
0761: // Fine, then let's check what kind of a link this was
0762: // and emit the proper elements
0763: //
0764: if (protocol != null) {
0765: char c = uri.charAt(uri.length() - 1);
0766: if (c == '.' || c == ',') {
0767: uri = uri
0768: .substring(0, uri.length() - 1);
0769: buf = c + buf;
0770: }
0771: // System.out.println("URI match "+uri);
0772: m_currentElement.addContent(prefix);
0773: makeDirectURILink(uri);
0774: } else {
0775: // System.out.println("Matched: '"+camelCase+"'");
0776: // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
0777: // System.out.println("prefix="+prefix);
0778: m_currentElement.addContent(prefix);
0779:
0780: makeCamelCaseLink(camelCase);
0781: }
0782: }
0783:
0784: m_currentElement.addContent(buf);
0785: } else {
0786: //
0787: // No camelcase asked for, just add the elements
0788: //
0789: m_currentElement.addContent(buf);
0790: }
0791: } catch (IllegalDataException e) {
0792: //
0793: // Sometimes it's possible that illegal XML chars is added to the data.
0794: // Here we make sure it does not stop parsing.
0795: //
0796: m_currentElement
0797: .addContent(makeError(cleanupSuspectData(e
0798: .getMessage())));
0799: }
0800: }
0801:
0802: return numChars;
0803: }
0804:
0805: /**
0806: * Escapes XML entities in a HTML-compatible way (i.e. does not escape
0807: * entities that are already escaped).
0808: *
0809: * @param buf
0810: * @return
0811: */
0812: private String escapeHTMLEntities(String buf) {
0813: StringBuffer tmpBuf = new StringBuffer(buf.length() + 20);
0814:
0815: for (int i = 0; i < buf.length(); i++) {
0816: char ch = buf.charAt(i);
0817:
0818: if (ch == '<') {
0819: tmpBuf.append("<");
0820: } else if (ch == '>') {
0821: tmpBuf.append(">");
0822: } else if (ch == '\"') {
0823: tmpBuf.append(""");
0824: } else if (ch == '&') {
0825: for (int j = (i < buf.length() - 1) ? i + 1 : i; j < buf
0826: .length(); j++) {
0827: int ch2 = buf.charAt(j);
0828: if (ch2 == ';') {
0829: tmpBuf.append(ch);
0830: break;
0831: }
0832: if (ch2 != '#'
0833: && !Character.isLetterOrDigit((char) ch2)) {
0834: tmpBuf.append("&");
0835: break;
0836: }
0837: }
0838: } else {
0839: tmpBuf.append(ch);
0840: }
0841: }
0842:
0843: return tmpBuf.toString();
0844: }
0845:
0846: private Element pushElement(Element e) {
0847: flushPlainText();
0848: m_currentElement.addContent(e);
0849: m_currentElement = e;
0850:
0851: return e;
0852: }
0853:
0854: private Element addElement(Content e) {
0855: if (e != null) {
0856: flushPlainText();
0857: m_currentElement.addContent(e);
0858: }
0859: return m_currentElement;
0860: }
0861:
0862: /**
0863: * All elements that can be empty by the HTML DTD.
0864: */
0865: // Keep sorted.
0866: private static final String[] EMPTY_ELEMENTS = { "area", "base",
0867: "br", "col", "hr", "img", "input", "link", "meta", "p",
0868: "param" };
0869:
0870: /**
0871: * Goes through the current element stack and pops all elements until this
0872: * element is found - this essentially "closes" and element.
0873: *
0874: * @param s
0875: * @return The new current element, or null, if there was no such element in the entire stack.
0876: */
0877: private Element popElement(String s) {
0878: int flushedBytes = flushPlainText();
0879:
0880: Element currEl = m_currentElement;
0881:
0882: while (currEl.getParentElement() != null) {
0883: if (currEl.getName().equals(s) && !currEl.isRootElement()) {
0884: m_currentElement = currEl.getParentElement();
0885:
0886: //
0887: // Check if it's okay for this element to be empty. Then we will
0888: // trick the JDOM generator into not generating an empty element,
0889: // by putting an empty string between the tags. Yes, it's a kludge
0890: // but what'cha gonna do about it. :-)
0891: //
0892:
0893: if (flushedBytes == 0
0894: && Arrays.binarySearch(EMPTY_ELEMENTS, s) < 0) {
0895: currEl.addContent("");
0896: }
0897:
0898: return m_currentElement;
0899: }
0900:
0901: currEl = currEl.getParentElement();
0902: }
0903:
0904: return null;
0905: }
0906:
0907: /**
0908: * Reads the stream until it meets one of the specified
0909: * ending characters, or stream end. The ending character will be left
0910: * in the stream.
0911: */
0912: private String readUntil(String endChars) throws IOException {
0913: StringBuffer sb = new StringBuffer(80);
0914: int ch = nextToken();
0915:
0916: while (ch != -1) {
0917: if (ch == '\\') {
0918: ch = nextToken();
0919: if (ch == -1) {
0920: break;
0921: }
0922: } else {
0923: if (endChars.indexOf((char) ch) != -1) {
0924: pushBack(ch);
0925: break;
0926: }
0927: }
0928: sb.append((char) ch);
0929: ch = nextToken();
0930: }
0931:
0932: return sb.toString();
0933: }
0934:
0935: /**
0936: * Reads the stream while the characters that have been specified are
0937: * in the stream, returning then the result as a String.
0938: */
0939: private String readWhile(String endChars) throws IOException {
0940: StringBuffer sb = new StringBuffer(80);
0941: int ch = nextToken();
0942:
0943: while (ch != -1) {
0944: if (endChars.indexOf((char) ch) == -1) {
0945: pushBack(ch);
0946: break;
0947: }
0948:
0949: sb.append((char) ch);
0950: ch = nextToken();
0951: }
0952:
0953: return sb.toString();
0954: }
0955:
0956: private JSPWikiMarkupParser m_cleanTranslator;
0957:
0958: /**
0959: * Does a lazy init. Otherwise, we would get into a situation
0960: * where HTMLRenderer would try and boot a TranslatorReader before
0961: * the TranslatorReader it is contained by is up.
0962: */
0963: private JSPWikiMarkupParser getCleanTranslator() {
0964: if (m_cleanTranslator == null) {
0965: WikiContext dummyContext = new WikiContext(m_engine,
0966: m_context.getHttpRequest(), m_context.getPage());
0967: m_cleanTranslator = new JSPWikiMarkupParser(dummyContext,
0968: null);
0969:
0970: m_cleanTranslator.m_allowHTML = true;
0971: }
0972:
0973: return m_cleanTranslator;
0974: }
0975:
0976: /**
0977: * Modifies the "hd" parameter to contain proper values. Because
0978: * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the
0979: * % after url encoding with '_'.
0980: */
0981: // FIXME: This method should probably be public and in an util class somewhere
0982: private String makeHeadingAnchor(String baseName, String title,
0983: Heading hd) {
0984: hd.m_titleText = title;
0985: title = MarkupParser.wikifyLink(title);
0986: hd.m_titleSection = m_engine.encodeName(title);
0987: hd.m_titleAnchor = "section-" + m_engine.encodeName(baseName)
0988: + "-" + hd.m_titleSection;
0989:
0990: hd.m_titleAnchor = hd.m_titleAnchor.replace('%', '_');
0991: hd.m_titleAnchor = hd.m_titleAnchor.replace('/', '_');
0992: return hd.m_titleAnchor;
0993: }
0994:
0995: private String makeSectionTitle(String title) {
0996: title = title.trim();
0997: String outTitle;
0998:
0999: try {
1000: JSPWikiMarkupParser dtr = getCleanTranslator();
1001: dtr.setInputReader(new StringReader(title));
1002:
1003: CleanTextRenderer ctt = new CleanTextRenderer(m_context,
1004: dtr.parse());
1005:
1006: outTitle = ctt.getString();
1007: } catch (IOException e) {
1008: log.fatal("CleanTranslator not working", e);
1009: throw new InternalWikiException(
1010: "CleanTranslator not working as expected, when cleaning title"
1011: + e.getMessage());
1012: }
1013:
1014: return outTitle;
1015: }
1016:
1017: /**
1018: * Returns XHTML for the start of the heading. Also sets the
1019: * line-end emitter.
1020: * @param level
1021: * @param title the title for the heading
1022: * @param hd a List to which heading should be added
1023: */
1024: public Element makeHeading(int level, String title, Heading hd) {
1025: Element el = null;
1026:
1027: String pageName = m_context.getPage().getName();
1028:
1029: String outTitle = makeSectionTitle(title);
1030:
1031: hd.m_level = level;
1032:
1033: switch (level) {
1034: case Heading.HEADING_SMALL:
1035: el = new Element("h4").setAttribute("id",
1036: makeHeadingAnchor(pageName, outTitle, hd));
1037: break;
1038:
1039: case Heading.HEADING_MEDIUM:
1040: el = new Element("h3").setAttribute("id",
1041: makeHeadingAnchor(pageName, outTitle, hd));
1042: break;
1043:
1044: case Heading.HEADING_LARGE:
1045: el = new Element("h2").setAttribute("id",
1046: makeHeadingAnchor(pageName, outTitle, hd));
1047: break;
1048:
1049: default:
1050: throw new InternalWikiException("Illegal heading type "
1051: + level);
1052: }
1053:
1054: return el;
1055: }
1056:
1057: /**
1058: * When given a link to a WikiName, we just return
1059: * a proper HTML link for it. The local link mutator
1060: * chain is also called.
1061: */
1062: private Element makeCamelCaseLink(String wikiname) {
1063: String matchedLink;
1064:
1065: callMutatorChain(m_localLinkMutatorChain, wikiname);
1066:
1067: if ((matchedLink = linkExists(wikiname)) != null) {
1068: makeLink(READ, matchedLink, wikiname, null, null);
1069: } else {
1070: makeLink(EDIT, wikiname, wikiname, null, null);
1071: }
1072:
1073: return m_currentElement;
1074: }
1075:
1076: /** Holds the image URL for the duration of this parser */
1077: private String m_outlinkImageURL = null;
1078:
1079: /**
1080: * Returns an element for the external link image (out.png). However,
1081: * this method caches the URL for the lifetime of this MarkupParser,
1082: * because it's commonly used, and we'll end up with possibly hundreds
1083: * our thousands of references to it... It's a lot faster, too.
1084: *
1085: * @return An element containing the HTML for the outlink image.
1086: */
1087: private Element outlinkImage() {
1088: Element el = null;
1089:
1090: if (m_useOutlinkImage) {
1091: if (m_outlinkImageURL == null) {
1092: m_outlinkImageURL = m_context.getURL(WikiContext.NONE,
1093: OUTLINK_IMAGE);
1094: }
1095:
1096: el = new Element("img").setAttribute("class", "outlink");
1097: el.setAttribute("src", m_outlinkImageURL);
1098: el.setAttribute("alt", "");
1099: }
1100:
1101: return el;
1102: }
1103:
1104: /**
1105: * Takes an URL and turns it into a regular wiki link. Unfortunately,
1106: * because of the way that flushPlainText() works, it already encodes
1107: * all of the XML entities. But so does WikiContext.getURL(), so we
1108: * have to do a reverse-replace here, so that it can again be replaced in makeLink.
1109: * <p>
1110: * What a crappy problem.
1111: *
1112: * @param url
1113: * @return
1114: */
1115: private Element makeDirectURILink(String url) {
1116: Element result;
1117: String last = null;
1118:
1119: if (url.endsWith(",") || url.endsWith(".")) {
1120: last = url.substring(url.length() - 1);
1121: url = url.substring(0, url.length() - 1);
1122: }
1123:
1124: callMutatorChain(m_externalLinkMutatorChain, url);
1125:
1126: if (isImageLink(url)) {
1127: result = handleImageLink(StringUtils.replace(url, "&",
1128: "&"), url, false);
1129: } else {
1130: result = makeLink(EXTERNAL, StringUtils.replace(url,
1131: "&", "&"), url, null, null);
1132: addElement(outlinkImage());
1133: }
1134:
1135: if (last != null) {
1136: m_plainTextBuf.append(last);
1137: }
1138:
1139: return result;
1140: }
1141:
1142: /**
1143: * Image links are handled differently:
1144: * 1. If the text is a WikiName of an existing page,
1145: * it gets linked.
1146: * 2. If the text is an external link, then it is inlined.
1147: * 3. Otherwise it becomes an ALT text.
1148: *
1149: * @param reallink The link to the image.
1150: * @param link Link text portion, may be a link to somewhere else.
1151: * @param hasLinkText If true, then the defined link had a link text available.
1152: * This means that the link text may be a link to a wiki page,
1153: * or an external resource.
1154: */
1155:
1156: // FIXME: isExternalLink() is called twice.
1157: private Element handleImageLink(String reallink, String link,
1158: boolean hasLinkText) {
1159: String possiblePage = MarkupParser.cleanLink(link);
1160:
1161: if (isExternalLink(link) && hasLinkText) {
1162: return makeLink(IMAGELINK, reallink, link, null, null);
1163: } else if ((linkExists(possiblePage)) != null && hasLinkText) {
1164: // System.out.println("Orig="+link+", Matched: "+matchedLink);
1165: callMutatorChain(m_localLinkMutatorChain, possiblePage);
1166:
1167: return makeLink(IMAGEWIKILINK, reallink, link, null, null);
1168: } else {
1169: return makeLink(IMAGE, reallink, link, null, null);
1170: }
1171: }
1172:
1173: private Element handleAccessRule(String ruleLine) {
1174: if (m_wysiwygEditorMode) {
1175: m_currentElement.addContent("[" + ruleLine + "]");
1176: }
1177:
1178: if (!m_parseAccessRules)
1179: return m_currentElement;
1180: Acl acl;
1181: WikiPage page = m_context.getPage();
1182: // UserDatabase db = m_context.getEngine().getUserDatabase();
1183:
1184: if (ruleLine.startsWith("{"))
1185: ruleLine = ruleLine.substring(1);
1186: if (ruleLine.endsWith("}"))
1187: ruleLine = ruleLine.substring(0, ruleLine.length() - 1);
1188:
1189: if (log.isDebugEnabled())
1190: log.debug("page=" + page.getName() + ", ACL = " + ruleLine);
1191:
1192: try {
1193: acl = m_engine.getAclManager().parseAcl(page, ruleLine);
1194:
1195: page.setAcl(acl);
1196:
1197: if (log.isDebugEnabled())
1198: log.debug(acl.toString());
1199: } catch (WikiSecurityException wse) {
1200: return makeError(wse.getMessage());
1201: }
1202:
1203: return m_currentElement;
1204: }
1205:
1206: /**
1207: * Handles metadata setting [{SET foo=bar}]
1208: */
1209: private Element handleMetadata(String link) {
1210: if (m_wysiwygEditorMode) {
1211: m_currentElement.addContent("[" + link + "]");
1212: }
1213:
1214: try {
1215: String args = link.substring(link.indexOf(' '), link
1216: .length() - 1);
1217:
1218: String name = args.substring(0, args.indexOf('='));
1219: String val = args.substring(args.indexOf('=') + 1, args
1220: .length());
1221:
1222: name = name.trim();
1223: val = val.trim();
1224:
1225: if (val.startsWith("'"))
1226: val = val.substring(1);
1227: if (val.endsWith("'"))
1228: val = val.substring(0, val.length() - 1);
1229:
1230: // log.debug("SET name='"+name+"', value='"+val+"'.");
1231:
1232: if (name.length() > 0 && val.length() > 0) {
1233: val = m_engine.getVariableManager().expandVariables(
1234: m_context, val);
1235:
1236: m_context.getPage().setAttribute(name, val);
1237: }
1238: } catch (Exception e) {
1239: ResourceBundle rb = m_context
1240: .getBundle(InternationalizationManager.CORE_BUNDLE);
1241: Object[] args = { link };
1242: return makeError(MessageFormat.format(rb
1243: .getString("markupparser.error.invalidset"), args));
1244: }
1245:
1246: return m_currentElement;
1247: }
1248:
1249: /**
1250: * Emits a processing instruction that will disable markup escaping. This is
1251: * very useful if you want to emit HTML directly into the stream.
1252: *
1253: */
1254: private void disableOutputEscaping() {
1255: addElement(new ProcessingInstruction(
1256: Result.PI_DISABLE_OUTPUT_ESCAPING, ""));
1257: }
1258:
1259: /**
1260: * Gobbles up all hyperlinks that are encased in square brackets.
1261: */
1262: private Element handleHyperlinks(String linktext, int pos) {
1263: ResourceBundle rb = m_context
1264: .getBundle(InternationalizationManager.CORE_BUNDLE);
1265:
1266: StringBuffer sb = new StringBuffer(linktext.length() + 80);
1267:
1268: if (isAccessRule(linktext)) {
1269: return handleAccessRule(linktext);
1270: }
1271:
1272: if (isMetadata(linktext)) {
1273: return handleMetadata(linktext);
1274: }
1275:
1276: if (PluginManager.isPluginLink(linktext)) {
1277: try {
1278: PluginContent pluginContent = m_engine
1279: .getPluginManager().parsePluginLine(m_context,
1280: linktext, pos);
1281:
1282: addElement(pluginContent);
1283:
1284: pluginContent.executeParse(m_context);
1285: } catch (PluginException e) {
1286: log.info("Failed to insert plugin: " + e.getMessage());
1287: //log.info( "Root cause:",e.getRootThrowable() );
1288: if (!m_wysiwygEditorMode) {
1289: ResourceBundle rbPlugin = m_context
1290: .getBundle(WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE);
1291: Object[] args = { e.getMessage() };
1292: return addElement(makeError(MessageFormat
1293: .format(
1294: rbPlugin
1295: .getString("plugin.error.insertionfailed"),
1296: args)));
1297: }
1298: }
1299:
1300: return m_currentElement;
1301: }
1302:
1303: try {
1304: LinkParser.Link link = m_linkParser.parse(linktext);
1305: linktext = link.getText();
1306: String linkref = link.getReference();
1307:
1308: //
1309: // Yes, we now have the components separated.
1310: // linktext = the text the link should have
1311: // linkref = the url or page name.
1312: //
1313: // In many cases these are the same. [linktext|linkref].
1314: //
1315: if (VariableManager.isVariableLink(linktext)) {
1316: Content el = new VariableContent(linktext);
1317:
1318: addElement(el);
1319: } else if (isExternalLink(linkref)) {
1320: // It's an external link, out of this Wiki
1321:
1322: callMutatorChain(m_externalLinkMutatorChain, linkref);
1323:
1324: if (isImageLink(linkref)) {
1325: handleImageLink(linkref, linktext, link
1326: .hasReference());
1327: } else {
1328: makeLink(EXTERNAL, linkref, linktext, null, link
1329: .getAttributes());
1330: addElement(outlinkImage());
1331: }
1332: } else if (link.isInterwikiLink()) {
1333: // It's an interwiki link
1334: // InterWiki links also get added to external link chain
1335: // after the links have been resolved.
1336:
1337: // FIXME: There is an interesting issue here: We probably should
1338: // URLEncode the wikiPage, but we can't since some of the
1339: // Wikis use slashes (/), which won't survive URLEncoding.
1340: // Besides, we don't know which character set the other Wiki
1341: // is using, so you'll have to write the entire name as it appears
1342: // in the URL. Bugger.
1343:
1344: String extWiki = link.getExternalWiki();
1345: String wikiPage = link.getExternalWikiPage();
1346:
1347: if (m_wysiwygEditorMode) {
1348: makeLink(INTERWIKI, extWiki + ":" + wikiPage,
1349: linktext, null, link.getAttributes());
1350: } else {
1351: String urlReference = m_engine
1352: .getInterWikiURL(extWiki);
1353:
1354: if (urlReference != null) {
1355: urlReference = TextUtil.replaceString(
1356: urlReference, "%s", wikiPage);
1357: urlReference = callMutatorChain(
1358: m_externalLinkMutatorChain,
1359: urlReference);
1360:
1361: if (isImageLink(urlReference)) {
1362: handleImageLink(urlReference, linktext,
1363: link.hasReference());
1364: } else {
1365: makeLink(INTERWIKI, urlReference, linktext,
1366: null, link.getAttributes());
1367: }
1368:
1369: if (isExternalLink(urlReference)) {
1370: addElement(outlinkImage());
1371: }
1372: } else {
1373: Object[] args = { extWiki };
1374: addElement(makeError(MessageFormat
1375: .format(
1376: rb
1377: .getString("markupparser.error.nointerwikiref"),
1378: args)));
1379: }
1380: }
1381: } else if (linkref.startsWith("#")) {
1382: // It defines a local footnote
1383: makeLink(LOCAL, linkref, linktext, null, link
1384: .getAttributes());
1385: } else if (TextUtil.isNumber(linkref)) {
1386: // It defines a reference to a local footnote
1387: makeLink(LOCALREF, linkref, linktext, null, link
1388: .getAttributes());
1389: } else {
1390: int hashMark = -1;
1391:
1392: //
1393: // Internal wiki link, but is it an attachment link?
1394: //
1395: String attachment = findAttachment(linkref);
1396: if (attachment != null) {
1397: callMutatorChain(m_attachmentLinkMutatorChain,
1398: attachment);
1399:
1400: if (isImageLink(linkref)) {
1401: attachment = m_context.getURL(
1402: WikiContext.ATTACH, attachment);
1403: sb.append(handleImageLink(attachment, linktext,
1404: link.hasReference()));
1405: } else {
1406: makeLink(ATTACHMENT, attachment, linktext,
1407: null, link.getAttributes());
1408: }
1409: } else if ((hashMark = linkref.indexOf('#')) != -1) {
1410: // It's an internal Wiki link, but to a named section
1411:
1412: String namedSection = linkref
1413: .substring(hashMark + 1);
1414: linkref = linkref.substring(0, hashMark);
1415:
1416: linkref = MarkupParser.cleanLink(linkref);
1417:
1418: callMutatorChain(m_localLinkMutatorChain, linkref);
1419:
1420: String matchedLink;
1421: if ((matchedLink = linkExists(linkref)) != null) {
1422: String sectref = "section-"
1423: + m_engine.encodeName(matchedLink)
1424: + "-" + wikifyLink(namedSection);
1425: sectref = sectref.replace('%', '_');
1426: makeLink(READ, matchedLink, linktext, sectref,
1427: link.getAttributes());
1428: } else {
1429: makeLink(EDIT, linkref, linktext, null, link
1430: .getAttributes());
1431: }
1432: } else {
1433: // It's an internal Wiki link
1434: linkref = MarkupParser.cleanLink(linkref);
1435:
1436: callMutatorChain(m_localLinkMutatorChain, linkref);
1437:
1438: String matchedLink = linkExists(linkref);
1439:
1440: if (matchedLink != null) {
1441: makeLink(READ, matchedLink, linktext, null,
1442: link.getAttributes());
1443: } else {
1444: makeLink(EDIT, linkref, linktext, null, link
1445: .getAttributes());
1446: }
1447: }
1448: }
1449: } catch (ParseException e) {
1450: log.info("Parser failure: ", e);
1451: Object[] args = { e.getMessage() };
1452: addElement(makeError(MessageFormat.format(rb
1453: .getString("markupparser.error.parserfailure"),
1454: args)));
1455: }
1456:
1457: return m_currentElement;
1458: }
1459:
1460: private String findAttachment(String linktext) {
1461: AttachmentManager mgr = m_engine.getAttachmentManager();
1462: Attachment att = null;
1463:
1464: try {
1465: att = mgr.getAttachmentInfo(m_context, linktext);
1466: } catch (ProviderException e) {
1467: log.warn("Finding attachments failed: ", e);
1468: return null;
1469: }
1470:
1471: if (att != null) {
1472: return att.getName();
1473: } else if (linktext.indexOf('/') != -1) {
1474: return linktext;
1475: }
1476:
1477: return null;
1478: }
1479:
1480: /**
1481: * Pushes back any string that has been read. It will obviously
1482: * be pushed back in a reverse order.
1483: *
1484: * @since 2.1.77
1485: */
1486: private void pushBack(String s) throws IOException {
1487: for (int i = s.length() - 1; i >= 0; i--) {
1488: pushBack(s.charAt(i));
1489: }
1490: }
1491:
1492: private Element handleBackslash() throws IOException {
1493: int ch = nextToken();
1494:
1495: if (ch == '\\') {
1496: int ch2 = nextToken();
1497:
1498: if (ch2 == '\\') {
1499: pushElement(new Element("br").setAttribute("clear",
1500: "all"));
1501: return popElement("br");
1502: }
1503:
1504: pushBack(ch2);
1505:
1506: pushElement(new Element("br"));
1507: return popElement("br");
1508: }
1509:
1510: pushBack(ch);
1511:
1512: return null;
1513: }
1514:
1515: private Element handleUnderscore() throws IOException {
1516: int ch = nextToken();
1517: Element el = null;
1518:
1519: if (ch == '_') {
1520: if (m_isbold) {
1521: el = popElement("b");
1522: } else {
1523: el = pushElement(new Element("b"));
1524: }
1525: m_isbold = !m_isbold;
1526: } else {
1527: pushBack(ch);
1528: }
1529:
1530: return el;
1531: }
1532:
1533: /**
1534: * For example: italics.
1535: */
1536: private Element handleApostrophe() throws IOException {
1537: int ch = nextToken();
1538: Element el = null;
1539:
1540: if (ch == '\'') {
1541: if (m_isitalic) {
1542: el = popElement("i");
1543: } else {
1544: el = pushElement(new Element("i"));
1545: }
1546: m_isitalic = !m_isitalic;
1547: } else {
1548: pushBack(ch);
1549: }
1550:
1551: return el;
1552: }
1553:
1554: private Element handleOpenbrace(boolean isBlock) throws IOException {
1555: int ch = nextToken();
1556:
1557: if (ch == '{') {
1558: int ch2 = nextToken();
1559:
1560: if (ch2 == '{') {
1561: m_isPre = true;
1562: m_isEscaping = true;
1563: m_isPreBlock = isBlock;
1564:
1565: if (isBlock) {
1566: startBlockLevel();
1567: return pushElement(new Element("pre"));
1568: }
1569:
1570: return pushElement(new Element("span").setAttribute(
1571: "style",
1572: "font-family:monospace; white-space:pre;"));
1573: }
1574:
1575: pushBack(ch2);
1576:
1577: return pushElement(new Element("tt"));
1578: }
1579:
1580: pushBack(ch);
1581:
1582: return null;
1583: }
1584:
1585: /**
1586: * Handles both }} and }}}
1587: */
1588: private Element handleClosebrace() throws IOException {
1589: int ch2 = nextToken();
1590:
1591: if (ch2 == '}') {
1592: int ch3 = nextToken();
1593:
1594: if (ch3 == '}') {
1595: if (m_isPre) {
1596: if (m_isPreBlock) {
1597: popElement("pre");
1598: } else {
1599: popElement("span");
1600: }
1601:
1602: m_isPre = false;
1603: m_isEscaping = false;
1604: return m_currentElement;
1605: }
1606:
1607: m_plainTextBuf.append("}}}");
1608: return m_currentElement;
1609: }
1610:
1611: pushBack(ch3);
1612:
1613: if (!m_isEscaping) {
1614: return popElement("tt");
1615: }
1616: }
1617:
1618: pushBack(ch2);
1619:
1620: return null;
1621: }
1622:
1623: private Element handleDash() throws IOException {
1624: int ch = nextToken();
1625:
1626: if (ch == '-') {
1627: int ch2 = nextToken();
1628:
1629: if (ch2 == '-') {
1630: int ch3 = nextToken();
1631:
1632: if (ch3 == '-') {
1633: // Empty away all the rest of the dashes.
1634: // Do not forget to return the first non-match back.
1635: while ((ch = nextToken()) == '-')
1636: ;
1637:
1638: pushBack(ch);
1639: startBlockLevel();
1640: pushElement(new Element("hr"));
1641: return popElement("hr");
1642: }
1643:
1644: pushBack(ch3);
1645: }
1646: pushBack(ch2);
1647: }
1648:
1649: pushBack(ch);
1650:
1651: return null;
1652: }
1653:
1654: private Element handleHeading() throws IOException {
1655: Element el = null;
1656:
1657: int ch = nextToken();
1658:
1659: Heading hd = new Heading();
1660:
1661: if (ch == '!') {
1662: int ch2 = nextToken();
1663:
1664: if (ch2 == '!') {
1665: String title = peekAheadLine();
1666:
1667: el = makeHeading(Heading.HEADING_LARGE, title, hd);
1668: } else {
1669: pushBack(ch2);
1670: String title = peekAheadLine();
1671: el = makeHeading(Heading.HEADING_MEDIUM, title, hd);
1672: }
1673: } else {
1674: pushBack(ch);
1675: String title = peekAheadLine();
1676: el = makeHeading(Heading.HEADING_SMALL, title, hd);
1677: }
1678:
1679: callHeadingListenerChain(hd);
1680:
1681: if (el != null)
1682: pushElement(el);
1683:
1684: return el;
1685: }
1686:
1687: /**
1688: * Reads the stream until the next EOL or EOF. Note that it will also read the
1689: * EOL from the stream.
1690: */
1691: private StringBuffer readUntilEOL() throws IOException {
1692: int ch;
1693: StringBuffer buf = new StringBuffer(256);
1694:
1695: while (true) {
1696: ch = nextToken();
1697:
1698: if (ch == -1)
1699: break;
1700:
1701: buf.append((char) ch);
1702:
1703: if (ch == '\n')
1704: break;
1705: }
1706: return buf;
1707: }
1708:
1709: /** Controls whether italic is restarted after a paragraph shift */
1710:
1711: private boolean m_restartitalic = false;
1712: private boolean m_restartbold = false;
1713:
1714: private boolean m_newLine;
1715:
1716: /**
1717: * Starts a block level element, therefore closing
1718: * a potential open paragraph tag.
1719: */
1720: private void startBlockLevel() {
1721: // These may not continue over block level limits in XHTML
1722:
1723: popElement("i");
1724: popElement("b");
1725: popElement("tt");
1726:
1727: if (m_isOpenParagraph) {
1728: m_isOpenParagraph = false;
1729: popElement("p");
1730: m_plainTextBuf.append("\n"); // Just small beautification
1731: }
1732:
1733: m_restartitalic = m_isitalic;
1734: m_restartbold = m_isbold;
1735:
1736: m_isitalic = false;
1737: m_isbold = false;
1738: }
1739:
1740: private static String getListType(char c) {
1741: if (c == '*') {
1742: return "ul";
1743: } else if (c == '#') {
1744: return "ol";
1745: }
1746: throw new InternalWikiException("Parser got faulty list type: "
1747: + c);
1748: }
1749:
1750: /**
1751: * Like original handleOrderedList() and handleUnorderedList()
1752: * however handles both ordered ('#') and unordered ('*') mixed together.
1753: */
1754:
1755: // FIXME: Refactor this; it's a bit messy.
1756: private Element handleGeneralList() throws IOException {
1757: startBlockLevel();
1758:
1759: String strBullets = readWhile("*#");
1760: // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution
1761: int numBullets = strBullets.length();
1762:
1763: // override the beginning portion of bullet pattern to be like the previous
1764: // to simulate PHPWiki style lists
1765:
1766: if (m_allowPHPWikiStyleLists) {
1767: // only substitute if different
1768: if (!(strBullets.substring(0, Math.min(numBullets,
1769: m_genlistlevel))
1770: .equals(m_genlistBulletBuffer.substring(0, Math
1771: .min(numBullets, m_genlistlevel))))) {
1772: if (numBullets <= m_genlistlevel) {
1773: // Substitute all but the last character (keep the expressed bullet preference)
1774: strBullets = (numBullets > 1 ? m_genlistBulletBuffer
1775: .substring(0, numBullets - 1)
1776: : "")
1777: + strBullets.substring(numBullets - 1,
1778: numBullets);
1779: } else {
1780: strBullets = m_genlistBulletBuffer
1781: + strBullets.substring(m_genlistlevel,
1782: numBullets);
1783: }
1784: }
1785: }
1786:
1787: //
1788: // Check if this is still of the same type
1789: //
1790: if (strBullets.substring(0,
1791: Math.min(numBullets, m_genlistlevel)).equals(
1792: m_genlistBulletBuffer.substring(0, Math.min(numBullets,
1793: m_genlistlevel)))) {
1794: if (numBullets > m_genlistlevel) {
1795: pushElement(new Element(getListType(strBullets
1796: .charAt(m_genlistlevel++))));
1797:
1798: for (; m_genlistlevel < numBullets; m_genlistlevel++) {
1799: // bullets are growing, get from new bullet list
1800: pushElement(new Element("li"));
1801: pushElement(new Element(getListType(strBullets
1802: .charAt(m_genlistlevel))));
1803: }
1804: } else if (numBullets < m_genlistlevel) {
1805: // Close the previous list item.
1806: // buf.append( m_renderer.closeListItem() );
1807: popElement("li");
1808:
1809: for (; m_genlistlevel > numBullets; m_genlistlevel--) {
1810: // bullets are shrinking, get from old bullet list
1811:
1812: popElement(getListType(m_genlistBulletBuffer
1813: .charAt(m_genlistlevel - 1)));
1814: if (m_genlistlevel > 0) {
1815: popElement("li");
1816: }
1817:
1818: }
1819: } else {
1820: if (m_genlistlevel > 0) {
1821: popElement("li");
1822: }
1823: }
1824: } else {
1825: //
1826: // The pattern has changed, unwind and restart
1827: //
1828: int numEqualBullets;
1829: int numCheckBullets;
1830:
1831: // find out how much is the same
1832: numEqualBullets = 0;
1833: numCheckBullets = Math.min(numBullets, m_genlistlevel);
1834:
1835: while (numEqualBullets < numCheckBullets) {
1836: // if the bullets are equal so far, keep going
1837: if (strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer
1838: .charAt(numEqualBullets))
1839: numEqualBullets++;
1840: // otherwise giveup, we have found how many are equal
1841: else
1842: break;
1843: }
1844:
1845: //unwind
1846: for (; m_genlistlevel > numEqualBullets; m_genlistlevel--) {
1847: popElement(getListType(m_genlistBulletBuffer
1848: .charAt(m_genlistlevel - 1)));
1849: if (m_genlistlevel > 0) {
1850: popElement("li");
1851: }
1852: }
1853:
1854: //rewind
1855:
1856: pushElement(new Element(getListType(strBullets
1857: .charAt(numEqualBullets++))));
1858: for (int i = numEqualBullets; i < numBullets; i++) {
1859: pushElement(new Element("li"));
1860: pushElement(new Element(getListType(strBullets
1861: .charAt(i))));
1862: }
1863: m_genlistlevel = numBullets;
1864: }
1865:
1866: //
1867: // Push a new list item, and eat away any extra whitespace
1868: //
1869: pushElement(new Element("li"));
1870: readWhile(" ");
1871:
1872: // work done, remember the new bullet list (in place of old one)
1873: m_genlistBulletBuffer.setLength(0);
1874: m_genlistBulletBuffer.append(strBullets);
1875:
1876: return m_currentElement;
1877: }
1878:
1879: private Element unwindGeneralList() {
1880: //unwind
1881: for (; m_genlistlevel > 0; m_genlistlevel--) {
1882: popElement("li");
1883: popElement(getListType(m_genlistBulletBuffer
1884: .charAt(m_genlistlevel - 1)));
1885: }
1886:
1887: m_genlistBulletBuffer.setLength(0);
1888:
1889: return null;
1890: }
1891:
1892: private Element handleDefinitionList() throws IOException {
1893: if (!m_isdefinition) {
1894: m_isdefinition = true;
1895:
1896: startBlockLevel();
1897:
1898: pushElement(new Element("dl"));
1899: return pushElement(new Element("dt"));
1900: }
1901:
1902: return null;
1903: }
1904:
1905: private Element handleOpenbracket() throws IOException {
1906: StringBuffer sb = new StringBuffer(40);
1907: int pos = getPosition();
1908: int ch = nextToken();
1909: boolean isPlugin = false;
1910:
1911: if (ch == '[') {
1912: if (m_wysiwygEditorMode) {
1913: sb.append('[');
1914: }
1915:
1916: sb.append((char) ch);
1917:
1918: while ((ch = nextToken()) == '[') {
1919: sb.append((char) ch);
1920: }
1921: }
1922:
1923: if (ch == '{') {
1924: isPlugin = true;
1925: }
1926:
1927: pushBack(ch);
1928:
1929: if (sb.length() > 0) {
1930: m_plainTextBuf.append(sb);
1931: return m_currentElement;
1932: }
1933:
1934: //
1935: // Find end of hyperlink
1936: //
1937:
1938: ch = nextToken();
1939: int nesting = 1; // Check for nested plugins
1940:
1941: while (ch != -1) {
1942: int ch2 = nextToken();
1943: pushBack(ch2);
1944:
1945: if (isPlugin) {
1946: if (ch == '[' && ch2 == '{') {
1947: nesting++;
1948: } else if (nesting == 0 && ch == ']'
1949: && sb.charAt(sb.length() - 1) == '}') {
1950: break;
1951: } else if (ch == '}' && ch2 == ']') {
1952: // NB: This will be decremented once at the end
1953: nesting--;
1954: }
1955: } else {
1956: if (ch == ']') {
1957: break;
1958: }
1959: }
1960:
1961: sb.append((char) ch);
1962:
1963: ch = nextToken();
1964: }
1965:
1966: //
1967: // If the link is never finished, do some tricks to display the rest of the line
1968: // unchanged.
1969: //
1970: if (ch == -1) {
1971: log.debug("Warning: unterminated link detected!");
1972: m_isEscaping = true;
1973: m_plainTextBuf.append(sb);
1974: flushPlainText();
1975: m_isEscaping = false;
1976: return m_currentElement;
1977: }
1978:
1979: return handleHyperlinks(sb.toString(), pos);
1980: }
1981:
1982: /**
1983: * Reads the stream until the current brace is closed or stream end.
1984: */
1985: private String readBraceContent(char opening, char closing)
1986: throws IOException {
1987: StringBuffer sb = new StringBuffer(40);
1988: int braceLevel = 1;
1989: int ch;
1990: while ((ch = nextToken()) != -1) {
1991: if (ch == '\\') {
1992: continue;
1993: } else if (ch == opening) {
1994: braceLevel++;
1995: } else if (ch == closing) {
1996: braceLevel--;
1997: if (braceLevel == 0) {
1998: break;
1999: }
2000: }
2001: sb.append((char) ch);
2002: }
2003: return sb.toString();
2004: }
2005:
2006: /**
2007: * Handles constructs of type %%(style) and %%class
2008: * @param newLine
2009: * @return
2010: * @throws IOException
2011: */
2012: private Element handleDiv(boolean newLine) throws IOException {
2013: int ch = nextToken();
2014: Element el = null;
2015:
2016: if (ch == '%') {
2017: String style = null;
2018: String clazz = null;
2019:
2020: ch = nextToken();
2021:
2022: //
2023: // Style or class?
2024: //
2025: if (ch == '(') {
2026: style = readBraceContent('(', ')');
2027: } else if (Character.isLetter((char) ch)) {
2028: pushBack(ch);
2029: clazz = readUntil(" \t\n\r");
2030: ch = nextToken();
2031:
2032: //
2033: // Pop out only spaces, so that the upcoming EOL check does not check the
2034: // next line.
2035: //
2036: if (ch == '\n' || ch == '\r') {
2037: pushBack(ch);
2038: }
2039: } else {
2040: //
2041: // Anything else stops.
2042: //
2043:
2044: pushBack(ch);
2045:
2046: try {
2047: Boolean isSpan = (Boolean) m_styleStack.pop();
2048:
2049: if (isSpan == null) {
2050: // Fail quietly
2051: } else if (isSpan.booleanValue()) {
2052: el = popElement("span");
2053: } else {
2054: el = popElement("div");
2055: }
2056: } catch (EmptyStackException e) {
2057: log
2058: .debug("Page '"
2059: + m_context.getName()
2060: + "' closes a %%-block that has not been opened.");
2061: return m_currentElement;
2062: }
2063:
2064: return el;
2065: }
2066:
2067: //
2068: // Check if there is an attempt to do something nasty
2069: //
2070:
2071: try {
2072: style = StringEscapeUtils.unescapeHtml(style);
2073: if (style != null && style.indexOf("javascript:") != -1) {
2074: log
2075: .debug("Attempt to output javascript within CSS:"
2076: + style);
2077: ResourceBundle rb = m_context
2078: .getBundle(InternationalizationManager.CORE_BUNDLE);
2079: return addElement(makeError(rb
2080: .getString("markupparser.error.javascriptattempt")));
2081: }
2082: } catch (NumberFormatException e) {
2083: //
2084: // If there are unknown entities, we don't want the parser to stop.
2085: //
2086: ResourceBundle rb = m_context
2087: .getBundle(InternationalizationManager.CORE_BUNDLE);
2088: Object[] args = { e.getMessage() };
2089: String msg = MessageFormat.format(rb
2090: .getString("markupparser.error.parserfailure"),
2091: args);
2092: return addElement(makeError(msg));
2093: }
2094:
2095: //
2096: // Decide if we should open a div or a span?
2097: //
2098: String eol = peekAheadLine();
2099:
2100: if (eol.trim().length() > 0) {
2101: // There is stuff after the class
2102:
2103: el = new Element("span");
2104:
2105: m_styleStack.push(Boolean.TRUE);
2106: } else {
2107: startBlockLevel();
2108: el = new Element("div");
2109: m_styleStack.push(Boolean.FALSE);
2110: }
2111:
2112: if (style != null)
2113: el.setAttribute("style", style);
2114: if (clazz != null)
2115: el.setAttribute("class", clazz);
2116: el = pushElement(el);
2117:
2118: return el;
2119: }
2120:
2121: pushBack(ch);
2122:
2123: return el;
2124: }
2125:
2126: private Element handleSlash(boolean newLine) throws IOException {
2127: int ch = nextToken();
2128:
2129: pushBack(ch);
2130: if (ch == '%' && !m_styleStack.isEmpty()) {
2131: return handleDiv(newLine);
2132: }
2133:
2134: return null;
2135: }
2136:
2137: private Element handleBar(boolean newLine) throws IOException {
2138: Element el = null;
2139:
2140: if (!m_istable && !newLine) {
2141: return null;
2142: }
2143:
2144: //
2145: // If the bar is in the first column, we will either start
2146: // a new table or continue the old one.
2147: //
2148:
2149: if (newLine) {
2150: if (!m_istable) {
2151: startBlockLevel();
2152: el = pushElement(new Element("table").setAttribute(
2153: "class", "wikitable").setAttribute("border",
2154: "1"));
2155: m_istable = true;
2156: m_rowNum = 0;
2157: }
2158:
2159: m_rowNum++;
2160: Element tr = (m_rowNum % 2 != 0) ? new Element("tr")
2161: .setAttribute("class", "odd") : new Element("tr");
2162: el = pushElement(tr);
2163: }
2164:
2165: //
2166: // Check out which table cell element to start;
2167: // a header element (th) or a regular element (td).
2168: //
2169: int ch = nextToken();
2170:
2171: if (ch == '|') {
2172: if (!newLine) {
2173: el = popElement("th");
2174: if (el == null)
2175: popElement("td");
2176: }
2177: el = pushElement(new Element("th"));
2178: } else {
2179: if (!newLine) {
2180: el = popElement("td");
2181: if (el == null)
2182: popElement("th");
2183: }
2184:
2185: el = pushElement(new Element("td"));
2186:
2187: pushBack(ch);
2188: }
2189:
2190: return el;
2191: }
2192:
2193: /**
2194: * Generic escape of next character or entity.
2195: */
2196: private Element handleTilde() throws IOException {
2197: int ch = nextToken();
2198:
2199: if (ch == ' ') {
2200: if (m_wysiwygEditorMode) {
2201: m_plainTextBuf.append("~ ");
2202: }
2203: return m_currentElement;
2204: }
2205:
2206: if (ch == '|' || ch == '~' || ch == '\\' || ch == '*'
2207: || ch == '#' || ch == '-' || ch == '!' || ch == '\''
2208: || ch == '_' || ch == '[' || ch == '{' || ch == ']'
2209: || ch == '}' || ch == '%') {
2210: if (m_wysiwygEditorMode) {
2211: m_plainTextBuf.append('~');
2212: }
2213:
2214: m_plainTextBuf.append((char) ch);
2215: m_plainTextBuf.append(readWhile("" + (char) ch));
2216: return m_currentElement;
2217: }
2218:
2219: // No escape.
2220: pushBack(ch);
2221:
2222: return null;
2223: }
2224:
2225: private void fillBuffer(Element startElement) throws IOException {
2226: m_currentElement = startElement;
2227:
2228: boolean quitReading = false;
2229: m_newLine = true;
2230: disableOutputEscaping();
2231:
2232: while (!quitReading) {
2233: int ch = nextToken();
2234:
2235: if (ch == -1)
2236: break;
2237:
2238: //
2239: // Check if we're actually ending the preformatted mode.
2240: // We still must do an entity transformation here.
2241: //
2242: if (m_isEscaping) {
2243: if (ch == '}') {
2244: if (handleClosebrace() == null)
2245: m_plainTextBuf.append((char) ch);
2246: } else if (ch == -1) {
2247: quitReading = true;
2248: } else if (ch == '\r') {
2249: // DOS line feeds we ignore.
2250: } else if (ch == '<') {
2251: m_plainTextBuf.append("<");
2252: } else if (ch == '>') {
2253: m_plainTextBuf.append(">");
2254: } else if (ch == '&') {
2255: m_plainTextBuf.append("&");
2256: } else if (ch == '~') {
2257: String braces = readWhile("}");
2258: if (braces.length() >= 3) {
2259: m_plainTextBuf.append("}}}");
2260:
2261: braces = braces.substring(3);
2262: } else {
2263: m_plainTextBuf.append((char) ch);
2264: }
2265:
2266: for (int i = braces.length() - 1; i >= 0; i--) {
2267: pushBack(braces.charAt(i));
2268: }
2269: } else {
2270: m_plainTextBuf.append((char) ch);
2271: }
2272:
2273: continue;
2274: }
2275:
2276: //
2277: // An empty line stops a list
2278: //
2279: if (m_newLine && ch != '*' && ch != '#' && ch != ' '
2280: && m_genlistlevel > 0) {
2281: m_plainTextBuf.append(unwindGeneralList());
2282: }
2283:
2284: if (m_newLine && ch != '|' && m_istable) {
2285: popElement("table");
2286: m_istable = false;
2287: }
2288:
2289: int skip = IGNORE;
2290:
2291: //
2292: // Do the actual parsing and catch any errors.
2293: //
2294: try {
2295: skip = parseToken(ch);
2296: } catch (IllegalDataException e) {
2297: log
2298: .info("Page "
2299: + m_context.getPage().getName()
2300: + " contains data which cannot be added to DOM tree: "
2301: + e.getMessage());
2302:
2303: makeError("Error: "
2304: + cleanupSuspectData(e.getMessage()));
2305: }
2306:
2307: //
2308: // The idea is as follows: If the handler method returns
2309: // an element (el != null), it is assumed that it has been
2310: // added in the stack. Otherwise the character is added
2311: // as is to the plaintext buffer.
2312: //
2313: // For the transition phase, if s != null, it also gets
2314: // added in the plaintext buffer.
2315: //
2316:
2317: switch (skip) {
2318: case ELEMENT:
2319: m_newLine = false;
2320: break;
2321:
2322: case CHARACTER:
2323: m_plainTextBuf.append((char) ch);
2324: m_newLine = false;
2325: break;
2326:
2327: case IGNORE:
2328: default:
2329: break;
2330: }
2331: }
2332:
2333: popElement("domroot");
2334: }
2335:
2336: private String cleanupSuspectData(String s) {
2337: StringBuffer sb = new StringBuffer(s.length());
2338:
2339: for (int i = 0; i < s.length(); i++) {
2340: char c = s.charAt(i);
2341:
2342: if (Verifier.isXMLCharacter(c))
2343: sb.append(c);
2344: else
2345: sb.append("0x" + Integer.toString(c, 16).toUpperCase());
2346: }
2347:
2348: return sb.toString();
2349: }
2350:
2351: public static final int CHARACTER = 0;
2352: public static final int ELEMENT = 1;
2353: public static final int IGNORE = 2;
2354:
2355: /**
2356: * Return CHARACTER, if you think this was a plain character; ELEMENT, if
2357: * you think this was a wiki markup element, and IGNORE, if you think
2358: * we should ignore this altogether.
2359: *
2360: * @param ch
2361: * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}.
2362: * @throws IOException
2363: */
2364: protected int parseToken(int ch) throws IOException {
2365: Element el = null;
2366:
2367: //
2368: // Now, check the incoming token.
2369: //
2370: switch (ch) {
2371: case '\r':
2372: // DOS linefeeds we forget
2373: return IGNORE;
2374:
2375: case '\n':
2376: //
2377: // Close things like headings, etc.
2378: //
2379:
2380: // FIXME: This is not really very fast
2381: popElement("dl"); // Close definition lists.
2382: popElement("h2");
2383: popElement("h3");
2384: popElement("h4");
2385: if (m_istable) {
2386: popElement("tr");
2387: }
2388:
2389: m_isdefinition = false;
2390:
2391: if (m_newLine) {
2392: // Paragraph change.
2393: startBlockLevel();
2394:
2395: //
2396: // Figure out which elements cannot be enclosed inside
2397: // a <p></p> pair according to XHTML rules.
2398: //
2399: String nextLine = peekAheadLine();
2400: if (nextLine.length() == 0
2401: || (nextLine.length() > 0
2402: && !nextLine.startsWith("{{{")
2403: && !nextLine.startsWith("----")
2404: && !nextLine.startsWith("%%") && "*#!;"
2405: .indexOf(nextLine.charAt(0)) == -1)) {
2406: pushElement(new Element("p"));
2407: m_isOpenParagraph = true;
2408:
2409: if (m_restartitalic) {
2410: pushElement(new Element("i"));
2411: m_isitalic = true;
2412: m_restartitalic = false;
2413: }
2414: if (m_restartbold) {
2415: pushElement(new Element("b"));
2416: m_isbold = true;
2417: m_restartbold = false;
2418: }
2419: }
2420: } else {
2421: m_plainTextBuf.append("\n");
2422: m_newLine = true;
2423: }
2424: return IGNORE;
2425:
2426: case '\\':
2427: el = handleBackslash();
2428: break;
2429:
2430: case '_':
2431: el = handleUnderscore();
2432: break;
2433:
2434: case '\'':
2435: el = handleApostrophe();
2436: break;
2437:
2438: case '{':
2439: el = handleOpenbrace(m_newLine);
2440: break;
2441:
2442: case '}':
2443: el = handleClosebrace();
2444: break;
2445:
2446: case '-':
2447: if (m_newLine)
2448: el = handleDash();
2449:
2450: break;
2451:
2452: case '!':
2453: if (m_newLine) {
2454: el = handleHeading();
2455: }
2456: break;
2457:
2458: case ';':
2459: if (m_newLine) {
2460: el = handleDefinitionList();
2461: }
2462: break;
2463:
2464: case ':':
2465: if (m_isdefinition) {
2466: popElement("dt");
2467: el = pushElement(new Element("dd"));
2468: m_isdefinition = false;
2469: }
2470: break;
2471:
2472: case '[':
2473: el = handleOpenbracket();
2474: break;
2475:
2476: case '*':
2477: if (m_newLine) {
2478: pushBack('*');
2479: el = handleGeneralList();
2480: }
2481: break;
2482:
2483: case '#':
2484: if (m_newLine) {
2485: pushBack('#');
2486: el = handleGeneralList();
2487: }
2488: break;
2489:
2490: case '|':
2491: el = handleBar(m_newLine);
2492: break;
2493:
2494: case '~':
2495: el = handleTilde();
2496: break;
2497:
2498: case '%':
2499: el = handleDiv(m_newLine);
2500: break;
2501:
2502: case '/':
2503: el = handleSlash(m_newLine);
2504: break;
2505:
2506: default:
2507: break;
2508: }
2509:
2510: return el != null ? ELEMENT : CHARACTER;
2511: }
2512:
2513: public WikiDocument parse() throws IOException {
2514: WikiDocument d = new WikiDocument(m_context.getPage());
2515: d.setContext(m_context);
2516:
2517: Element rootElement = new Element("domroot");
2518:
2519: d.setRootElement(rootElement);
2520:
2521: fillBuffer(rootElement);
2522:
2523: paragraphify(rootElement);
2524:
2525: return d;
2526: }
2527:
2528: /**
2529: * Checks out that the first paragraph is correctly installed.
2530: *
2531: * @param rootElement
2532: */
2533: private void paragraphify(Element rootElement) {
2534: //
2535: // Add the paragraph tag to the first paragraph
2536: //
2537: List kids = rootElement.getContent();
2538:
2539: if (rootElement.getChild("p") != null) {
2540: ArrayList ls = new ArrayList();
2541: int idxOfFirstContent = 0;
2542: int count = 0;
2543:
2544: for (Iterator i = kids.iterator(); i.hasNext(); count++) {
2545: Content c = (Content) i.next();
2546: if (c instanceof Element) {
2547: String name = ((Element) c).getName();
2548: if (isBlockLevel(name))
2549: break;
2550: }
2551:
2552: if (!(c instanceof ProcessingInstruction)) {
2553: ls.add(c);
2554: if (idxOfFirstContent == 0)
2555: idxOfFirstContent = count;
2556: }
2557: }
2558:
2559: //
2560: // If there were any elements, then add a new <p> (unless it would
2561: // be an empty one)
2562: //
2563: if (ls.size() > 0) {
2564: Element newel = new Element("p");
2565:
2566: for (Iterator i = ls.iterator(); i.hasNext();) {
2567: Content c = (Content) i.next();
2568:
2569: c.detach();
2570: newel.addContent(c);
2571: }
2572:
2573: //
2574: // Make sure there are no empty <p/> tags added.
2575: //
2576: if (newel.getTextTrim().length() > 0
2577: || !newel.getChildren().isEmpty())
2578: rootElement.addContent(idxOfFirstContent, newel);
2579: }
2580: }
2581: }
2582:
2583: /**
2584: * Compares two Strings, and if one starts with the other, then
2585: * returns null. Otherwise just like the normal Comparator
2586: * for strings.
2587: *
2588: * @author jalkanen
2589: *
2590: * @since
2591: */
2592: private static class StartingComparator implements Comparator {
2593: public int compare(Object arg0, Object arg1) {
2594: String s1 = (String) arg0;
2595: String s2 = (String) arg1;
2596:
2597: if (s1.length() > s2.length()) {
2598: if (s1.startsWith(s2) && s2.length() > 1)
2599: return 0;
2600: } else {
2601: if (s2.startsWith(s1) && s1.length() > 1)
2602: return 0;
2603: }
2604:
2605: return s1.compareTo(s2);
2606: }
2607:
2608: }
2609:
2610: }
|