001: /*
002: JSPWiki - a JSP-based WikiWiki clone.
003:
004: Copyright (C) 2001-2006 Janne Jalkanen (Janne.Jalkanen@iki.fi)
005:
006: This program is free software; you can redistribute it and/or modify
007: it under the terms of the GNU Lesser General Public License as published by
008: the Free Software Foundation; either version 2.1 of the License, or
009: (at your option) any later version.
010:
011: This program is distributed in the hope that it will be useful,
012: but WITHOUT ANY WARRANTY; without even the implied warranty of
013: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: GNU Lesser General Public License for more details.
015:
016: You should have received a copy of the GNU Lesser General Public License
017: along with this program; if not, write to the Free Software
018: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
019: */
020: package com.ecyrd.jspwiki.parser;
021:
022: import java.io.BufferedReader;
023: import java.io.IOException;
024: import java.io.PushbackReader;
025: import java.io.Reader;
026: import java.util.ArrayList;
027:
028: import com.ecyrd.jspwiki.StringTransmutator;
029: import com.ecyrd.jspwiki.WikiContext;
030: import com.ecyrd.jspwiki.WikiEngine;
031:
032: /**
033: * Provides an abstract class for the parser instances.
034: *
035: * @author Janne Jalkanen
036: * @since 2.4
037: */
038: public abstract class MarkupParser {
039: /** Allow this many characters to be pushed back in the stream. In effect,
040: this limits the size of a single line. */
041: protected static final int PUSHBACK_BUFFER_SIZE = 10 * 1024;
042: protected PushbackReader m_in;
043: private int m_pos = -1; // current position in reader stream
044:
045: protected WikiEngine m_engine;
046: protected WikiContext m_context;
047:
048: /** Optionally stores internal wikilinks */
049: protected ArrayList m_localLinkMutatorChain = new ArrayList();
050: protected ArrayList m_externalLinkMutatorChain = new ArrayList();
051: protected ArrayList m_attachmentLinkMutatorChain = new ArrayList();
052: protected ArrayList m_headingListenerChain = new ArrayList();
053: protected ArrayList m_linkMutators = new ArrayList();
054:
055: protected boolean m_inlineImages = true;
056:
057: protected boolean m_parseAccessRules = true;
058: /** If set to "true", allows using raw HTML within Wiki text. Be warned,
059: this is a VERY dangerous option to set - never turn this on in a publicly
060: allowable Wiki, unless you are absolutely certain of what you're doing. */
061: public static final String PROP_ALLOWHTML = "jspwiki.translatorReader.allowHTML";
062: /** If set to "true", enables plugins during parsing */
063: public static final String PROP_RUNPLUGINS = "jspwiki.translatorReader.runPlugins";
064:
065: /** Lists all punctuation characters allowed in WikiMarkup. These
066: will not be cleaned away. This is for compatibility for older versions
067: of JSPWiki. */
068:
069: protected static final String LEGACY_CHARS_ALLOWED = "._";
070:
071: /** Lists all punctuation characters allowed in page names. */
072: public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";
073:
074: protected MarkupParser(WikiContext context, Reader in) {
075: m_engine = context.getEngine();
076: m_context = context;
077: setInputReader(in);
078: }
079:
080: /**
081: * Replaces the current input character stream with a new one.
082: * @param in New source for input. If null, this method does nothing.
083: * @return the old stream
084: */
085: public Reader setInputReader(Reader in) {
086: Reader old = m_in;
087:
088: if (in != null) {
089: m_in = new PushbackReader(new BufferedReader(in),
090: PUSHBACK_BUFFER_SIZE);
091: }
092:
093: return old;
094: }
095:
096: /**
097: * Adds a hook for processing link texts. This hook is called
098: * when the link text is written into the output stream, and
099: * you may use it to modify the text. It does not affect the
100: * actual link, only the user-visible text.
101: *
102: * @param mutator The hook to call. Null is safe.
103: */
104: public void addLinkTransmutator(StringTransmutator mutator) {
105: if (mutator != null) {
106: m_linkMutators.add(mutator);
107: }
108: }
109:
110: /**
111: * Adds a hook for processing local links. The engine
112: * transforms both non-existing and existing page links.
113: *
114: * @param mutator The hook to call. Null is safe.
115: */
116: public void addLocalLinkHook(StringTransmutator mutator) {
117: if (mutator != null) {
118: m_localLinkMutatorChain.add(mutator);
119: }
120: }
121:
122: /**
123: * Adds a hook for processing external links. This includes
124: * all http:// ftp://, etc. links, including inlined images.
125: *
126: * @param mutator The hook to call. Null is safe.
127: */
128: public void addExternalLinkHook(StringTransmutator mutator) {
129: if (mutator != null) {
130: m_externalLinkMutatorChain.add(mutator);
131: }
132: }
133:
134: /**
135: * Adds a hook for processing attachment links.
136: *
137: * @param mutator The hook to call. Null is safe.
138: */
139: public void addAttachmentLinkHook(StringTransmutator mutator) {
140: if (mutator != null) {
141: m_attachmentLinkMutatorChain.add(mutator);
142: }
143: }
144:
145: public void addHeadingListener(HeadingListener listener) {
146: if (listener != null) {
147: m_headingListenerChain.add(listener);
148: }
149: }
150:
151: public void disableAccessRules() {
152: m_parseAccessRules = false;
153: }
154:
155: /**
156: * Use this to turn on or off image inlining.
157: * @param toggle If true, images are inlined (as per set in jspwiki.properties)
158: * If false, then images won't be inlined; instead, they will be
159: * treated as standard hyperlinks.
160: * @since 2.2.9
161: */
162: public void enableImageInlining(boolean toggle) {
163: m_inlineImages = toggle;
164: }
165:
166: /**
167: * Parses the document.
168: * @return the parsed document, as a WikiDocument
169: * @throws IOException
170: */
171: public abstract WikiDocument parse() throws IOException;
172:
173: /**
174: * Return the current position in the reader stream.
175: * The value will be -1 prior to reading.
176: * @return the reader position as an int.
177: */
178: public int getPosition() {
179: return m_pos;
180: }
181:
182: /**
183: * Returns the next token in the stream. This is the most called method
184: * in the entire parser, so it needs to be lean and mean.
185: *
186: * @return The next token in the stream; or, if the stream is ended, -1.
187: * @throws IOException If something bad happens
188: * @throws NullPointerException If you have not yet created an input document.
189: */
190: protected final int nextToken() throws IOException {
191: // if( m_in == null ) return -1;
192: m_pos++;
193: return m_in.read();
194: }
195:
196: /**
197: * Push back any character to the current input. Does not
198: * push back a read EOF, though.
199: */
200: protected void pushBack(int c) throws IOException {
201: if (c != -1 && m_in != null) {
202: m_pos--;
203: m_in.unread(c);
204: }
205: }
206:
207: /**
208: * Cleans a Wiki name. The functionality of this method was changed in 2.6
209: * so that the list of allowed characters is much larger. Use wikifyLink()
210: * to get the legacy behaviour.
211: * <P>
212: * [ This is a link ] -> This is a link
213: *
214: * @param link Link to be cleared. Null is safe, and causes this to return null.
215: * @return A cleaned link.
216: *
217: * @since 2.0
218: */
219: public static String cleanLink(String link) {
220: return cleanLink(link, PUNCTUATION_CHARS_ALLOWED);
221: }
222:
223: /**
224: * Cleans a Wiki name based on a list of characters. Also, any multiple
225: * whitespace is collapsed into a single space, and any leading or trailing
226: * space is removed.
227: *
228: * @param link Link to be cleared. Null is safe, and causes this to return null.
229: * @param allowedChars Characters which are allowed in the string.
230: * @return A cleaned link.
231: *
232: * @since 2.6
233: */
234: public static String cleanLink(String link, String allowedChars) {
235: if (link == null)
236: return null;
237:
238: link = link.trim();
239: StringBuffer clean = new StringBuffer(link.length());
240:
241: //
242: // Remove non-alphanumeric characters that should not
243: // be put inside WikiNames. Note that all valid
244: // Unicode letters are considered okay for WikiNames.
245: // It is the problem of the WikiPageProvider to take
246: // care of actually storing that information.
247: //
248: // Also capitalize things, if necessary.
249: //
250:
251: boolean isWord = true; // If true, we've just crossed a word boundary
252: boolean wasSpace = false;
253:
254: for (int i = 0; i < link.length(); i++) {
255: char ch = link.charAt(i);
256:
257: //
258: // Cleans away repetitive whitespace and only uses the first one.
259: //
260: if (Character.isWhitespace(ch)) {
261: if (wasSpace)
262: continue;
263:
264: wasSpace = true;
265: } else {
266: wasSpace = false;
267: }
268:
269: //
270: // Check if it is allowed to use this char, and capitalize, if necessary.
271: //
272: if (Character.isLetterOrDigit(ch)
273: || allowedChars.indexOf(ch) != -1) {
274: // Is a letter
275:
276: if (isWord)
277: ch = Character.toUpperCase(ch);
278: clean.append(ch);
279: isWord = false;
280: } else {
281: isWord = true;
282: }
283: }
284:
285: return clean.toString();
286: }
287:
288: /**
289: * Cleans away extra legacy characters. This method functions exactly
290: * like pre-2.6 cleanLink()
291: * <P>
292: * [ This is a link ] -> ThisIsALink
293: *
294: * @param link Link to be cleared. Null is safe, and causes this to return null.
295: * @return A cleaned link.
296: * @since 2.6
297: */
298: public static String wikifyLink(String link) {
299: return MarkupParser.cleanLink(link,
300: MarkupParser.LEGACY_CHARS_ALLOWED);
301: }
302:
303: }
|