001: /**
002: * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE, version 2.1, dated February 1999.
003: *
004: * This program is free software; you can redistribute it and/or modify
005: * it under the terms of the latest version of the GNU Lesser General
006: * Public License as published by the Free Software Foundation;
007: *
008: * This program is distributed in the hope that it will be useful,
009: * but WITHOUT ANY WARRANTY; without even the implied warranty of
010: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
011: * GNU Lesser General Public License for more details.
012: *
013: * You should have received a copy of the GNU Lesser General Public License
014: * along with this program (LICENSE.txt); if not, write to the Free Software
015: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
016: */package org.jamwiki.parser.jflex;
017:
018: import java.io.StringReader;
019: import java.util.regex.Matcher;
020: import java.util.regex.Pattern;
021: import org.apache.commons.lang.StringUtils;
022: import org.jamwiki.parser.AbstractParser;
023: import org.jamwiki.parser.ParserInput;
024: import org.jamwiki.parser.ParserOutput;
025: import org.jamwiki.utils.WikiLogger;
026: import org.jamwiki.utils.LinkUtil;
027: import org.jamwiki.utils.Utilities;
028: import org.jamwiki.utils.WikiLink;
029:
030: /**
031: * Implementation of {@link org.jamwiki.parser.AbstractParser} that uses
032: * <a href="http://jflex.de/">JFlex</a> as a lexer to convert Wiki syntax into
033: * HTML or other formats.
034: */
035: public class JFlexParser extends AbstractParser {
036:
037: private static final WikiLogger logger = WikiLogger
038: .getLogger(JFlexParser.class.getName());
039:
040: /** Splice mode is used when inserting an edited topic section back into the full topic content. */
041: protected static final int MODE_SPLICE = 1;
042: /** Slice mode is used when retrieving a section of a topic for editing. */
043: protected static final int MODE_SLICE = 2;
044: /** Minimal mode is used to do a bare minimum of parsing, usually just converting signature tags, prior to saving to the database. */
045: protected static final int MODE_MINIMAL = 3;
046: /** Pre-process mode is currently equivalent to metadata mode and indicates that that the JFlex pre-processor parser should be run in full. */
047: protected static final int MODE_PREPROCESS = 4;
048: /** Processing mode indicates that the pre-processor and processor should be run in full, parsing all Wiki syntax into formatted output. */
049: protected static final int MODE_PROCESS = 5;
050: /** Layout mode indicates that the pre-processor, processor and post-processor should be run in full, parsing all Wiki syntax into formatted output and adding layout tags such as paragraphs. */
051: protected static final int MODE_LAYOUT = 6;
052:
053: private static Pattern REDIRECT_PATTERN = null;
054:
055: static {
056: try {
057: // is the topic a redirect?
058: REDIRECT_PATTERN = Pattern.compile(
059: "#REDIRECT[ ]+\\[\\[([^\\n\\r\\]]+)\\]\\]",
060: Pattern.CASE_INSENSITIVE);
061: } catch (Exception e) {
062: logger.severe("Unable to compile pattern", e);
063: }
064: }
065:
066: /**
067: * The constructor creates a parser instance, initialized with the
068: * specified parser input settings.
069: *
070: * @param parserInput Input configuration settings for this parser
071: * instance.
072: */
073: public JFlexParser(ParserInput parserInput) {
074: super (parserInput);
075: }
076:
077: /**
078: * Return a parser-specific value that can be used as the content of a
079: * topic representing a redirect. For the Mediawiki syntax parser the
080: * value returned would be of the form "#REDIRECT [[Topic]]".
081: *
082: * @param topicName The name of the topic to redirect to.
083: * @return A parser-specific value that can be used as the content of a
084: * topic representing a redirect.
085: */
086: public String buildRedirectContent(String topicName) {
087: return "#REDIRECT [[" + topicName + "]]";
088: }
089:
090: /**
091: *
092: */
093: private String isRedirect(String content) {
094: if (StringUtils.isBlank(content)) {
095: return null;
096: }
097: Matcher m = REDIRECT_PATTERN.matcher(content.trim());
098: return (m.matches()) ? Utilities.decodeFromURL(m.group(1)
099: .trim(), true) : null;
100: }
101:
102: /**
103: * Utility method for executing a lexer parse.
104: */
105: private String lex(JFlexLexer lexer, String raw,
106: ParserOutput parserOutput, int mode) throws Exception {
107: lexer.init(this .parserInput, parserOutput, mode);
108: validate(lexer);
109: this .parserInput.incrementDepth();
110: // avoid infinite loops
111: if (this .parserInput.getDepth() > 100) {
112: String topicName = (!StringUtils.isBlank(this .parserInput
113: .getTopicName())) ? this .parserInput.getTopicName()
114: : null;
115: throw new Exception("Infinite parsing loop - over "
116: + this .parserInput.getDepth()
117: + " parser iterations while parsing topic "
118: + topicName);
119: }
120: while (true) {
121: String line = lexer.yylex();
122: if (line == null) {
123: break;
124: }
125: lexer.append(line);
126: }
127: this .parserInput.decrementDepth();
128: String redirect = this .isRedirect(raw);
129: if (!StringUtils.isBlank(redirect)) {
130: parserOutput.setRedirect(redirect);
131: }
132: return lexer.popAllTags();
133: }
134:
135: /**
136: * This method parses content, performing all transformations except for
137: * layout changes such as adding paragraph tags. It is suitable to be used
138: * when parsing the contents of a link or performing similar internal
139: * manipulation.
140: *
141: * @param parserOutput A ParserOutput object containing parser
142: * metadata output.
143: * @param raw The raw Wiki syntax to be converted into HTML.
144: * @param mode The parser mode to use when parsing. Mode affects what
145: * type of parsing actions are taken when processing raw text.
146: * @return The parsed content.
147: * @throws Exception Thrown if any error occurs during parsing.
148: */
149: public String parseFragment(ParserOutput parserOutput, String raw,
150: int mode) throws Exception {
151: // maintain the original output, which has all of the category and link info
152: int preMode = (mode > JFlexParser.MODE_PREPROCESS) ? JFlexParser.MODE_PREPROCESS
153: : mode;
154: String output = raw;
155: output = this .parsePreProcess(parserOutput, output, preMode);
156: if (mode >= JFlexParser.MODE_PROCESS) {
157: // layout should not be done while parsing fragments
158: preMode = JFlexParser.MODE_PROCESS;
159: output = this .parseProcess(parserOutput, output, preMode);
160: }
161: return output;
162: }
163:
164: /**
165: * Returns a HTML representation of the given wiki raw text for online representation.
166: *
167: * @param parserOutput A ParserOutput object containing parser
168: * metadata output.
169: * @param raw The raw Wiki syntax to be converted into HTML.
170: * @return The parsed content.
171: * @throws Exception Thrown if any error occurs during parsing.
172: */
173: public String parseHTML(ParserOutput parserOutput, String raw)
174: throws Exception {
175: long start = System.currentTimeMillis();
176: // some parser expressions require that lines end in a newline, so add a newline
177: // to the end of the content for good measure
178: String output = raw + '\n';
179: output = this .parsePreProcess(parserOutput, output,
180: JFlexParser.MODE_PREPROCESS);
181: output = this .parseProcess(parserOutput, output,
182: JFlexParser.MODE_PROCESS);
183: output = this .parsePostProcess(parserOutput, output,
184: JFlexParser.MODE_LAYOUT);
185: if (!StringUtils.isBlank(this .isRedirect(raw))) {
186: // redirects are parsed differently
187: output = this .parseRedirect(parserOutput, raw);
188: }
189: String topicName = (!StringUtils.isBlank(this .parserInput
190: .getTopicName())) ? this .parserInput.getTopicName()
191: : null;
192: logger.info("Parse time (parseHTML) for " + topicName + " ("
193: + ((System.currentTimeMillis() - start) / 1000.000)
194: + " s.)");
195: return output;
196: }
197:
198: /**
199: * This method provides a way to parse content and set all output metadata,
200: * such as link values used by the search engine.
201: *
202: * @return A ParserOutput object containing results of the parsing process.
203: * @param raw The raw Wiki syntax to be converted into HTML.
204: */
205: public void parseMetadata(ParserOutput parserOutput, String raw)
206: throws Exception {
207: long start = System.currentTimeMillis();
208: // FIXME - set a bogus context value to avoid parser errors
209: if (this .parserInput.getContext() == null) {
210: this .parserInput.setContext("/wiki");
211: }
212: // some parser expressions require that lines end in a newline, so add a newline
213: // to the end of the content for good measure
214: String output = raw + '\n';
215: output = this .parsePreProcess(parserOutput, output,
216: JFlexParser.MODE_PREPROCESS);
217: output = this .parseProcess(parserOutput, output,
218: JFlexParser.MODE_PROCESS);
219: String topicName = (!StringUtils.isBlank(this .parserInput
220: .getTopicName())) ? this .parserInput.getTopicName()
221: : null;
222: logger.info("Parse time (parseMetadata) for " + topicName
223: + " ("
224: + ((System.currentTimeMillis() - start) / 1000.000)
225: + " s.)");
226: }
227:
228: /**
229: * Perform a bare minimum of parsing as required prior to saving a topic
230: * to the database. In general this method will simply parse signature
231: * tags are return.
232: *
233: * @param raw The raw Wiki syntax to be converted into HTML.
234: * @return The parsed content.
235: * @throws Exception Thrown if any error occurs during parsing.
236: */
237: public String parseMinimal(String raw) throws Exception {
238: long start = System.currentTimeMillis();
239: String output = raw;
240: ParserOutput parserOutput = new ParserOutput();
241: output = this .parsePreProcess(parserOutput, output,
242: JFlexParser.MODE_MINIMAL);
243: String topicName = (!StringUtils.isBlank(this .parserInput
244: .getTopicName())) ? this .parserInput.getTopicName()
245: : null;
246: logger.info("Parse time (parseHTML) for " + topicName + " ("
247: + ((System.currentTimeMillis() - start) / 1000.000)
248: + " s.)");
249: return output;
250: }
251:
252: /**
253: * First stage of the parser, this method parses templates and signatures
254: * and builds metadata.
255: *
256: * @param parserOutput A ParserOutput object containing parser
257: * metadata output.
258: * @param raw The raw Wiki syntax to be converted into HTML.
259: * @return The parsed content.
260: * @throws Exception Thrown if any error occurs during parsing.
261: */
262: private String parsePreProcess(ParserOutput parserOutput,
263: String raw, int mode) throws Exception {
264: StringReader reader = new StringReader(raw);
265: JAMWikiPreProcessor lexer = new JAMWikiPreProcessor(reader);
266: int preMode = (mode > JFlexParser.MODE_PREPROCESS) ? JFlexParser.MODE_PREPROCESS
267: : mode;
268: return this .lex(lexer, raw, parserOutput, preMode);
269: }
270:
271: /**
272: * Second stage of the parser, this method parses most Wiki syntax, validates
273: * HTML, and performs the majority of the parser conversion.
274: *
275: * @param parserOutput A ParserOutput object containing parser
276: * metadata output.
277: * @param raw The raw Wiki syntax to be converted into HTML.
278: * @return The parsed content.
279: * @throws Exception Thrown if any error occurs during parsing.
280: */
281: private String parseProcess(ParserOutput parserOutput, String raw,
282: int mode) throws Exception {
283: StringReader reader = new StringReader(raw);
284: JAMWikiProcessor lexer = new JAMWikiProcessor(reader);
285: return this .lex(lexer, raw, parserOutput,
286: JFlexParser.MODE_PROCESS);
287: }
288:
289: /**
290: * In most cases this method is the second and final stage of the parser,
291: * adding paragraph tags and other layout elements that for various reasons
292: * cannot be added during the first parsing stage.
293: *
294: * @param parserOutput A ParserOutput object containing parser
295: * metadata output.
296: * @param raw The raw Wiki syntax to be converted into HTML.
297: * @return The parsed content.
298: * @throws Exception Thrown if any error occurs during parsing.
299: */
300: private String parsePostProcess(ParserOutput parserOutput,
301: String raw, int mode) throws Exception {
302: StringReader reader = new StringReader(raw);
303: JAMWikiPostProcessor lexer = new JAMWikiPostProcessor(reader);
304: return this .lex(lexer, raw, parserOutput,
305: JFlexParser.MODE_LAYOUT);
306: }
307:
308: /**
309: * Parse a topic that is a redirect. Ordinarily the contents of the redirected
310: * topic would be displayed, but in some cases (such as when explicitly viewing
311: * a redirect) the redirect page contents need to be displayed.
312: *
313: * @param parserOutput A ParserOutput object containing parser
314: * metadata output.
315: * @param raw The raw Wiki syntax to be converted into HTML.
316: * @return The parsed content.
317: * @throws Exception Thrown if any error occurs during parsing.
318: */
319: private String parseRedirect(ParserOutput parserOutput, String raw)
320: throws Exception {
321: String redirect = this .isRedirect(raw);
322: String style = "redirect";
323: if (!LinkUtil.isExistingArticle(this .parserInput
324: .getVirtualWiki(), redirect.trim())) {
325: style = "edit redirect";
326: }
327: WikiLink wikiLink = new WikiLink();
328: wikiLink.setDestination(redirect);
329: return LinkUtil.buildInternalLinkHtml(this .parserInput
330: .getContext(), this .parserInput.getVirtualWiki(),
331: wikiLink, null, style, null, false);
332: }
333:
334: /**
335: * This method provides the capability for retrieving a section of Wiki markup
336: * from an existing document. It is used primarily when editing a section of
337: * a topic. This method will return all content from the specified section, up
338: * to the either the next section of the same or greater level or the end of the
339: * document. For example, if the specified section is an <h3>, all content
340: * up to the next <h1>, <h2>, <h3> or the end of the document
341: * will be returned.
342: *
343: * @param parserOutput A ParserOutput object containing parser
344: * metadata output.
345: * @param raw The raw Wiki text that is to be parsed.
346: * @param targetSection The section (counted from zero) that is to be returned.
347: * @return Returns the raw topic content for the target section.
348: * @throws Exception Thrown if any error occurs during parsing.
349: */
350: public String parseSlice(ParserOutput parserOutput, String raw,
351: int targetSection) throws Exception {
352: long start = System.currentTimeMillis();
353: StringReader reader = new StringReader(raw);
354: JAMWikiSpliceProcessor lexer = new JAMWikiSpliceProcessor(
355: reader);
356: lexer.setTargetSection(targetSection);
357: String output = this .lex(lexer, raw, parserOutput,
358: JFlexParser.MODE_SLICE);
359: String topicName = (!StringUtils.isBlank(this .parserInput
360: .getTopicName())) ? this .parserInput.getTopicName()
361: : null;
362: logger.fine("Parse time (parseSlice) for " + topicName + " ("
363: + ((System.currentTimeMillis() - start) / 1000.000)
364: + " s.)");
365: return output;
366: }
367:
368: /**
369: * This method provides the capability for splicing a section of new content back
370: * into a document. It is used primarily when editing a section of a topic. This
371: * method will replace all content in a specified section, up to the either the next
372: * section of the same or greater level or the end of the document. For example, if
373: * the specified section is an <h3>, all content up to the next <h1>,
374: * <h2>, <h3> or the end of the document will be replaced with the
375: * specified text.
376: *
377: * @param parserOutput A ParserOutput object containing parser
378: * metadata output.
379: * @param raw The raw Wiki text that is to be parsed.
380: * @param targetSection The section (counted from zero) that is to be returned.
381: * @param replacementText The text to replace the target section text with.
382: * @return The raw topic content including the new replacement text.
383: * @throws Exception Thrown if any error occurs during parsing.
384: */
385: public String parseSplice(ParserOutput parserOutput, String raw,
386: int targetSection, String replacementText) throws Exception {
387: long start = System.currentTimeMillis();
388: StringReader reader = new StringReader(raw);
389: JAMWikiSpliceProcessor lexer = new JAMWikiSpliceProcessor(
390: reader);
391: lexer.setReplacementText(replacementText);
392: lexer.setTargetSection(targetSection);
393: String output = this .lex(lexer, raw, parserOutput,
394: JFlexParser.MODE_SPLICE);
395: String topicName = (!StringUtils.isBlank(this .parserInput
396: .getTopicName())) ? this .parserInput.getTopicName()
397: : null;
398: logger.fine("Parse time (parseSplice) for " + topicName + " ("
399: + ((System.currentTimeMillis() - start) / 1000.000)
400: + " s.)");
401: return output;
402: }
403:
404: /**
405: * Validate that all settings required for the parser have been set, and if
406: * not throw an exception.
407: *
408: * @throws Exception Thrown if the parser is not initialized properly,
409: * usually due to a parser input field not being set.
410: */
411: private static void validate(JFlexLexer lexer) throws Exception {
412: // validate parser settings
413: boolean validated = true;
414: if (lexer.mode == JFlexParser.MODE_SPLICE
415: || lexer.mode == JFlexParser.MODE_SLICE) {
416: if (lexer.parserInput.getTopicName() == null) {
417: logger
418: .info("Failure while initializing parser: topic name is null.");
419: validated = false;
420: }
421: } else if (lexer.mode == JFlexParser.MODE_LAYOUT) {
422: if (lexer.parserInput == null) {
423: logger
424: .info("Failure while initializing parser: ParserInput is null.");
425: validated = false;
426: }
427: if (lexer.parserInput.getTableOfContents() == null) {
428: logger
429: .info("Failure while initializing parser: table of contents object is null.");
430: validated = false;
431: }
432: } else if (lexer.mode == JFlexParser.MODE_PROCESS) {
433: if (lexer.parserInput.getTableOfContents() == null) {
434: logger
435: .info("Failure while initializing parser: table of contents object is null.");
436: validated = false;
437: }
438: if (lexer.parserInput.getTopicName() == null) {
439: logger
440: .info("Failure while initializing parser: topic name is null.");
441: validated = false;
442: }
443: if (lexer.parserInput.getContext() == null) {
444: logger
445: .info("Failure while initializing parser: context is null.");
446: validated = false;
447: }
448: if (lexer.parserInput.getVirtualWiki() == null) {
449: logger
450: .info("Failure while initializing parser: virtual wiki is null.");
451: validated = false;
452: }
453: } else if (lexer.mode <= JFlexParser.MODE_PREPROCESS
454: && lexer.mode >= JFlexParser.MODE_MINIMAL) {
455: if (lexer.parserInput.getVirtualWiki() == null) {
456: logger
457: .info("Failure while initializing parser: virtual wiki is null.");
458: validated = false;
459: }
460: if (lexer.parserInput.getTopicName() == null) {
461: logger
462: .info("Failure while initializing parser: topic name is null.");
463: validated = false;
464: }
465: }
466: if (!validated) {
467: throw new Exception("Parser info not properly initialized");
468: }
469: }
470: }
|