Source Code Cross Referenced for JFlexParser.java in » Wiki-Engine » JAMWiki » org » jamwiki » parser » jflex » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Wiki Engine » JAMWiki » org.jamwiki.parser.jflex
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /**
002:         * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE, version 2.1, dated February 1999.
003:         *
004:         * This program is free software; you can redistribute it and/or modify
005:         * it under the terms of the latest version of the GNU Lesser General
006:         * Public License as published by the Free Software Foundation;
007:         *
008:         * This program is distributed in the hope that it will be useful,
009:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
010:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
011:         * GNU Lesser General Public License for more details.
012:         *
013:         * You should have received a copy of the GNU Lesser General Public License
014:         * along with this program (LICENSE.txt); if not, write to the Free Software
015:         * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
016:         */package org.jamwiki.parser.jflex;
017:
018:        import java.io.StringReader;
019:        import java.util.regex.Matcher;
020:        import java.util.regex.Pattern;
021:        import org.apache.commons.lang.StringUtils;
022:        import org.jamwiki.parser.AbstractParser;
023:        import org.jamwiki.parser.ParserInput;
024:        import org.jamwiki.parser.ParserOutput;
025:        import org.jamwiki.utils.WikiLogger;
026:        import org.jamwiki.utils.LinkUtil;
027:        import org.jamwiki.utils.Utilities;
028:        import org.jamwiki.utils.WikiLink;
029:
030:        /**
031:         * Implementation of {@link org.jamwiki.parser.AbstractParser} that uses
032:         * <a href="http://jflex.de/">JFlex</a> as a lexer to convert Wiki syntax into
033:         * HTML or other formats.
034:         */
035:        public class JFlexParser extends AbstractParser {
036:
037:            private static final WikiLogger logger = WikiLogger
038:                    .getLogger(JFlexParser.class.getName());
039:
040:            /** Splice mode is used when inserting an edited topic section back into the full topic content. */
041:            protected static final int MODE_SPLICE = 1;
042:            /** Slice mode is used when retrieving a section of a topic for editing. */
043:            protected static final int MODE_SLICE = 2;
044:            /** Minimal mode is used to do a bare minimum of parsing, usually just converting signature tags, prior to saving to the database. */
045:            protected static final int MODE_MINIMAL = 3;
046:            /** Pre-process mode is currently equivalent to metadata mode and indicates that that the JFlex pre-processor parser should be run in full. */
047:            protected static final int MODE_PREPROCESS = 4;
048:            /** Processing mode indicates that the pre-processor and processor should be run in full, parsing all Wiki syntax into formatted output. */
049:            protected static final int MODE_PROCESS = 5;
050:            /** Layout mode indicates that the pre-processor, processor and post-processor should be run in full, parsing all Wiki syntax into formatted output and adding layout tags such as paragraphs. */
051:            protected static final int MODE_LAYOUT = 6;
052:
053:            private static Pattern REDIRECT_PATTERN = null;
054:
055:            static {
056:                try {
057:                    // is the topic a redirect?
058:                    REDIRECT_PATTERN = Pattern.compile(
059:                            "#REDIRECT[ ]+\\[\\[([^\\n\\r\\]]+)\\]\\]",
060:                            Pattern.CASE_INSENSITIVE);
061:                } catch (Exception e) {
062:                    logger.severe("Unable to compile pattern", e);
063:                }
064:            }
065:
066:            /**
067:             * The constructor creates a parser instance, initialized with the
068:             * specified parser input settings.
069:             *
070:             * @param parserInput Input configuration settings for this parser
071:             *  instance.
072:             */
073:            public JFlexParser(ParserInput parserInput) {
074:                super (parserInput);
075:            }
076:
077:            /**
078:             * Return a parser-specific value that can be used as the content of a
079:             * topic representing a redirect.  For the Mediawiki syntax parser the
080:             * value returned would be of the form "#REDIRECT [[Topic]]".
081:             *
082:             * @param topicName The name of the topic to redirect to.
083:             * @return A parser-specific value that can be used as the content of a
084:             *  topic representing a redirect.
085:             */
086:            public String buildRedirectContent(String topicName) {
087:                return "#REDIRECT [[" + topicName + "]]";
088:            }
089:
090:            /**
091:             *
092:             */
093:            private String isRedirect(String content) {
094:                if (StringUtils.isBlank(content)) {
095:                    return null;
096:                }
097:                Matcher m = REDIRECT_PATTERN.matcher(content.trim());
098:                return (m.matches()) ? Utilities.decodeFromURL(m.group(1)
099:                        .trim(), true) : null;
100:            }
101:
102:            /**
103:             * Utility method for executing a lexer parse.
104:             */
105:            private String lex(JFlexLexer lexer, String raw,
106:                    ParserOutput parserOutput, int mode) throws Exception {
107:                lexer.init(this .parserInput, parserOutput, mode);
108:                validate(lexer);
109:                this .parserInput.incrementDepth();
110:                // avoid infinite loops
111:                if (this .parserInput.getDepth() > 100) {
112:                    String topicName = (!StringUtils.isBlank(this .parserInput
113:                            .getTopicName())) ? this .parserInput.getTopicName()
114:                            : null;
115:                    throw new Exception("Infinite parsing loop - over "
116:                            + this .parserInput.getDepth()
117:                            + " parser iterations while parsing topic "
118:                            + topicName);
119:                }
120:                while (true) {
121:                    String line = lexer.yylex();
122:                    if (line == null) {
123:                        break;
124:                    }
125:                    lexer.append(line);
126:                }
127:                this .parserInput.decrementDepth();
128:                String redirect = this .isRedirect(raw);
129:                if (!StringUtils.isBlank(redirect)) {
130:                    parserOutput.setRedirect(redirect);
131:                }
132:                return lexer.popAllTags();
133:            }
134:
135:            /**
136:             * This method parses content, performing all transformations except for
137:             * layout changes such as adding paragraph tags.  It is suitable to be used
138:             * when parsing the contents of a link or performing similar internal
139:             * manipulation.
140:             *
141:             * @param parserOutput A ParserOutput object containing parser
142:             *  metadata output.
143:             * @param raw The raw Wiki syntax to be converted into HTML.
144:             * @param mode The parser mode to use when parsing.  Mode affects what
145:             *  type of parsing actions are taken when processing raw text.
146:             * @return The parsed content.
147:             * @throws Exception Thrown if any error occurs during parsing.
148:             */
149:            public String parseFragment(ParserOutput parserOutput, String raw,
150:                    int mode) throws Exception {
151:                // maintain the original output, which has all of the category and link info
152:                int preMode = (mode > JFlexParser.MODE_PREPROCESS) ? JFlexParser.MODE_PREPROCESS
153:                        : mode;
154:                String output = raw;
155:                output = this .parsePreProcess(parserOutput, output, preMode);
156:                if (mode >= JFlexParser.MODE_PROCESS) {
157:                    // layout should not be done while parsing fragments
158:                    preMode = JFlexParser.MODE_PROCESS;
159:                    output = this .parseProcess(parserOutput, output, preMode);
160:                }
161:                return output;
162:            }
163:
164:            /**
165:             * Returns a HTML representation of the given wiki raw text for online representation.
166:             *
167:             * @param parserOutput A ParserOutput object containing parser
168:             *  metadata output.
169:             * @param raw The raw Wiki syntax to be converted into HTML.
170:             * @return The parsed content.
171:             * @throws Exception Thrown if any error occurs during parsing.
172:             */
173:            public String parseHTML(ParserOutput parserOutput, String raw)
174:                    throws Exception {
175:                long start = System.currentTimeMillis();
176:                // some parser expressions require that lines end in a newline, so add a newline
177:                // to the end of the content for good measure
178:                String output = raw + '\n';
179:                output = this .parsePreProcess(parserOutput, output,
180:                        JFlexParser.MODE_PREPROCESS);
181:                output = this .parseProcess(parserOutput, output,
182:                        JFlexParser.MODE_PROCESS);
183:                output = this .parsePostProcess(parserOutput, output,
184:                        JFlexParser.MODE_LAYOUT);
185:                if (!StringUtils.isBlank(this .isRedirect(raw))) {
186:                    // redirects are parsed differently
187:                    output = this .parseRedirect(parserOutput, raw);
188:                }
189:                String topicName = (!StringUtils.isBlank(this .parserInput
190:                        .getTopicName())) ? this .parserInput.getTopicName()
191:                        : null;
192:                logger.info("Parse time (parseHTML) for " + topicName + " ("
193:                        + ((System.currentTimeMillis() - start) / 1000.000)
194:                        + " s.)");
195:                return output;
196:            }
197:
198:            /**
199:             * This method provides a way to parse content and set all output metadata,
200:             * such as link values used by the search engine.
201:             *
202:             * @return A ParserOutput object containing results of the parsing process.
203:             * @param raw The raw Wiki syntax to be converted into HTML.
204:             */
205:            public void parseMetadata(ParserOutput parserOutput, String raw)
206:                    throws Exception {
207:                long start = System.currentTimeMillis();
208:                // FIXME - set a bogus context value to avoid parser errors
209:                if (this .parserInput.getContext() == null) {
210:                    this .parserInput.setContext("/wiki");
211:                }
212:                // some parser expressions require that lines end in a newline, so add a newline
213:                // to the end of the content for good measure
214:                String output = raw + '\n';
215:                output = this .parsePreProcess(parserOutput, output,
216:                        JFlexParser.MODE_PREPROCESS);
217:                output = this .parseProcess(parserOutput, output,
218:                        JFlexParser.MODE_PROCESS);
219:                String topicName = (!StringUtils.isBlank(this .parserInput
220:                        .getTopicName())) ? this .parserInput.getTopicName()
221:                        : null;
222:                logger.info("Parse time (parseMetadata) for " + topicName
223:                        + " ("
224:                        + ((System.currentTimeMillis() - start) / 1000.000)
225:                        + " s.)");
226:            }
227:
228:            /**
229:             * Perform a bare minimum of parsing as required prior to saving a topic
230:             * to the database.  In general this method will simply parse signature
231:             * tags are return.
232:             *
233:             * @param raw The raw Wiki syntax to be converted into HTML.
234:             * @return The parsed content.
235:             * @throws Exception Thrown if any error occurs during parsing.
236:             */
237:            public String parseMinimal(String raw) throws Exception {
238:                long start = System.currentTimeMillis();
239:                String output = raw;
240:                ParserOutput parserOutput = new ParserOutput();
241:                output = this .parsePreProcess(parserOutput, output,
242:                        JFlexParser.MODE_MINIMAL);
243:                String topicName = (!StringUtils.isBlank(this .parserInput
244:                        .getTopicName())) ? this .parserInput.getTopicName()
245:                        : null;
246:                logger.info("Parse time (parseHTML) for " + topicName + " ("
247:                        + ((System.currentTimeMillis() - start) / 1000.000)
248:                        + " s.)");
249:                return output;
250:            }
251:
252:            /**
253:             * First stage of the parser, this method parses templates and signatures
254:             * and builds metadata.
255:             *
256:             * @param parserOutput A ParserOutput object containing parser
257:             *  metadata output.
258:             * @param raw The raw Wiki syntax to be converted into HTML.
259:             * @return The parsed content.
260:             * @throws Exception Thrown if any error occurs during parsing.
261:             */
262:            private String parsePreProcess(ParserOutput parserOutput,
263:                    String raw, int mode) throws Exception {
264:                StringReader reader = new StringReader(raw);
265:                JAMWikiPreProcessor lexer = new JAMWikiPreProcessor(reader);
266:                int preMode = (mode > JFlexParser.MODE_PREPROCESS) ? JFlexParser.MODE_PREPROCESS
267:                        : mode;
268:                return this .lex(lexer, raw, parserOutput, preMode);
269:            }
270:
271:            /**
272:             * Second stage of the parser, this method parses most Wiki syntax, validates
273:             * HTML, and performs the majority of the parser conversion.
274:             *
275:             * @param parserOutput A ParserOutput object containing parser
276:             *  metadata output.
277:             * @param raw The raw Wiki syntax to be converted into HTML.
278:             * @return The parsed content.
279:             * @throws Exception Thrown if any error occurs during parsing.
280:             */
281:            private String parseProcess(ParserOutput parserOutput, String raw,
282:                    int mode) throws Exception {
283:                StringReader reader = new StringReader(raw);
284:                JAMWikiProcessor lexer = new JAMWikiProcessor(reader);
285:                return this .lex(lexer, raw, parserOutput,
286:                        JFlexParser.MODE_PROCESS);
287:            }
288:
289:            /**
290:             * In most cases this method is the second and final stage of the parser,
291:             * adding paragraph tags and other layout elements that for various reasons
292:             * cannot be added during the first parsing stage.
293:             *
294:             * @param parserOutput A ParserOutput object containing parser
295:             *  metadata output.
296:             * @param raw The raw Wiki syntax to be converted into HTML.
297:             * @return The parsed content.
298:             * @throws Exception Thrown if any error occurs during parsing.
299:             */
300:            private String parsePostProcess(ParserOutput parserOutput,
301:                    String raw, int mode) throws Exception {
302:                StringReader reader = new StringReader(raw);
303:                JAMWikiPostProcessor lexer = new JAMWikiPostProcessor(reader);
304:                return this .lex(lexer, raw, parserOutput,
305:                        JFlexParser.MODE_LAYOUT);
306:            }
307:
308:            /**
309:             * Parse a topic that is a redirect.  Ordinarily the contents of the redirected
310:             * topic would be displayed, but in some cases (such as when explicitly viewing
311:             * a redirect) the redirect page contents need to be displayed.
312:             *
313:             * @param parserOutput A ParserOutput object containing parser
314:             *  metadata output.
315:             * @param raw The raw Wiki syntax to be converted into HTML.
316:             * @return The parsed content.
317:             * @throws Exception Thrown if any error occurs during parsing.
318:             */
319:            private String parseRedirect(ParserOutput parserOutput, String raw)
320:                    throws Exception {
321:                String redirect = this .isRedirect(raw);
322:                String style = "redirect";
323:                if (!LinkUtil.isExistingArticle(this .parserInput
324:                        .getVirtualWiki(), redirect.trim())) {
325:                    style = "edit redirect";
326:                }
327:                WikiLink wikiLink = new WikiLink();
328:                wikiLink.setDestination(redirect);
329:                return LinkUtil.buildInternalLinkHtml(this .parserInput
330:                        .getContext(), this .parserInput.getVirtualWiki(),
331:                        wikiLink, null, style, null, false);
332:            }
333:
334:            /**
335:             * This method provides the capability for retrieving a section of Wiki markup
336:             * from an existing document.  It is used primarily when editing a section of
337:             * a topic.  This method will return all content from the specified section, up
338:             * to the either the next section of the same or greater level or the end of the
339:             * document.  For example, if the specified section is an &lt;h3&gt;, all content
340:             * up to the next &lt;h1&gt;, &lt;h2&gt;, &lt;h3&gt; or the end of the document
341:             * will be returned.
342:             *
343:             * @param parserOutput A ParserOutput object containing parser
344:             *  metadata output.
345:             * @param raw The raw Wiki text that is to be parsed.
346:             * @param targetSection The section (counted from zero) that is to be returned.
347:             * @return Returns the raw topic content for the target section.
348:             * @throws Exception Thrown if any error occurs during parsing.
349:             */
350:            public String parseSlice(ParserOutput parserOutput, String raw,
351:                    int targetSection) throws Exception {
352:                long start = System.currentTimeMillis();
353:                StringReader reader = new StringReader(raw);
354:                JAMWikiSpliceProcessor lexer = new JAMWikiSpliceProcessor(
355:                        reader);
356:                lexer.setTargetSection(targetSection);
357:                String output = this .lex(lexer, raw, parserOutput,
358:                        JFlexParser.MODE_SLICE);
359:                String topicName = (!StringUtils.isBlank(this .parserInput
360:                        .getTopicName())) ? this .parserInput.getTopicName()
361:                        : null;
362:                logger.fine("Parse time (parseSlice) for " + topicName + " ("
363:                        + ((System.currentTimeMillis() - start) / 1000.000)
364:                        + " s.)");
365:                return output;
366:            }
367:
368:            /**
369:             * This method provides the capability for splicing a section of new content back
370:             * into a document.  It is used primarily when editing a section of a topic.  This
371:             * method will replace all content in a specified section, up to the either the next
372:             * section of the same or greater level or the end of the document.  For example, if
373:             * the specified section is an &lt;h3&gt;, all content up to the next &lt;h1&gt;,
374:             * &lt;h2&gt;, &lt;h3&gt; or the end of the document will be replaced with the
375:             * specified text.
376:             *
377:             * @param parserOutput A ParserOutput object containing parser
378:             *  metadata output.
379:             * @param raw The raw Wiki text that is to be parsed.
380:             * @param targetSection The section (counted from zero) that is to be returned.
381:             * @param replacementText The text to replace the target section text with.
382:             * @return The raw topic content including the new replacement text.
383:             * @throws Exception Thrown if any error occurs during parsing.
384:             */
385:            public String parseSplice(ParserOutput parserOutput, String raw,
386:                    int targetSection, String replacementText) throws Exception {
387:                long start = System.currentTimeMillis();
388:                StringReader reader = new StringReader(raw);
389:                JAMWikiSpliceProcessor lexer = new JAMWikiSpliceProcessor(
390:                        reader);
391:                lexer.setReplacementText(replacementText);
392:                lexer.setTargetSection(targetSection);
393:                String output = this .lex(lexer, raw, parserOutput,
394:                        JFlexParser.MODE_SPLICE);
395:                String topicName = (!StringUtils.isBlank(this .parserInput
396:                        .getTopicName())) ? this .parserInput.getTopicName()
397:                        : null;
398:                logger.fine("Parse time (parseSplice) for " + topicName + " ("
399:                        + ((System.currentTimeMillis() - start) / 1000.000)
400:                        + " s.)");
401:                return output;
402:            }
403:
404:            /**
405:             * Validate that all settings required for the parser have been set, and if
406:             * not throw an exception.
407:             *
408:             * @throws Exception Thrown if the parser is not initialized properly,
409:             *  usually due to a parser input field not being set.
410:             */
411:            private static void validate(JFlexLexer lexer) throws Exception {
412:                // validate parser settings
413:                boolean validated = true;
414:                if (lexer.mode == JFlexParser.MODE_SPLICE
415:                        || lexer.mode == JFlexParser.MODE_SLICE) {
416:                    if (lexer.parserInput.getTopicName() == null) {
417:                        logger
418:                                .info("Failure while initializing parser: topic name is null.");
419:                        validated = false;
420:                    }
421:                } else if (lexer.mode == JFlexParser.MODE_LAYOUT) {
422:                    if (lexer.parserInput == null) {
423:                        logger
424:                                .info("Failure while initializing parser: ParserInput is null.");
425:                        validated = false;
426:                    }
427:                    if (lexer.parserInput.getTableOfContents() == null) {
428:                        logger
429:                                .info("Failure while initializing parser: table of contents object is null.");
430:                        validated = false;
431:                    }
432:                } else if (lexer.mode == JFlexParser.MODE_PROCESS) {
433:                    if (lexer.parserInput.getTableOfContents() == null) {
434:                        logger
435:                                .info("Failure while initializing parser: table of contents object is null.");
436:                        validated = false;
437:                    }
438:                    if (lexer.parserInput.getTopicName() == null) {
439:                        logger
440:                                .info("Failure while initializing parser: topic name is null.");
441:                        validated = false;
442:                    }
443:                    if (lexer.parserInput.getContext() == null) {
444:                        logger
445:                                .info("Failure while initializing parser: context is null.");
446:                        validated = false;
447:                    }
448:                    if (lexer.parserInput.getVirtualWiki() == null) {
449:                        logger
450:                                .info("Failure while initializing parser: virtual wiki is null.");
451:                        validated = false;
452:                    }
453:                } else if (lexer.mode <= JFlexParser.MODE_PREPROCESS
454:                        && lexer.mode >= JFlexParser.MODE_MINIMAL) {
455:                    if (lexer.parserInput.getVirtualWiki() == null) {
456:                        logger
457:                                .info("Failure while initializing parser: virtual wiki is null.");
458:                        validated = false;
459:                    }
460:                    if (lexer.parserInput.getTopicName() == null) {
461:                        logger
462:                                .info("Failure while initializing parser: topic name is null.");
463:                        validated = false;
464:                    }
465:                }
466:                if (!validated) {
467:                    throw new Exception("Parser info not properly initialized");
468:                }
469:            }
470:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.