001: /* The following code was generated by JFlex 1.3.5 on 13-9-06 0:53 */
002:
003: package vqwiki.lex;
004:
005: /*
006: * This class adds paragraph tags as appropriate.
007: *
008: *
009: * @author W. Ryan Holliday
010: */
011:
012: import java.util.Stack;
013: import org.apache.log4j.Logger;
014: import vqwiki.WikiBase;
015:
016: /**
017: * This class is a scanner generated by
018: * <a href="http://www.jflex.de/">JFlex</a> 1.3.5
019: * on 13-9-06 0:53 from the specification file
020: * <tt>file:/C:/SourceForge/VQWiki_2X/vqwiki-classic/jflex/mediawiki-html.jflex</tt>
021: */
022: public class MediaWikiHTML implements vqwiki.lex.Lexer {
023:
024: /** This character denotes the end of file */
025: final public static int YYEOF = -1;
026:
027: /** initial size of the lookahead buffer */
028: final private static int YY_BUFFERSIZE = 16384;
029:
030: /** lexical states */
031: final public static int YYINITIAL = 0;
032: final public static int NONPARAGRAPH = 6;
033: final public static int P = 4;
034: final public static int NORMAL = 2;
035:
036: /**
037: * YY_LEXSTATE[l] is the state in the DFA for the lexical state l
038: * YY_LEXSTATE[l+1] is the state in the DFA for the lexical state l
039: * at the beginning of a line
040: * l is of the form l = 2*k, k a non negative integer
041: */
042: private final static int YY_LEXSTATE[] = { 0, 0, 1, 2, 3, 3, 4, 4 };
043:
044: /**
045: * Translates characters to character classes
046: */
047: final private static String yycmap_packed = "\11\0\1\4\1\2\1\0\1\4\1\1\22\0\1\26\16\0\1\22"
048: + "\1\0\1\16\1\16\1\16\1\16\1\16\6\0\1\3\1\0\1\23"
049: + "\42\0\1\6\1\7\1\0\1\12\1\11\1\27\1\0\1\15\1\13"
050: + "\2\0\1\10\1\0\1\25\1\21\1\17\1\0\1\20\1\24\1\5"
051: + "\1\21\1\14\uff89\0";
052:
053: /**
054: * Translates characters to character classes
055: */
056: final private static char[] yycmap = yy_unpack_cmap(yycmap_packed);
057:
058: /**
059: * Translates a state to a row index in the transition table
060: */
061: final private static int yy_rowMap[] = { 0, 24, 48, 72, 96, 0, 120,
062: 0, 144, 0, 168, 192, 216, 240, 264, 288, 312, 336, 360,
063: 384, 408, 432, 456, 480, 504, 216, 0, 528, 552, 576, 600,
064: 624, 648, 0, 672, 696, 720, 744, 768, 792, 816, 840, 864,
065: 888, 912, 936, 960, 984, 1008, 1032, 1056, 1080, 1104,
066: 1128, 1152, 1176, 1200, 1224, 1248, 1272, 1296, 1320, 0,
067: 1344, 1368, 1392, 1416, 1440, 0, 1464, 1488, 1512, 1536,
068: 1560, 1584 };
069:
070: /**
071: * The packed transition table of the DFA (part 0)
072: */
073: final private static String yy_packed0 = "\30\0\1\6\1\7\1\10\1\11\1\10\21\6\1\10"
074: + "\1\6\1\12\1\7\1\10\1\13\24\12\1\6\1\14"
075: + "\1\15\1\16\1\10\21\6\1\10\2\6\1\7\1\10"
076: + "\1\17\1\10\21\6\1\10\1\6\2\0\1\10\32\0"
077: + "\1\20\4\0\1\21\2\0\1\22\1\0\1\23\1\0"
078: + "\1\24\1\25\1\0\1\26\10\0\1\20\1\27\1\30"
079: + "\2\0\1\21\1\30\1\0\1\22\1\0\1\23\1\0"
080: + "\1\24\1\25\1\0\1\26\4\0\1\31\1\32\26\0"
081: + "\1\31\1\33\32\0\1\34\4\0\1\21\2\0\1\22"
082: + "\1\0\1\23\1\0\1\24\1\35\1\0\1\26\10\0"
083: + "\1\36\4\0\1\21\2\0\1\22\1\0\1\23\1\0"
084: + "\1\24\1\37\1\0\1\26\11\0\1\40\3\0\1\41"
085: + "\25\0\1\42\2\0\1\43\32\0\1\42\31\0\1\44"
086: + "\17\0\1\42\24\0\1\45\4\0\1\46\2\0\1\47"
087: + "\1\0\1\50\1\0\1\51\2\0\1\52\22\0\1\53"
088: + "\36\0\1\54\24\0\1\12\6\0\1\33\33\0\1\40"
089: + "\26\0\1\55\30\0\1\40\3\0\1\56\22\0\1\57"
090: + "\4\0\1\60\2\0\1\61\1\0\1\62\1\0\1\63"
091: + "\2\0\1\64\12\0\1\65\20\0\23\41\1\66\4\41"
092: + "\14\0\1\42\24\0\1\42\24\0\1\67\3\0\1\70"
093: + "\25\0\1\71\2\0\1\72\32\0\1\71\31\0\1\73"
094: + "\17\0\1\71\36\0\1\74\16\0\1\75\36\0\1\76"
095: + "\24\0\1\70\15\0\23\56\1\77\4\56\6\0\1\100"
096: + "\3\0\1\70\25\0\1\101\2\0\1\102\32\0\1\101"
097: + "\31\0\1\103\17\0\1\101\36\0\1\104\20\0\1\44"
098: + "\17\0\1\105\3\0\24\105\7\0\1\106\43\0\1\42"
099: + "\27\0\1\66\20\0\1\71\24\0\1\71\24\0\1\107"
100: + "\46\0\1\42\22\0\1\110\16\0\1\111\43\0\1\77"
101: + "\20\0\1\101\24\0\1\101\24\0\1\112\31\0\1\73"
102: + "\44\0\1\71\13\0\1\113\26\0\1\103\44\0\1\101"
103: + "\31\0\1\12";
104:
105: /**
106: * The transition table of the DFA
107: */
108: final private static int yytrans[] = yy_unpack();
109:
110: /* error codes */
111: final private static int YY_UNKNOWN_ERROR = 0;
112: final private static int YY_ILLEGAL_STATE = 1;
113: final private static int YY_NO_MATCH = 2;
114: final private static int YY_PUSHBACK_2BIG = 3;
115:
116: /* error messages for the codes above */
117: final private static String YY_ERROR_MSG[] = {
118: "Unkown internal scanner error",
119: "Internal error: unknown state",
120: "Error: could not match input",
121: "Error: pushback value was too large" };
122:
123: /**
124: * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
125: */
126: private final static byte YY_ATTRIBUTE[] = { 8, 0, 0, 0, 0, 9, 1,
127: 9, 1, 9, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 9,
128: 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
130: 0, 9, 0, 0, 0, 0, 0, 0 };
131:
132: /** the input device */
133: private java.io.Reader yy_reader;
134:
135: /** the current state of the DFA */
136: private int yy_state;
137:
138: /** the current lexical state */
139: private int yy_lexical_state = YYINITIAL;
140:
141: /** this buffer contains the current text to be matched and is
142: the source of the yytext() string */
143: private char yy_buffer[] = new char[YY_BUFFERSIZE];
144:
145: /** the textposition at the last accepting state */
146: private int yy_markedPos;
147:
148: /** the textposition at the last state to be included in yytext */
149: private int yy_pushbackPos;
150:
151: /** the current text position in the buffer */
152: private int yy_currentPos;
153:
154: /** startRead marks the beginning of the yytext() string in the buffer */
155: private int yy_startRead;
156:
157: /** endRead marks the last character in the buffer, that has been read
158: from input */
159: private int yy_endRead;
160:
161: /** number of newlines encountered up to the start of the matched text */
162: private int yyline;
163:
164: /** the number of characters up to the start of the matched text */
165: private int yychar;
166:
167: /**
168: * the number of characters from the last newline up to the start of the
169: * matched text
170: */
171: private int yycolumn;
172:
173: /**
174: * yy_atBOL == true <=> the scanner is currently at the beginning of a line
175: */
176: private boolean yy_atBOL = true;
177:
178: /** yy_atEOF == true <=> the scanner is at the EOF */
179: private boolean yy_atEOF;
180:
181: /* user code: */
182: protected static Logger log = Logger.getLogger(MediaWikiHTML.class
183: .getName());
184: /** Member variable used to keep track of the state history for the lexer. */
185: protected Stack states = new Stack();
186: protected String virtualWiki;
187:
188: /**
189: * Begin a new state and store the old state onto the stack.
190: */
191: protected void beginState(int state) {
192: // store current state
193: Integer current = new Integer(yystate());
194: states.push(current);
195: // switch to new state
196: yybegin(state);
197: }
198:
199: /**
200: * End processing of a state and switch to the previous state.
201: */
202: protected void endState() {
203: // revert to previous state
204: int next = ((Integer) states.pop()).intValue();
205: yybegin(next);
206: }
207:
208: /**
209: *
210: */
211: public void setVirtualWiki(String vWiki) {
212: this .virtualWiki = vWiki;
213: }
214:
/**
 * Creates a scanner that reads from a character stream. The scanner
 * starts in the NORMAL lexical state, and that state is also placed on
 * the state history as its first entry.
 * There is also a java.io.InputStream version of this constructor.
 *
 * @param in the java.io.Reader to read input from.
 */
public MediaWikiHTML(java.io.Reader in) {
    this.yy_reader = in;
    yybegin(NORMAL);
    states.add(new Integer(yystate()));
}
226:
/**
 * Creates a scanner that reads from a byte stream by wrapping it in a
 * java.io.InputStreamReader and delegating to the Reader constructor.
 * NOTE(review): no charset is passed, so decoding follows the platform
 * default - confirm that is what callers expect.
 *
 * @param in the java.io.InputStream to read input from.
 */
public MediaWikiHTML(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
}
236:
237: /**
238: * Unpacks the split, compressed DFA transition table.
239: *
240: * @return the unpacked transition table
241: */
242: private static int[] yy_unpack() {
243: int[] trans = new int[1608];
244: int offset = 0;
245: offset = yy_unpack(yy_packed0, offset, trans);
246: return trans;
247: }
248:
249: /**
250: * Unpacks the compressed DFA transition table.
251: *
252: * @param packed the packed transition table
253: * @return the index of the last entry
254: */
255: private static int yy_unpack(String packed, int offset, int[] trans) {
256: int i = 0; /* index in packed string */
257: int j = offset; /* index in unpacked array */
258: int l = packed.length();
259: while (i < l) {
260: int count = packed.charAt(i++);
261: int value = packed.charAt(i++);
262: value--;
263: do
264: trans[j++] = value;
265: while (--count > 0);
266: }
267: return j;
268: }
269:
270: /**
271: * Unpacks the compressed character translation table.
272: *
273: * @param packed the packed character translation table
274: * @return the unpacked character translation table
275: */
276: private static char[] yy_unpack_cmap(String packed) {
277: char[] map = new char[0x10000];
278: int i = 0; /* index in packed string */
279: int j = 0; /* index in unpacked array */
280: while (i < 86) {
281: int count = packed.charAt(i++);
282: char value = packed.charAt(i++);
283: do
284: map[j++] = value;
285: while (--count > 0);
286: }
287: return map;
288: }
289:
290: /**
291: * Refills the input buffer.
292: *
293: * @return <code>false</code>, iff there was new input.
294: *
295: * @exception IOException if any I/O-Error occurs
296: */
297: private boolean yy_refill() throws java.io.IOException {
298:
299: /* first: make room (if you can) */
300: if (yy_startRead > 0) {
301: System.arraycopy(yy_buffer, yy_startRead, yy_buffer, 0,
302: yy_endRead - yy_startRead);
303:
304: /* translate stored positions */
305: yy_endRead -= yy_startRead;
306: yy_currentPos -= yy_startRead;
307: yy_markedPos -= yy_startRead;
308: yy_pushbackPos -= yy_startRead;
309: yy_startRead = 0;
310: }
311:
312: /* is the buffer big enough? */
313: if (yy_currentPos >= yy_buffer.length) {
314: /* if not: blow it up */
315: char newBuffer[] = new char[yy_currentPos * 2];
316: System.arraycopy(yy_buffer, 0, newBuffer, 0,
317: yy_buffer.length);
318: yy_buffer = newBuffer;
319: }
320:
321: /* finally: fill the buffer with new input */
322: int numRead = yy_reader.read(yy_buffer, yy_endRead,
323: yy_buffer.length - yy_endRead);
324:
325: if (numRead < 0) {
326: return true;
327: } else {
328: yy_endRead += numRead;
329: return false;
330: }
331: }
332:
333: /**
334: * Closes the input stream.
335: */
336: final public void yyclose() throws java.io.IOException {
337: yy_atEOF = true; /* indicate end of file */
338: yy_endRead = yy_startRead; /* invalidate buffer */
339:
340: if (yy_reader != null)
341: yy_reader.close();
342: }
343:
344: /**
345: * Closes the current stream, and resets the
346: * scanner to read from a new input stream.
347: *
348: * All internal variables are reset, the old input stream
349: * <b>cannot</b> be reused (internal buffer is discarded and lost).
350: * Lexical state is set to <tt>YY_INITIAL</tt>.
351: *
352: * @param reader the new input stream
353: */
354: final public void yyreset(java.io.Reader reader)
355: throws java.io.IOException {
356: yyclose();
357: yy_reader = reader;
358: yy_atBOL = true;
359: yy_atEOF = false;
360: yy_endRead = yy_startRead = 0;
361: yy_currentPos = yy_markedPos = yy_pushbackPos = 0;
362: yyline = yychar = yycolumn = 0;
363: yy_lexical_state = YYINITIAL;
364: }
365:
366: /**
367: * Returns the current lexical state.
368: */
369: final public int yystate() {
370: return yy_lexical_state;
371: }
372:
373: /**
374: * Enters a new lexical state
375: *
376: * @param newState the new lexical state
377: */
378: final public void yybegin(int newState) {
379: yy_lexical_state = newState;
380: }
381:
382: /**
383: * Returns the text matched by the current regular expression.
384: */
385: final public String yytext() {
386: return new String(yy_buffer, yy_startRead, yy_markedPos
387: - yy_startRead);
388: }
389:
390: /**
391: * Returns the character at position <tt>pos</tt> from the
392: * matched text.
393: *
394: * It is equivalent to yytext().charAt(pos), but faster
395: *
396: * @param pos the position of the character to fetch.
397: * A value from 0 to yylength()-1.
398: *
399: * @return the character at position pos
400: */
401: final public char yycharat(int pos) {
402: return yy_buffer[yy_startRead + pos];
403: }
404:
405: /**
406: * Returns the length of the matched text region.
407: */
408: final public int yylength() {
409: return yy_markedPos - yy_startRead;
410: }
411:
412: /**
413: * Reports an error that occured while scanning.
414: *
415: * In a wellformed scanner (no or only correct usage of
416: * yypushback(int) and a match-all fallback rule) this method
417: * will only be called with things that "Can't Possibly Happen".
418: * If this method is called, something is seriously wrong
419: * (e.g. a JFlex bug producing a faulty scanner etc.).
420: *
421: * Usual syntax/scanner level error handling should be done
422: * in error fallback rules.
423: *
424: * @param errorCode the code of the errormessage to display
425: */
426: private void yy_ScanError(int errorCode) {
427: String message;
428: try {
429: message = YY_ERROR_MSG[errorCode];
430: } catch (ArrayIndexOutOfBoundsException e) {
431: message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
432: }
433:
434: throw new Error(message);
435: }
436:
437: /**
438: * Pushes the specified amount of characters back into the input stream.
439: *
440: * They will be read again by then next call of the scanning method
441: *
442: * @param number the number of characters to be read again.
443: * This number must not be greater than yylength()!
444: */
445: private void yypushback(int number) {
446: if (number > yylength())
447: yy_ScanError(YY_PUSHBACK_2BIG);
448:
449: yy_markedPos -= number;
450: }
451:
452: /**
453: * Resumes scanning until the next regular expression is matched,
454: * the end of input is encountered or an I/O-Error occurs.
455: *
456: * @return the next token
457: * @exception IOException if any I/O-Error occurs
458: */
459: public String yylex() throws java.io.IOException {
460: int yy_input;
461: int yy_action;
462:
463: // cached fields:
464: int yy_currentPos_l;
465: int yy_startRead_l;
466: int yy_markedPos_l;
467: int yy_endRead_l = yy_endRead;
468: char[] yy_buffer_l = yy_buffer;
469: char[] yycmap_l = yycmap;
470:
471: int[] yytrans_l = yytrans;
472: int[] yy_rowMap_l = yy_rowMap;
473: byte[] yy_attr_l = YY_ATTRIBUTE;
474:
475: while (true) {
476: yy_markedPos_l = yy_markedPos;
477:
478: if (yy_markedPos_l > yy_startRead) {
479: switch (yy_buffer_l[yy_markedPos_l - 1]) {
480: case '\n':
481: case '\u000B':
482: case '\u000C':
483: case '\u0085':
484: case '\u2028':
485: case '\u2029':
486: yy_atBOL = true;
487: break;
488: case '\r':
489: if (yy_markedPos_l < yy_endRead_l)
490: yy_atBOL = yy_buffer_l[yy_markedPos_l] != '\n';
491: else if (yy_atEOF)
492: yy_atBOL = false;
493: else {
494: boolean eof = yy_refill();
495: yy_markedPos_l = yy_markedPos;
496: yy_buffer_l = yy_buffer;
497: if (eof)
498: yy_atBOL = false;
499: else
500: yy_atBOL = yy_buffer_l[yy_markedPos_l] != '\n';
501: }
502: break;
503: default:
504: yy_atBOL = false;
505: }
506: }
507: yy_action = -1;
508:
509: yy_startRead_l = yy_currentPos_l = yy_currentPos = yy_startRead = yy_markedPos_l;
510:
511: if (yy_atBOL)
512: yy_state = YY_LEXSTATE[yy_lexical_state + 1];
513: else
514: yy_state = YY_LEXSTATE[yy_lexical_state];
515:
516: yy_forAction: {
517: while (true) {
518:
519: if (yy_currentPos_l < yy_endRead_l)
520: yy_input = yy_buffer_l[yy_currentPos_l++];
521: else if (yy_atEOF) {
522: yy_input = YYEOF;
523: break yy_forAction;
524: } else {
525: // store back cached positions
526: yy_currentPos = yy_currentPos_l;
527: yy_markedPos = yy_markedPos_l;
528: boolean eof = yy_refill();
529: // get translated positions and possibly new buffer
530: yy_currentPos_l = yy_currentPos;
531: yy_markedPos_l = yy_markedPos;
532: yy_buffer_l = yy_buffer;
533: yy_endRead_l = yy_endRead;
534: if (eof) {
535: yy_input = YYEOF;
536: break yy_forAction;
537: } else {
538: yy_input = yy_buffer_l[yy_currentPos_l++];
539: }
540: }
541: int yy_next = yytrans_l[yy_rowMap_l[yy_state]
542: + yycmap_l[yy_input]];
543: if (yy_next == -1)
544: break yy_forAction;
545: yy_state = yy_next;
546:
547: int yy_attributes = yy_attr_l[yy_state];
548: if ((yy_attributes & 1) == 1) {
549: yy_action = yy_state;
550: yy_markedPos_l = yy_currentPos_l;
551: if ((yy_attributes & 8) == 8)
552: break yy_forAction;
553: }
554:
555: }
556: }
557:
558: // store back cached position
559: yy_markedPos = yy_markedPos_l;
560:
561: switch (yy_action) {
562:
563: case 68: {
564: log.debug("paragraphstart: " + yytext() + " ("
565: + yystate() + ")");
566: beginState(P);
567: // start paragraph, then rollback to allow normal processing
568: yypushback(1);
569: return yytext() + "<p>";
570: }
571: case 76:
572: break;
573: case 33: {
574: log.debug("nonparagraphstart: " + yytext() + " ("
575: + yystate() + ")");
576: StringBuffer output = new StringBuffer();
577: if (yystate() == P) {
578: output.append("</p>");
579: endState();
580: }
581: beginState(NONPARAGRAPH);
582: return output.toString() + yytext();
583: }
584: case 77:
585: break;
586: case 24:
587: case 25:
588: case 26: {
589: log.debug("end of paragraph: " + yytext() + " ("
590: + yystate() + ")");
591: endState();
592: return "</p>" + yytext();
593: }
594: case 78:
595: break;
596: case 62: {
597: log.debug("nonparagraphend: " + yytext() + " ("
598: + yystate() + ")");
599: endState();
600: if (yystate() != NONPARAGRAPH) {
601: // if not non-paragraph, roll back to allow potential paragraph start
602: yypushback(yytext().length());
603: }
604: return yytext();
605: }
606: case 79:
607: break;
608: case 9: {
609: log.debug("paragraphstart2: " + yytext() + " ("
610: + yystate() + ")");
611: beginState(P);
612: // start paragraph, then rollback to allow normal processing
613: yypushback(yytext().length());
614: return "<p>";
615: }
616: case 80:
617: break;
618: case 5:
619: case 8:
620: case 10:
621: case 13:
622: case 14: {
623: log.debug("default: " + yytext() + " (" + yystate()
624: + ")");
625: return yytext();
626: }
627: case 81:
628: break;
629: case 6:
630: case 7:
631: case 11:
632: case 12: {
633: log.debug("{whitespace}: " + yytext() + " ("
634: + yystate() + ")");
635: return yytext();
636: }
637: case 82:
638: break;
639: default:
640: if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
641: yy_atEOF = true;
642: {
643: StringBuffer output = new StringBuffer();
644: if (yystate() == P) {
645: endState();
646: output.append("</p>");
647: }
648: return (output.length() == 0) ? null : output
649: .toString();
650: }
651: } else {
652: yy_ScanError(YY_NO_MATCH);
653: }
654: }
655: }
656: }
657:
658: }
|