001: /* Generated By:JavaCC: Do not edit this line. HtmlExtractorParser.java */
002: package com.flexive.extractor.htmlExtractor;
003:
004: import java.io.Serializable;
005: import java.util.ArrayList;
006:
007: public class HtmlExtractorParser implements
008: HtmlExtractorParserConstants {
009: HtmlExtractor extractor = null; // receiver of the setTitle/append callbacks; assigned by extract() before parsing starts
010: boolean debug = true; // not read by any method of this class
011:
012: /**
013: * Entry point of the parser.
014: */
015: final public void extract(HtmlExtractor e) throws ParseException {
016: this .extractor = e;
017: readText();
018: }
019:
020: final public void readText() throws ParseException {
021: Token str = null;
022: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
023: case QUOTE:
024: case EQUALS:
025: case CLOSE_TAG:
026: case STRING:
027: textElement();
028: readText();
029: break;
030: case OPEN_TAG:
031: tagElement();
032: readText();
033: break;
034: case TITLE:
035: str = jj_consume_token(TITLE);
036: extractor.setTitle(str);
037: readText();
038: break;
039: case 0:
040: jj_consume_token(0);
041: break;
042: default:
043: jj_la1[0] = jj_gen;
044: jj_consume_token(-1);
045: throw new ParseException();
046: }
047: }
048:
049: final public void tagElement() throws ParseException {
050: Token str = null;
051: htmlTag tag = null;
052: str = jj_consume_token(OPEN_TAG);
053: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
054: case STRING:
055: tag = new htmlTag(extractor, str.image.substring(1));
056: tagBody(tag);
057: break;
058: default:
059: jj_la1[1] = jj_gen;
060: ;
061: }
062: jj_consume_token(CLOSE_TAG);
063: if (tag != null)
064: tag.close();
065: }
066:
067: final public void tagBody(htmlTag tag) throws ParseException {
068: Token str = null;
069: str = jj_consume_token(STRING);
070: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
071: case EQUALS:
072: jj_consume_token(EQUALS);
073: tagBodyValue(tag, str);
074: break;
075: default:
076: jj_la1[2] = jj_gen;
077: ;
078: }
079: if (jj_2_1(2)) {
080: tagBody(tag);
081: } else {
082: ;
083: }
084: }
085:
086: final public void tagBodyValue(htmlTag tag, Token attrName)
087: throws ParseException {
088: StringBuffer body = new StringBuffer(256);
089: Token str = null;
090: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
091: case STRING:
092: str = jj_consume_token(STRING);
093: tag.add(attrName, new StringBuffer(str.image));
094: break;
095: case QUOTE:
096: jj_consume_token(QUOTE);
097: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
098: case EQUALS:
099: case OPEN_TAG:
100: case CLOSE_TAG:
101: case STRING:
102: case 19:
103: quoteBody(body);
104: tag.add(attrName, body);
105: break;
106: default:
107: jj_la1[3] = jj_gen;
108: ;
109: }
110: jj_consume_token(QUOTE);
111:
112: break;
113: default:
114: jj_la1[4] = jj_gen;
115: jj_consume_token(-1);
116: throw new ParseException();
117: }
118: }
119:
120: final public void quoteBody(StringBuffer body)
121: throws ParseException {
122: Token str;
123: str = quoteBodyElement();
124: body.append(str.image);
125: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
126: case EQUALS:
127: case OPEN_TAG:
128: case CLOSE_TAG:
129: case STRING:
130: case 19:
131: quoteBody(body);
132: break;
133: default:
134: jj_la1[5] = jj_gen;
135: ;
136: }
137: }
138:
139: final public Token quoteBodyElement() throws ParseException {
140: Token str = null;
141: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
142: case STRING:
143: str = jj_consume_token(STRING);
144: {
145: if (true)
146: return str;
147: }
148: break;
149: case CLOSE_TAG:
150: str = jj_consume_token(CLOSE_TAG);
151: {
152: if (true)
153: return str;
154: }
155: break;
156: case EQUALS:
157: str = jj_consume_token(EQUALS);
158: {
159: if (true)
160: return str;
161: }
162: break;
163: case OPEN_TAG:
164: str = jj_consume_token(OPEN_TAG);
165: {
166: if (true)
167: return str;
168: }
169: break;
170: case 19:
171: str = jj_consume_token(19);
172: {
173: if (true)
174: return str;
175: }
176: break;
177: default:
178: jj_la1[6] = jj_gen;
179: jj_consume_token(-1);
180: throw new ParseException();
181: }
182: throw new Error("Missing return statement in function");
183: }
184:
185: final public void textElement() throws ParseException {
186: Token str = null;
187: switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
188: case STRING:
189: str = jj_consume_token(STRING);
190: extractor.append(str);
191: break;
192: case QUOTE:
193: str = jj_consume_token(QUOTE);
194: extractor.append(str);
195: break;
196: case CLOSE_TAG:
197: str = jj_consume_token(CLOSE_TAG);
198: extractor.append(str);
199: break;
200: case EQUALS:
201: str = jj_consume_token(EQUALS);
202: extractor.append(str);
203: break;
204: default:
205: jj_la1[7] = jj_gen;
206: jj_consume_token(-1);
207: throw new ParseException();
208: }
209: }
210:
211: final private boolean jj_2_1(int xla) {
212: jj_la = xla;
213: jj_lastpos = jj_scanpos = token;
214: try {
215: return !jj_3_1();
216: } catch (LookaheadSuccess ls) {
217: return true;
218: } finally {
219: jj_save(0, xla);
220: }
221: }
222:
223: final private boolean jj_3R_1() {
224: if (jj_scan_token(STRING))
225: return true;
226: Token xsp;
227: xsp = jj_scanpos;
228: if (jj_3R_2())
229: jj_scanpos = xsp;
230: xsp = jj_scanpos;
231: if (jj_3_1())
232: jj_scanpos = xsp;
233: return false;
234: }
235:
236: final private boolean jj_3_1() {
237: if (jj_3R_1())
238: return true;
239: return false;
240: }
241:
242: final private boolean jj_3R_2() {
243: if (jj_scan_token(EQUALS))
244: return true;
245: return false;
246: }
247:
248: public HtmlExtractorParserTokenManager token_source; // supplies tokens through getNextToken()
249: SimpleCharStream jj_input_stream; // character source; NOT assigned by the token-manager constructor
250: public Token token, jj_nt; // token: most recently consumed token; jj_nt: value of token.next as seen by jj_ntk() (may be null)
251: private int jj_ntk; // cached kind of the next token, -1 when it has to be looked up again
252: private Token jj_scanpos, jj_lastpos; // lookahead cursors: current scan position / furthest position reached
253: private int jj_la; // remaining lookahead budget; decremented in jj_scan_token()
254: public boolean lookingAhead = false; // read by getToken() only; never set to true inside this class
255: private boolean jj_semLA; // not referenced by any method of this class
256: private int jj_gen; // generation counter, incremented for every token consumed
257: final private int[] jj_la1 = new int[8]; // per choice point: value of jj_gen when that choice last took its default branch
258: static private int[] jj_la1_0; // per choice point: bit mask of the token kinds reported as expected there
259: static {
260: jj_la1_0(); // fills the jj_la1_0 table once at class initialisation
261: }
262:
263: private static void jj_la1_0() {
264: jj_la1_0 = new int[] { 0x7e001, 0x40000, 0x4000, 0xf4000,
265: 0x42000, 0xf4000, 0xf4000, 0x66000, };
266: }
267:
268: final private JJCalls[] jj_2_rtns = new JJCalls[1]; // one chain of recorded lookahead attempts per jj_2_* routine (only jj_2_1 exists)
269: private boolean jj_rescan = false; // true only while jj_rescan_token() runs; makes jj_scan_token() report to jj_add_error_token()
270: private int jj_gc = 0; // counts consumed tokens; jj_consume_token() prunes jj_2_rtns each time it exceeds 100
271:
/**
 * Creates a parser reading from a byte stream; delegates to the
 * two-argument constructor with a {@code null} encoding.
 *
 * @param stream the input to parse
 */
public HtmlExtractorParser(java.io.InputStream stream) {
    this(stream, null);
}
275:
/**
 * Creates a parser reading from a byte stream with the given encoding.
 * Positions start at line 1, column 1.
 *
 * @param stream   the input to parse
 * @param encoding encoding name handed to {@code SimpleCharStream}
 * @throws RuntimeException wrapping the
 *         {@code UnsupportedEncodingException} if the encoding is rejected
 */
public HtmlExtractorParser(java.io.InputStream stream, String encoding) {
    try {
        jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1);
    } catch (java.io.UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
    token_source = new HtmlExtractorParserTokenManager(jj_input_stream);

    // Fresh parser state: dummy head token, no cached lookahead.
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    java.util.Arrays.fill(jj_la1, -1);
    for (int slot = 0; slot < jj_2_rtns.length; slot++) {
        jj_2_rtns[slot] = new JJCalls();
    }
}
294:
295: public void ReInit(java.io.InputStream stream) {
296: ReInit(stream, null);
297: }
298:
299: public void ReInit(java.io.InputStream stream, String encoding) {
300: try {
301: jj_input_stream.ReInit(stream, encoding, 1, 1);
302: } catch (java.io.UnsupportedEncodingException e) {
303: throw new RuntimeException(e);
304: }
305: token_source.ReInit(jj_input_stream);
306: token = new Token();
307: jj_ntk = -1;
308: jj_gen = 0;
309: for (int i = 0; i < 8; i++)
310: jj_la1[i] = -1;
311: for (int i = 0; i < jj_2_rtns.length; i++)
312: jj_2_rtns[i] = new JJCalls();
313: }
314:
/**
 * Creates a parser reading characters from a {@code Reader}. Positions
 * start at line 1, column 1.
 *
 * @param stream the character input to parse
 */
public HtmlExtractorParser(java.io.Reader stream) {
    jj_input_stream = new SimpleCharStream(stream, 1, 1);
    token_source = new HtmlExtractorParserTokenManager(jj_input_stream);

    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int choice = 0; choice < jj_la1.length; choice++) {
        jj_la1[choice] = -1;
    }
    for (int slot = 0; slot < jj_2_rtns.length; slot++) {
        jj_2_rtns[slot] = new JJCalls();
    }
}
327:
328: public void ReInit(java.io.Reader stream) {
329: jj_input_stream.ReInit(stream, 1, 1);
330: token_source.ReInit(jj_input_stream);
331: token = new Token();
332: jj_ntk = -1;
333: jj_gen = 0;
334: for (int i = 0; i < 8; i++)
335: jj_la1[i] = -1;
336: for (int i = 0; i < jj_2_rtns.length; i++)
337: jj_2_rtns[i] = new JJCalls();
338: }
339:
340: public HtmlExtractorParser(HtmlExtractorParserTokenManager tm) {
341: token_source = tm;
342: token = new Token();
343: jj_ntk = -1;
344: jj_gen = 0;
345: for (int i = 0; i < 8; i++)
346: jj_la1[i] = -1;
347: for (int i = 0; i < jj_2_rtns.length; i++)
348: jj_2_rtns[i] = new JJCalls();
349: }
350:
351: public void ReInit(HtmlExtractorParserTokenManager tm) {
352: token_source = tm;
353: token = new Token();
354: jj_ntk = -1;
355: jj_gen = 0;
356: for (int i = 0; i < 8; i++)
357: jj_la1[i] = -1;
358: for (int i = 0; i < jj_2_rtns.length; i++)
359: jj_2_rtns[i] = new JJCalls();
360: }
361:
362: final private Token jj_consume_token(int kind)
363: throws ParseException {
364: Token oldToken;
365: if ((oldToken = token).next != null)
366: token = token.next;
367: else
368: token = token.next = token_source.getNextToken();
369: jj_ntk = -1;
370: if (token.kind == kind) {
371: jj_gen++;
372: if (++jj_gc > 100) {
373: jj_gc = 0;
374: for (int i = 0; i < jj_2_rtns.length; i++) {
375: JJCalls c = jj_2_rtns[i];
376: while (c != null) {
377: if (c.gen < jj_gen)
378: c.first = null;
379: c = c.next;
380: }
381: }
382: }
383: return token;
384: }
385: token = oldToken;
386: jj_kind = kind;
387: throw generateParseException();
388: }
389:
// Thrown by jj_scan_token() when the lookahead budget is used up on a matching
// token; caught in jj_2_1() (counts as a match) and in jj_rescan_token().
390: static private final class LookaheadSuccess extends java.lang.Error {
391: }
392:
393: final private LookaheadSuccess jj_ls = new LookaheadSuccess(); // the one instance that jj_scan_token() throws every time
394:
395: final private boolean jj_scan_token(int kind) {
396: if (jj_scanpos == jj_lastpos) {
397: jj_la--;
398: if (jj_scanpos.next == null) {
399: jj_lastpos = jj_scanpos = jj_scanpos.next = token_source
400: .getNextToken();
401: } else {
402: jj_lastpos = jj_scanpos = jj_scanpos.next;
403: }
404: } else {
405: jj_scanpos = jj_scanpos.next;
406: }
407: if (jj_rescan) {
408: int i = 0;
409: Token tok = token;
410: while (tok != null && tok != jj_scanpos) {
411: i++;
412: tok = tok.next;
413: }
414: if (tok != null)
415: jj_add_error_token(kind, i);
416: }
417: if (jj_scanpos.kind != kind)
418: return true;
419: if (jj_la == 0 && jj_scanpos == jj_lastpos)
420: throw jj_ls;
421: return false;
422: }
423:
424: final public Token getNextToken() {
425: if (token.next != null)
426: token = token.next;
427: else
428: token = token.next = token_source.getNextToken();
429: jj_ntk = -1;
430: jj_gen++;
431: return token;
432: }
433:
434: final public Token getToken(int index) {
435: Token t = lookingAhead ? jj_scanpos : token;
436: for (int i = 0; i < index; i++) {
437: if (t.next != null)
438: t = t.next;
439: else
440: t = t.next = token_source.getNextToken();
441: }
442: return t;
443: }
444:
445: final private int jj_ntk() {
446: if ((jj_nt = token.next) == null)
447: return (jj_ntk = (token.next = token_source.getNextToken()).kind);
448: else
449: return (jj_ntk = jj_nt.kind);
450: }
451:
452: private java.util.Vector jj_expentries = new java.util.Vector(); // expected-token sequences (int[] each) gathered for the next ParseException
453: private int[] jj_expentry; // scratch reference to the sequence currently being built
454: private int jj_kind = -1; // kind that jj_consume_token() failed to match, -1 when there is none
455: private int[] jj_lasttokens = new int[100]; // token kinds recorded by jj_add_error_token(), indexed by lookahead position
456: private int jj_endpos; // number of valid entries in jj_lasttokens
457:
458: private void jj_add_error_token(int kind, int pos) {
459: if (pos >= 100)
460: return;
461: if (pos == jj_endpos + 1) {
462: jj_lasttokens[jj_endpos++] = kind;
463: } else if (jj_endpos != 0) {
464: jj_expentry = new int[jj_endpos];
465: for (int i = 0; i < jj_endpos; i++) {
466: jj_expentry[i] = jj_lasttokens[i];
467: }
468: boolean exists = false;
469: for (java.util.Enumeration e = jj_expentries.elements(); e
470: .hasMoreElements();) {
471: int[] oldentry = (int[]) (e.nextElement());
472: if (oldentry.length == jj_expentry.length) {
473: exists = true;
474: for (int i = 0; i < jj_expentry.length; i++) {
475: if (oldentry[i] != jj_expentry[i]) {
476: exists = false;
477: break;
478: }
479: }
480: if (exists)
481: break;
482: }
483: }
484: if (!exists)
485: jj_expentries.addElement(jj_expentry);
486: if (pos != 0)
487: jj_lasttokens[(jj_endpos = pos) - 1] = kind;
488: }
489: }
490:
491: public ParseException generateParseException() {
492: jj_expentries.removeAllElements();
493: boolean[] la1tokens = new boolean[20];
494: for (int i = 0; i < 20; i++) {
495: la1tokens[i] = false;
496: }
497: if (jj_kind >= 0) {
498: la1tokens[jj_kind] = true;
499: jj_kind = -1;
500: }
501: for (int i = 0; i < 8; i++) {
502: if (jj_la1[i] == jj_gen) {
503: for (int j = 0; j < 32; j++) {
504: if ((jj_la1_0[i] & (1 << j)) != 0) {
505: la1tokens[j] = true;
506: }
507: }
508: }
509: }
510: for (int i = 0; i < 20; i++) {
511: if (la1tokens[i]) {
512: jj_expentry = new int[1];
513: jj_expentry[0] = i;
514: jj_expentries.addElement(jj_expentry);
515: }
516: }
517: jj_endpos = 0;
518: jj_rescan_token();
519: jj_add_error_token(0, 0);
520: int[][] exptokseq = new int[jj_expentries.size()][];
521: for (int i = 0; i < jj_expentries.size(); i++) {
522: exptokseq[i] = (int[]) jj_expentries.elementAt(i);
523: }
524: return new ParseException(token, exptokseq, tokenImage);
525: }
526:
// Intentionally empty: calling this method has no effect in this parser.
527: final public void enable_tracing() {
528: }
529:
// Intentionally empty: calling this method has no effect in this parser.
530: final public void disable_tracing() {
531: }
532:
533: final private void jj_rescan_token() {
534: jj_rescan = true;
535: for (int i = 0; i < 1; i++) {
536: try {
537: JJCalls p = jj_2_rtns[i];
538: do {
539: if (p.gen > jj_gen) {
540: jj_la = p.arg;
541: jj_lastpos = jj_scanpos = p.first;
542: switch (i) {
543: case 0:
544: jj_3_1();
545: break;
546: }
547: }
548: p = p.next;
549: } while (p != null);
550: } catch (LookaheadSuccess ls) {
551: }
552: }
553: jj_rescan = false;
554: }
555:
556: final private void jj_save(int index, int xla) {
557: JJCalls p = jj_2_rtns[index];
558: while (p.gen > jj_gen) {
559: if (p.next == null) {
560: p = p.next = new JJCalls();
561: break;
562: }
563: p = p.next;
564: }
565: p.gen = jj_gen + xla - jj_la;
566: p.first = token;
567: p.arg = xla;
568: }
569:
// One recorded lookahead attempt; entries for the same routine form a linked list.
570: static final class JJCalls {
571: int gen; // set by jj_save() to jj_gen + xla - jj_la; compared against jj_gen to decide whether the entry is still relevant
572: Token first; // token that was current when the attempt started; cleared by jj_consume_token() once stale
573: int arg; // lookahead limit the attempt was started with
574: JJCalls next; // next entry in the chain, or null
575: }
576:
577: }
|