Source Code Cross Referenced for HtmlExtractorParser.java in » J2EE » fleXive » com » flexive » extractor » htmlExtractor » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » J2EE » fleXive » com.flexive.extractor.htmlExtractor 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /* Generated By:JavaCC: Do not edit this line. HtmlExtractorParser.java */
002:        package com.flexive.extractor.htmlExtractor;
003:
004:        import java.io.Serializable;
005:        import java.util.ArrayList;
006:
007:        public class HtmlExtractorParser implements 
008:                HtmlExtractorParserConstants {
// Receiver of parse results; assigned in extract(HtmlExtractor) and used by the
// grammar methods (setTitle, append, htmlTag construction).
HtmlExtractor extractor = null;
// Not read anywhere in this class. NOTE(review): package-visible, so other
// classes in the package may consult it — confirm before removing.
boolean debug = true;
011:
012:            /**
013:             * Entry point of the parser.
014:             */
015:            final public void extract(HtmlExtractor e) throws ParseException {
016:                this .extractor = e;
017:                readText();
018:            }
019:
020:            final public void readText() throws ParseException {
021:                Token str = null;
022:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
023:                case QUOTE:
024:                case EQUALS:
025:                case CLOSE_TAG:
026:                case STRING:
027:                    textElement();
028:                    readText();
029:                    break;
030:                case OPEN_TAG:
031:                    tagElement();
032:                    readText();
033:                    break;
034:                case TITLE:
035:                    str = jj_consume_token(TITLE);
036:                    extractor.setTitle(str);
037:                    readText();
038:                    break;
039:                case 0:
040:                    jj_consume_token(0);
041:                    break;
042:                default:
043:                    jj_la1[0] = jj_gen;
044:                    jj_consume_token(-1);
045:                    throw new ParseException();
046:                }
047:            }
048:
049:            final public void tagElement() throws ParseException {
050:                Token str = null;
051:                htmlTag tag = null;
052:                str = jj_consume_token(OPEN_TAG);
053:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
054:                case STRING:
055:                    tag = new htmlTag(extractor, str.image.substring(1));
056:                    tagBody(tag);
057:                    break;
058:                default:
059:                    jj_la1[1] = jj_gen;
060:                    ;
061:                }
062:                jj_consume_token(CLOSE_TAG);
063:                if (tag != null)
064:                    tag.close();
065:            }
066:
067:            final public void tagBody(htmlTag tag) throws ParseException {
068:                Token str = null;
069:                str = jj_consume_token(STRING);
070:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
071:                case EQUALS:
072:                    jj_consume_token(EQUALS);
073:                    tagBodyValue(tag, str);
074:                    break;
075:                default:
076:                    jj_la1[2] = jj_gen;
077:                    ;
078:                }
079:                if (jj_2_1(2)) {
080:                    tagBody(tag);
081:                } else {
082:                    ;
083:                }
084:            }
085:
086:            final public void tagBodyValue(htmlTag tag, Token attrName)
087:                    throws ParseException {
088:                StringBuffer body = new StringBuffer(256);
089:                Token str = null;
090:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
091:                case STRING:
092:                    str = jj_consume_token(STRING);
093:                    tag.add(attrName, new StringBuffer(str.image));
094:                    break;
095:                case QUOTE:
096:                    jj_consume_token(QUOTE);
097:                    switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
098:                    case EQUALS:
099:                    case OPEN_TAG:
100:                    case CLOSE_TAG:
101:                    case STRING:
102:                    case 19:
103:                        quoteBody(body);
104:                        tag.add(attrName, body);
105:                        break;
106:                    default:
107:                        jj_la1[3] = jj_gen;
108:                        ;
109:                    }
110:                    jj_consume_token(QUOTE);
111:
112:                    break;
113:                default:
114:                    jj_la1[4] = jj_gen;
115:                    jj_consume_token(-1);
116:                    throw new ParseException();
117:                }
118:            }
119:
120:            final public void quoteBody(StringBuffer body)
121:                    throws ParseException {
122:                Token str;
123:                str = quoteBodyElement();
124:                body.append(str.image);
125:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
126:                case EQUALS:
127:                case OPEN_TAG:
128:                case CLOSE_TAG:
129:                case STRING:
130:                case 19:
131:                    quoteBody(body);
132:                    break;
133:                default:
134:                    jj_la1[5] = jj_gen;
135:                    ;
136:                }
137:            }
138:
139:            final public Token quoteBodyElement() throws ParseException {
140:                Token str = null;
141:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
142:                case STRING:
143:                    str = jj_consume_token(STRING);
144:                    {
145:                        if (true)
146:                            return str;
147:                    }
148:                    break;
149:                case CLOSE_TAG:
150:                    str = jj_consume_token(CLOSE_TAG);
151:                    {
152:                        if (true)
153:                            return str;
154:                    }
155:                    break;
156:                case EQUALS:
157:                    str = jj_consume_token(EQUALS);
158:                    {
159:                        if (true)
160:                            return str;
161:                    }
162:                    break;
163:                case OPEN_TAG:
164:                    str = jj_consume_token(OPEN_TAG);
165:                    {
166:                        if (true)
167:                            return str;
168:                    }
169:                    break;
170:                case 19:
171:                    str = jj_consume_token(19);
172:                    {
173:                        if (true)
174:                            return str;
175:                    }
176:                    break;
177:                default:
178:                    jj_la1[6] = jj_gen;
179:                    jj_consume_token(-1);
180:                    throw new ParseException();
181:                }
182:                throw new Error("Missing return statement in function");
183:            }
184:
185:            final public void textElement() throws ParseException {
186:                Token str = null;
187:                switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
188:                case STRING:
189:                    str = jj_consume_token(STRING);
190:                    extractor.append(str);
191:                    break;
192:                case QUOTE:
193:                    str = jj_consume_token(QUOTE);
194:                    extractor.append(str);
195:                    break;
196:                case CLOSE_TAG:
197:                    str = jj_consume_token(CLOSE_TAG);
198:                    extractor.append(str);
199:                    break;
200:                case EQUALS:
201:                    str = jj_consume_token(EQUALS);
202:                    extractor.append(str);
203:                    break;
204:                default:
205:                    jj_la1[7] = jj_gen;
206:                    jj_consume_token(-1);
207:                    throw new ParseException();
208:                }
209:            }
210:
211:            final private boolean jj_2_1(int xla) {
212:                jj_la = xla;
213:                jj_lastpos = jj_scanpos = token;
214:                try {
215:                    return !jj_3_1();
216:                } catch (LookaheadSuccess ls) {
217:                    return true;
218:                } finally {
219:                    jj_save(0, xla);
220:                }
221:            }
222:
223:            final private boolean jj_3R_1() {
224:                if (jj_scan_token(STRING))
225:                    return true;
226:                Token xsp;
227:                xsp = jj_scanpos;
228:                if (jj_3R_2())
229:                    jj_scanpos = xsp;
230:                xsp = jj_scanpos;
231:                if (jj_3_1())
232:                    jj_scanpos = xsp;
233:                return false;
234:            }
235:
236:            final private boolean jj_3_1() {
237:                if (jj_3R_1())
238:                    return true;
239:                return false;
240:            }
241:
242:            final private boolean jj_3R_2() {
243:                if (jj_scan_token(EQUALS))
244:                    return true;
245:                return false;
246:            }
247:
// Source of tokens; getNextToken() is called on it whenever the token chain
// has to be extended.
public HtmlExtractorParserTokenManager token_source;
// Character stream handed to the token manager by the stream/reader
// constructors; the token-manager constructor leaves it unset.
SimpleCharStream jj_input_stream;
// token: the most recently consumed token. jj_nt: set by jj_ntk() to
// token.next as it was before any fetch (so it may be null).
public Token token, jj_nt;
// Cached kind of the next token; -1 means "not determined yet".
private int jj_ntk;
// Current and furthest positions of a lookahead scan (see jj_scan_token).
private Token jj_scanpos, jj_lastpos;
// Remaining lookahead budget; decremented by jj_scan_token.
private int jj_la;
// Read by getToken(int) to choose its start token; never set to true here.
public boolean lookingAhead = false;
// Not referenced anywhere in this class.
private boolean jj_semLA;
// Incremented for every token that is consumed.
private int jj_gen;
// Per choice point: value of jj_gen when its default branch was last taken.
final private int[] jj_la1 = new int[8];
// Per choice point: bitmask of token kinds; bit j set means kind j is
// reported as expected by generateParseException().
static private int[] jj_la1_0;
static {
    jj_la1_0();
}

// Fills the expected-token bitmask table, one entry per choice point.
private static void jj_la1_0() {
    jj_la1_0 = new int[] { 0x7e001, 0x40000, 0x4000, 0xf4000,
            0x42000, 0xf4000, 0xf4000, 0x66000, };
}

// Heads of the recorded-lookahead lists, one list per lookahead routine.
final private JJCalls[] jj_2_rtns = new JJCalls[1];
// True only while jj_rescan_token() is running.
private boolean jj_rescan = false;
// Counts successful consumes; above 100, jj_consume_token resets it and
// clears stale lookahead records.
private int jj_gc = 0;
271:
/**
 * Creates a parser that reads from a byte stream, passing a {@code null}
 * encoding on to the two-argument constructor.
 *
 * @param stream the input to parse
 */
public HtmlExtractorParser(java.io.InputStream stream) {
    this(stream, null);
}

/**
 * Creates a parser that reads from a byte stream with the given encoding.
 *
 * @param stream   the input to parse
 * @param encoding the encoding name handed to {@code SimpleCharStream}
 * @throws RuntimeException wrapping the
 *         {@code UnsupportedEncodingException} if the encoding is rejected
 */
public HtmlExtractorParser(java.io.InputStream stream, String encoding) {
    try {
        jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1);
    } catch (java.io.UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
    token_source = new HtmlExtractorParserTokenManager(jj_input_stream);
    jj_reset_state();
}

/**
 * Re-targets this parser at a new byte stream, passing a {@code null}
 * encoding on to the two-argument overload.
 *
 * @param stream the new input
 */
public void ReInit(java.io.InputStream stream) {
    ReInit(stream, null);
}

/**
 * Re-targets this parser at a new byte stream and resets the parse state.
 *
 * @param stream   the new input
 * @param encoding the encoding name handed to {@code SimpleCharStream}
 * @throws RuntimeException wrapping the
 *         {@code UnsupportedEncodingException} if the encoding is rejected
 */
public void ReInit(java.io.InputStream stream, String encoding) {
    try {
        jj_input_stream.ReInit(stream, encoding, 1, 1);
    } catch (java.io.UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
    token_source.ReInit(jj_input_stream);
    jj_reset_state();
}

/**
 * Creates a parser that reads from a character stream.
 *
 * @param stream the input to parse
 */
public HtmlExtractorParser(java.io.Reader stream) {
    jj_input_stream = new SimpleCharStream(stream, 1, 1);
    token_source = new HtmlExtractorParserTokenManager(jj_input_stream);
    jj_reset_state();
}

/**
 * Re-targets this parser at a new character stream and resets the parse
 * state.
 *
 * @param stream the new input
 */
public void ReInit(java.io.Reader stream) {
    jj_input_stream.ReInit(stream, 1, 1);
    token_source.ReInit(jj_input_stream);
    jj_reset_state();
}

/**
 * Creates a parser on top of an existing token manager. The character
 * stream field is not set by this constructor.
 *
 * @param tm the token manager that supplies the tokens
 */
public HtmlExtractorParser(HtmlExtractorParserTokenManager tm) {
    token_source = tm;
    jj_reset_state();
}

/**
 * Switches this parser to another token manager and resets the parse state.
 *
 * @param tm the token manager that supplies the tokens
 */
public void ReInit(HtmlExtractorParserTokenManager tm) {
    token_source = tm;
    jj_reset_state();
}

/**
 * Puts the parse state back to "nothing consumed yet": a fresh head token,
 * no cached next-token kind, generation 0, every choice-point slot at -1
 * and a fresh record for each lookahead routine. Shared by all
 * constructors and ReInit overloads so the sequence exists only once.
 */
private void jj_reset_state() {
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    java.util.Arrays.fill(jj_la1, -1);
    for (int i = 0; i < jj_2_rtns.length; i++) {
        jj_2_rtns[i] = new JJCalls();
    }
}
361:
362:            final private Token jj_consume_token(int kind)
363:                    throws ParseException {
364:                Token oldToken;
365:                if ((oldToken = token).next != null)
366:                    token = token.next;
367:                else
368:                    token = token.next = token_source.getNextToken();
369:                jj_ntk = -1;
370:                if (token.kind == kind) {
371:                    jj_gen++;
372:                    if (++jj_gc > 100) {
373:                        jj_gc = 0;
374:                        for (int i = 0; i < jj_2_rtns.length; i++) {
375:                            JJCalls c = jj_2_rtns[i];
376:                            while (c != null) {
377:                                if (c.gen < jj_gen)
378:                                    c.first = null;
379:                                c = c.next;
380:                            }
381:                        }
382:                    }
383:                    return token;
384:                }
385:                token = oldToken;
386:                jj_kind = kind;
387:                throw generateParseException();
388:            }
389:
// Signal thrown by jj_scan_token when the lookahead limit is reached on a
// matching token; caught in jj_2_1 and jj_rescan_token. It carries no data.
static private final class LookaheadSuccess extends java.lang.Error {
}

// Single shared instance that jj_scan_token throws, so no new object is
// created per lookahead.
final private LookaheadSuccess jj_ls = new LookaheadSuccess();
394:
395:            final private boolean jj_scan_token(int kind) {
396:                if (jj_scanpos == jj_lastpos) {
397:                    jj_la--;
398:                    if (jj_scanpos.next == null) {
399:                        jj_lastpos = jj_scanpos = jj_scanpos.next = token_source
400:                                .getNextToken();
401:                    } else {
402:                        jj_lastpos = jj_scanpos = jj_scanpos.next;
403:                    }
404:                } else {
405:                    jj_scanpos = jj_scanpos.next;
406:                }
407:                if (jj_rescan) {
408:                    int i = 0;
409:                    Token tok = token;
410:                    while (tok != null && tok != jj_scanpos) {
411:                        i++;
412:                        tok = tok.next;
413:                    }
414:                    if (tok != null)
415:                        jj_add_error_token(kind, i);
416:                }
417:                if (jj_scanpos.kind != kind)
418:                    return true;
419:                if (jj_la == 0 && jj_scanpos == jj_lastpos)
420:                    throw jj_ls;
421:                return false;
422:            }
423:
424:            final public Token getNextToken() {
425:                if (token.next != null)
426:                    token = token.next;
427:                else
428:                    token = token.next = token_source.getNextToken();
429:                jj_ntk = -1;
430:                jj_gen++;
431:                return token;
432:            }
433:
434:            final public Token getToken(int index) {
435:                Token t = lookingAhead ? jj_scanpos : token;
436:                for (int i = 0; i < index; i++) {
437:                    if (t.next != null)
438:                        t = t.next;
439:                    else
440:                        t = t.next = token_source.getNextToken();
441:                }
442:                return t;
443:            }
444:
445:            final private int jj_ntk() {
446:                if ((jj_nt = token.next) == null)
447:                    return (jj_ntk = (token.next = token_source.getNextToken()).kind);
448:                else
449:                    return (jj_ntk = jj_nt.kind);
450:            }
451:
// Expected-token sequences (each an int[]) collected for the next
// ParseException; cleared at the start of generateParseException().
private java.util.Vector jj_expentries = new java.util.Vector();
// Scratch reference to the sequence currently being built or added.
private int[] jj_expentry;
// Kind passed to the jj_consume_token call that failed, or -1 if none.
private int jj_kind = -1;
// Token kinds seen along the current rescan path; jj_add_error_token
// ignores positions of 100 or more, matching this array's length.
private int[] jj_lasttokens = new int[100];
// Number of entries of jj_lasttokens that are currently in use.
private int jj_endpos;
457:
458:            private void jj_add_error_token(int kind, int pos) {
459:                if (pos >= 100)
460:                    return;
461:                if (pos == jj_endpos + 1) {
462:                    jj_lasttokens[jj_endpos++] = kind;
463:                } else if (jj_endpos != 0) {
464:                    jj_expentry = new int[jj_endpos];
465:                    for (int i = 0; i < jj_endpos; i++) {
466:                        jj_expentry[i] = jj_lasttokens[i];
467:                    }
468:                    boolean exists = false;
469:                    for (java.util.Enumeration e = jj_expentries.elements(); e
470:                            .hasMoreElements();) {
471:                        int[] oldentry = (int[]) (e.nextElement());
472:                        if (oldentry.length == jj_expentry.length) {
473:                            exists = true;
474:                            for (int i = 0; i < jj_expentry.length; i++) {
475:                                if (oldentry[i] != jj_expentry[i]) {
476:                                    exists = false;
477:                                    break;
478:                                }
479:                            }
480:                            if (exists)
481:                                break;
482:                        }
483:                    }
484:                    if (!exists)
485:                        jj_expentries.addElement(jj_expentry);
486:                    if (pos != 0)
487:                        jj_lasttokens[(jj_endpos = pos) - 1] = kind;
488:                }
489:            }
490:
491:            public ParseException generateParseException() {
492:                jj_expentries.removeAllElements();
493:                boolean[] la1tokens = new boolean[20];
494:                for (int i = 0; i < 20; i++) {
495:                    la1tokens[i] = false;
496:                }
497:                if (jj_kind >= 0) {
498:                    la1tokens[jj_kind] = true;
499:                    jj_kind = -1;
500:                }
501:                for (int i = 0; i < 8; i++) {
502:                    if (jj_la1[i] == jj_gen) {
503:                        for (int j = 0; j < 32; j++) {
504:                            if ((jj_la1_0[i] & (1 << j)) != 0) {
505:                                la1tokens[j] = true;
506:                            }
507:                        }
508:                    }
509:                }
510:                for (int i = 0; i < 20; i++) {
511:                    if (la1tokens[i]) {
512:                        jj_expentry = new int[1];
513:                        jj_expentry[0] = i;
514:                        jj_expentries.addElement(jj_expentry);
515:                    }
516:                }
517:                jj_endpos = 0;
518:                jj_rescan_token();
519:                jj_add_error_token(0, 0);
520:                int[][] exptokseq = new int[jj_expentries.size()][];
521:                for (int i = 0; i < jj_expentries.size(); i++) {
522:                    exptokseq[i] = (int[]) jj_expentries.elementAt(i);
523:                }
524:                return new ParseException(token, exptokseq, tokenImage);
525:            }
526:
/** Does nothing: the body is empty, so no tracing is switched on. */
final public void enable_tracing() {
}

/** Does nothing: the body is empty, so there is no tracing to switch off. */
final public void disable_tracing() {
}
532:
533:            final private void jj_rescan_token() {
534:                jj_rescan = true;
535:                for (int i = 0; i < 1; i++) {
536:                    try {
537:                        JJCalls p = jj_2_rtns[i];
538:                        do {
539:                            if (p.gen > jj_gen) {
540:                                jj_la = p.arg;
541:                                jj_lastpos = jj_scanpos = p.first;
542:                                switch (i) {
543:                                case 0:
544:                                    jj_3_1();
545:                                    break;
546:                                }
547:                            }
548:                            p = p.next;
549:                        } while (p != null);
550:                    } catch (LookaheadSuccess ls) {
551:                    }
552:                }
553:                jj_rescan = false;
554:            }
555:
556:            final private void jj_save(int index, int xla) {
557:                JJCalls p = jj_2_rtns[index];
558:                while (p.gen > jj_gen) {
559:                    if (p.next == null) {
560:                        p = p.next = new JJCalls();
561:                        break;
562:                    }
563:                    p = p.next;
564:                }
565:                p.gen = jj_gen + xla - jj_la;
566:                p.first = token;
567:                p.arg = xla;
568:            }
569:
/** One recorded lookahead call; records for a routine form a linked list. */
static final class JJCalls {
    // Set by jj_save to jj_gen + xla - jj_la; a record is replayed or skipped
    // for reuse only while this is greater than the current jj_gen.
    int gen;
    // Token the lookahead started from; cleared by jj_consume_token once stale.
    Token first;
    // Lookahead limit the routine was called with.
    int arg;
    // Next record in the list, or null at the end.
    JJCalls next;
}
576:
577:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.