Source Code Cross Referenced for SyntaxParser.java in » IDE-Netbeans » html » org » netbeans » editor » ext » html » parser » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » IDE Netbeans » html » org.netbeans.editor.ext.html.parser
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003:         *
004:         * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005:         *
006:         * The contents of this file are subject to the terms of either the GNU
007:         * General Public License Version 2 only ("GPL") or the Common
008:         * Development and Distribution License("CDDL") (collectively, the
009:         * "License"). You may not use this file except in compliance with the
010:         * License. You can obtain a copy of the License at
011:         * http://www.netbeans.org/cddl-gplv2.html
012:         * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013:         * specific language governing permissions and limitations under the
014:         * License.  When distributing the software, include this License Header
015:         * Notice in each file and include the License file at
016:         * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
017:         * particular file as subject to the "Classpath" exception as provided
018:         * by Sun in the GPL Version 2 section of the License file that
019:         * accompanied this code. If applicable, add the following below the
020:         * License Header, with the fields enclosed by brackets [] replaced by
021:         * your own identifying information:
022:         * "Portions Copyrighted [year] [name of copyright owner]"
023:         *
024:         * Contributor(s):
025:         *
026:         * The Original Software is NetBeans. The Initial Developer of the Original
027:         * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028:         * Microsystems, Inc. All Rights Reserved.
029:         *
030:         * If you wish your version of this file to be governed by only the CDDL
031:         * or only the GPL Version 2, indicate your decision by adding
032:         * "[Contributor] elects to include this software in this distribution
033:         * under the [CDDL or GPL Version 2] license." If you do not indicate a
034:         * single choice of license, a recipient has the option to distribute
035:         * your version of this file under either the CDDL, the GPL Version 2 or
036:         * to extend the choice of license to its licensees as provided above.
037:         * However, if you add GPL Version 2 code and therefore, elected the GPL
038:         * Version 2 license, then the option applies only if the new code is
039:         * made subject to such option by the copyright holder.
040:         */
041:        package org.netbeans.editor.ext.html.parser;
042:
043:        import java.util.ArrayList;
044:        import java.util.Collections;
045:        import java.util.HashMap;
046:        import java.util.List;
047:        import java.util.Map;
048:        import java.util.logging.Level;
049:        import java.util.logging.Logger;
050:        import javax.swing.text.BadLocationException;
051:        import javax.swing.text.Document;
052:        import org.netbeans.api.html.lexer.HTMLTokenId;
053:        import org.netbeans.api.lexer.LanguagePath;
054:        import org.netbeans.api.lexer.Token;
055:        import org.netbeans.api.lexer.TokenHierarchy;
056:        import org.netbeans.api.lexer.TokenHierarchyEvent;
057:        import org.netbeans.api.lexer.TokenHierarchyEventType;
058:        import org.netbeans.api.lexer.TokenHierarchyListener;
059:        import org.netbeans.api.lexer.TokenSequence;
060:        import org.netbeans.editor.BaseDocument;
061:        import org.openide.util.Exceptions;
062:        import org.openide.util.RequestProcessor;
063:
064:        /**
065:         * Simple HTML syntax analyzer
066:         *
067:         * @author Marek.Fukala@Sun.com
068:         */
069:        public final class SyntaxParser {
070:
                /** Logger named after this class; used for parse warnings and FINE-level element dumps. */
                private static final Logger LOGGER = Logger
                        .getLogger(SyntaxParser.class.getName());
                /** Whether FINE logging is enabled; evaluated only once, when the class is initialized. */
                private static final boolean LOG = LOGGER.isLoggable(Level.FINE);

                /** Delay handed to the parser task's schedule() call whenever a (re)parse is requested. */
                private static final int PARSER_DELAY = 1000; //ms (= 1 second)

                /** The parsed document; null when the parser was created over an immutable CharSequence. */
                private final Document doc;
                /** Language path used to obtain the token sequences that get parsed. */
                private final LanguagePath languagePath;
                /** Token hierarchy of the document or of the immutable source. */
                private final TokenHierarchy hi;
                /** Task that runs parse(); null when the parser was created over an immutable CharSequence. */
                private final RequestProcessor.Task parserTask;
                /** Listeners informed by notifyParsingFinished() after a successful, non-empty parse. */
                private final ArrayList<SyntaxParserListener> listeners = new ArrayList<SyntaxParserListener>();
                /** Reschedules the parser whenever the token hierarchy reports a MODIFICATION event. */
                private final TokenHierarchyListener tokenHierarchyListener = new TokenHierarchyListener() {

                    public void tokenHierarchyChanged(TokenHierarchyEvent evt) {
                        // other event types are ignored
                        if (evt.type() == TokenHierarchyEventType.MODIFICATION) {
                            restartParser();
                        }
                    }
                };

                /**
                 * Initial value of parsedElements before the first parse.
                 * NOTE(review): named like a constant but declared as a non-static, non-final instance field.
                 */
                private List<SyntaxElement> EMPTY_ELEMENTS_LIST = Collections
                        .emptyList();
                /** Result of the last successful parse(); returned by elements(). */
                private List<SyntaxElement> parsedElements;
                /** Set by parse(): true when the last run finished without a BadLocationException. */
                private boolean isSuccessfulyParsed = false;

                /** Text provider handed to every created SyntaxElement. */
                protected final ParserSource parserSource;
098:            /** Returns an instance of SyntaxParser for given document.
099:             *  The client is supposed to add a SyntaxParserListener to the obtained instance
100:             *  to get notification whenever the document changes and is reparsed.
101:             */
102:            //XXX We cannot create multiple SyntaxParser-s for various languagePaths on one document.
103:            public static synchronized SyntaxParser get(Document doc,
104:                    LanguagePath languagePath) {
105:                SyntaxParser parser = (SyntaxParser) doc
106:                        .getProperty(SyntaxParser.class);
107:                if (parser == null) {
108:                    parser = new SyntaxParser(doc, languagePath);
109:                    doc.putProperty(SyntaxParser.class, parser);
110:                }
111:                return parser;
112:            }
113:
114:            /** Creates a new instance of SyntaxParser parsing the immutable source. */
115:            public static SyntaxParser create(CharSequence source) {
116:                return new SyntaxParser(source);
117:            }
118:
119:            private SyntaxParser(final CharSequence source) {
120:                this .parserTask = null;
121:                this .doc = null;
122:                this .parsedElements = EMPTY_ELEMENTS_LIST;
123:                this .languagePath = LanguagePath.get(HTMLTokenId.language());
124:                this .hi = TokenHierarchy.create(source, HTMLTokenId.language());
125:                this .parserSource = new ParserSource() {
126:                    public CharSequence getText(int offset, int length)
127:                            throws BadLocationException {
128:                        return source.subSequence(offset, offset + length);
129:                    }
130:                };
131:            }
132:
133:            private SyntaxParser(Document document, LanguagePath languagePath) {
134:                this .doc = document;
135:                this .languagePath = languagePath;
136:                this .hi = TokenHierarchy.get(doc);
137:
138:                if (hi == null) {
139:                    String mimeType = (String) doc.getProperty("mimeType"); //NOI18N
140:                    if (mimeType == null) {
141:                        mimeType = "unknown";
142:                    }
143:                    throw new IllegalStateException(
144:                            "Cannot obtain TokenHierarchy instance for document "
145:                                    + document + " with " + mimeType
146:                                    + " mimetype."); //NOI18N
147:                }
148:
149:                this .parserSource = new ParserSource() {
150:                    public String getText(int offset, int length)
151:                            throws BadLocationException {
152:                        return doc.getText(offset, length);
153:                    }
154:                };
155:
156:                parsedElements = EMPTY_ELEMENTS_LIST;
157:
158:                parserTask = RequestProcessor.getDefault().create(
159:                        new Runnable() {
160:                            public void run() {
161:                                parse();
162:                            }
163:                        });
164:
165:                //add itself as token hierarchy listener
166:                hi.addTokenHierarchyListener(tokenHierarchyListener);
167:
168:                //ensure the document is parsed
169:                restartParser();
170:
171:            }
172:
173:            /** Parses the immutable source. */
174:            public List<SyntaxElement> parseImmutableSource() {
175:                if (doc != null) {
176:                    throw new IllegalStateException(
177:                            "Cannot explicitly parse muttable source!");
178:                } else {
179:                    try {
180:                        return parseDocument();
181:                    } catch (BadLocationException ex) {
182:                        LOGGER.log(Level.WARNING,
183:                                "Error during parsing html content", ex);
184:                        return null;
185:
186:                    }
187:                }
188:            }
189:
190:            //---------------------------- public methods ------------------------------
191:            public void addSyntaxParserListener(SyntaxParserListener spl) {
192:                listeners.add(spl);
193:            }
194:
195:            /** Removes the SyntaxParserListener from the listeners list.*/
196:            public void removeSyntaxParserListener(SyntaxParserListener spl) {
197:                listeners.remove(spl);
198:            }
199:
200:            //----------------------- package private methods---------------------------
201:            /** used by unit tests */
202:            void forceParse() {
203:                parserTask.cancel();
204:                parse();
205:            }
206:
207:            List<SyntaxElement> elements() {
208:                return parsedElements;
209:            }
210:
211:            //---------------------------- private methods -----------------------------
212:            private void restartParser() {
213:                if (!parserTask.isFinished()) {
214:                    parserTask.cancel(); //removes the task from the queue AND INTERRUPTS the thread!
215:                }
216:                parserTask.schedule(PARSER_DELAY);
217:            }
218:
219:            private synchronized void parse() {
220:                BaseDocument bdoc = (BaseDocument) doc;
221:                bdoc.readLock();
222:                try {
223:                    List<SyntaxElement> newElements = parseDocument();
224:                    parsedElements = newElements;
225:                    isSuccessfulyParsed = true;
226:                } catch (BadLocationException ble) {
227:                    isSuccessfulyParsed = false;
228:                    LOGGER.log(Level.WARNING,
229:                            "Error during parsing html content", ble);
230:                } finally {
231:                    bdoc.readUnlock();
232:                }
233:
234:                if (isSuccessfulyParsed) {
235:                    notifyParsingFinished();
236:                }
237:            }
238:
239:            private void notifyParsingFinished() {
240:                if (!parsedElements.isEmpty()) {
241:
242:                    //debug messages
243:                    if (LOG) {
244:                        for (SyntaxElement se : parsedElements) {
245:                            LOGGER.log(Level.FINE, se.toString());
246:                            System.out.println(se.toString());
247:                        }
248:                    }
249:
250:                    for (SyntaxParserListener spl : listeners) {
251:                        spl.parsingFinished(parsedElements);
252:                    }
253:                }
254:            }
255:
256:            private void entityReference() {
257:                elements.add(new SyntaxElement(parserSource, start, token
258:                        .offset(hi)
259:                        + token.length() - start,
260:                        SyntaxElement.TYPE_ENTITY_REFERENCE));
261:
262:            }
263:
264:            private void comment() {
265:                elements.add(new SyntaxElement(parserSource, start, token
266:                        .offset(hi)
267:                        + token.length() - start, SyntaxElement.TYPE_COMMENT));
268:            }
269:
270:            private void declaration() {
271:                elements.add(new SyntaxElement.Declaration(parserSource, start,
272:                        token.offset(hi) + token.length() - start,
273:                        root_element, doctype_public_id, doctype_file));
274:            }
275:
276:            private void tag(boolean emptyTag) {
277:                List<SyntaxElement.TagAttribute> attributes = new ArrayList<SyntaxElement.TagAttribute>();
278:                for (int i = 0; i < attr_keys.size(); i++) {
279:                    Token key = attr_keys.get(i);
280:                    List<Token> values = attr_values.get(i);
281:                    StringBuffer joinedValue = new StringBuffer();
282:                    for (Token t : values) {
283:                        joinedValue.append(t.text());
284:                    }
285:
286:                    Token firstValuePart = values.get(0);
287:                    Token lastValuePart = values.get(values.size() - 1);
288:
289:                    SyntaxElement.TagAttribute ta = new SyntaxElement.TagAttribute(
290:                            key.text().toString(), joinedValue.toString(), key
291:                                    .offset(hi), firstValuePart.offset(hi),
292:                            lastValuePart.offset(hi) + lastValuePart.length()
293:                                    - firstValuePart.offset(hi));
294:                    attributes.add(ta);
295:                }
296:
297:                elements.add(new SyntaxElement.Tag(parserSource, start, token
298:                        .offset(hi)
299:                        + token.length() - start, tagName, attributes, openTag,
300:                        emptyTag));
301:
302:                tagName = null;
303:                attrib = null;
304:                attr_keys = new ArrayList<Token>();
305:                attr_values = new ArrayList<List<Token>>();
306:            }
307:
308:            private void reset() {
309:                state = S_INIT;
310:                start = -1;
311:                backup(1);
312:            }
313:
314:            private void backup(int tokens) {
315:                for (int i = 0; i < tokens; i++) {
316:                    ts.movePrevious();
317:                    token = ts.token();
318:                }
319:            }
320:
                // ---- states of the parseDocument() state machine ----
                /** Outside of any syntax element. */
                private static final int S_INIT = 0;
                /** A TAG_OPEN_SYMBOL token has been read. */
                private static final int S_TAG_OPEN_SYMBOL = 1;
                /** Inside a tag, after its name or after a completed attribute value. */
                private static final int S_TAG = 2;
                /** An attribute name (ARGUMENT token) has been read. */
                private static final int S_TAG_ATTR = 3;
                /** Collecting the VALUE tokens of the current attribute. */
                private static final int S_TAG_VALUE = 4;
                /** Inside a comment. */
                private static final int S_COMMENT = 5;
                /** Inside a declaration other than "<!DOCTYPE", or in the tail of a doctype. */
                private static final int S_DECLARATION = 6;
                /** The "<!DOCTYPE" token has been read; the root element is expected next. */
                private static final int S_DOCTYPE_DECLARATION = 7;
                /** Root element read; "PUBLIC" or "SYSTEM" is expected next. */
                private static final int S_DOCTYPE_AFTER_ROOT_ELEMENT = 8;
                /** "PUBLIC" read; the public id is expected next. */
                private static final int S_DOCTYPE_PUBLIC_ID = 9;
                /** "SYSTEM" or the public id read; the file is expected next. */
                private static final int S_DOCTYPE_FILE = 10;

                // ---- working state of a parseDocument() run ----
                /** Current state; one of the S_* constants. */
                private int state;
                /** Start offset of the element being parsed; -1 when no element is open. */
                private int start;
                /** Token sequence currently being iterated. */
                private TokenSequence ts;
                /** Current token of {@code ts}. */
                private Token<HTMLTokenId> token;
                /** Elements produced so far by the current run. */
                private List<SyntaxElement> elements;

                /** True when the tag being parsed started with TAG_OPEN, false for TAG_CLOSE. */
                private boolean openTag = true;
                /** Name of the tag being parsed. */
                private String tagName = null;
                /** Attribute name token whose value is currently being collected. */
                private Token attrib = null;
                /** Attribute name tokens of the tag being parsed; parallel to attr_values. */
                private ArrayList<Token> attr_keys = null;
                /** Value tokens per attribute; the index matches attr_keys. */
                private ArrayList<List<Token>> attr_values = null;

                /** Parts of the doctype declaration being parsed; null until the matching token is read. */
                private String root_element, doctype_public_id, doctype_file;
346:
                /**
                 * Runs the syntax state machine over all token sequences of the token
                 * hierarchy for {@code languagePath} and collects the recognized syntax
                 * elements (entity references, tags, comments, declarations).
                 * <p>
                 * The working fields ({@code state}, {@code start}, {@code ts},
                 * {@code token}, {@code elements}, attribute collectors) are
                 * (re)initialized here and mutated by the helper methods.
                 *
                 * @return the list of elements found, in document order
                 * @throws BadLocationException declared for callers; no statement visible
                 *         in this method throws it directly
                 */
                //PENDING: we do not handle incomplete tokens yet - should be added
                private List<SyntaxElement> parseDocument()
                        throws BadLocationException {
                    elements = new ArrayList<SyntaxElement>();
                    List<TokenSequence<HTMLTokenId>> sequences = hi
                            .tokenSequenceList(languagePath, 0, Integer.MAX_VALUE);
                    state = S_INIT;
                    start = -1;
                    attr_keys = new ArrayList<Token>();
                    attr_values = new ArrayList<List<Token>>();

                    // the state is NOT reset between sequences, so an element may
                    // continue from one token sequence into the next
                    for (TokenSequence _ts : sequences) {
                        ts = _ts;
                        while (ts.moveNext()) {
                            token = ts.token();
                            HTMLTokenId id = token.id();

                            switch (state) {
                            case S_INIT:
                                // tokens not listed here are skipped
                                switch (id) {
                                case CHARACTER:
                                    // a single CHARACTER token forms a complete entity reference
                                    start = ts.offset();
                                    entityReference();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                case TAG_OPEN_SYMBOL:
                                    start = ts.offset();
                                    state = S_TAG_OPEN_SYMBOL;
                                    break;
                                case BLOCK_COMMENT:
                                    start = ts.offset();
                                    state = S_COMMENT;
                                    break;
                                case DECLARATION:
                                    start = ts.offset();
                                    if (token.text().toString().equals("<!DOCTYPE")) {
                                        // forget the parts of any previously parsed doctype
                                        root_element = null;
                                        doctype_public_id = null;
                                        doctype_file = null;
                                        state = S_DOCTYPE_DECLARATION;
                                    } else {
                                        state = S_DECLARATION;
                                    }
                                    break;
                                }
                                break;

                            case S_TAG_OPEN_SYMBOL:
                                // the tag name must follow the open symbol immediately
                                switch (id) {
                                case TAG_OPEN:
                                    state = S_TAG;
                                    openTag = true;
                                    tagName = token.text().toString();
                                    break;
                                case TAG_CLOSE:
                                    state = S_TAG;
                                    openTag = false;
                                    tagName = token.text().toString();
                                    break;
                                default:
                                    reset(); //error
                                    break;
                                }
                                break;

                            case S_TAG:
                                switch (id) {
                                case WS:
                                case EOL:
                                case ERROR:
                                    // ignored inside a tag
                                    break;
                                case ARGUMENT:
                                    // attribute name; its value tokens are collected later
                                    state = S_TAG_ATTR;
                                    attrib = token;
                                    break;
                                case TAG_CLOSE_SYMBOL:
                                    // "/>" closes an empty tag
                                    boolean emptyTag = "/>".equals(token.text()
                                            .toString());
                                    tag(emptyTag);
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                default:
                                    reset(); //error
                                    break;
                                }
                                break;

                            case S_TAG_ATTR:
                                switch (id) {
                                case OPERATOR:
                                case WS:
                                    break;
                                case VALUE:
                                    // step back so the same VALUE token is read again
                                    // and handled in the S_TAG_VALUE state
                                    backup(1);
                                    state = S_TAG_VALUE;
                                    break;
                                default:
                                    reset(); //error
                                    break;
                                }
                                break;

                            case S_TAG_VALUE:
                                switch (id) {
                                case VALUE:
                                    // first value token registers the attribute; further
                                    // value tokens are appended to the same attribute
                                    int index = attr_keys.indexOf(attrib);
                                    if (index == -1) {
                                        List<Token> values = new ArrayList<Token>();
                                        values.add(token);
                                        attr_keys.add(attrib);
                                        attr_values.add(values);
                                    } else {
                                        attr_values.get(index).add(token);
                                    }

                                    break;
                                default:
                                    // value finished; let S_TAG handle this token
                                    backup(1);
                                    state = S_TAG;
                                    break;
                                }
                                break;

                            case S_COMMENT:
                                switch (id) {
                                case BLOCK_COMMENT:
                                case EOL:
                                case WS:
                                    // still part of the comment
                                    break;
                                default:
                                    // step back to the last comment token, emit the
                                    // comment; this token is then re-read in S_INIT
                                    backup(1);
                                    comment();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                }
                                break;

                            case S_DECLARATION:
                                switch (id) {
                                case DECLARATION:
                                case SGML_COMMENT:
                                case EOL:
                                case WS:
                                    // still part of the declaration
                                    break;
                                default:
                                    // declaration ended before this token
                                    backup(1);
                                    declaration();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                }
                                break;

                            case S_DOCTYPE_DECLARATION:
                                switch (id) {
                                case DECLARATION:
                                    // first declaration token after "<!DOCTYPE" is the root element
                                    root_element = token.text().toString();
                                    state = S_DOCTYPE_AFTER_ROOT_ELEMENT;
                                    break;
                                case SGML_COMMENT:
                                case EOL:
                                case WS:
                                    break;
                                default:
                                    backup(1);
                                    declaration();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                }
                                break;

                            case S_DOCTYPE_AFTER_ROOT_ELEMENT:
                                switch (id) {
                                case DECLARATION:
                                    if (token.text().toString().equals("PUBLIC")) {
                                        state = S_DOCTYPE_PUBLIC_ID;
                                        break;
                                    } else if (token.text().toString().equals(
                                            "SYSTEM")) {
                                        state = S_DOCTYPE_FILE;
                                        break;
                                    }
                                    //neither PUBLIC nor SYSTEM: close the declaration at
                                    //the previous token and re-read this one in S_INIT
                                    backup(1);
                                    declaration();
                                    state = S_INIT;
                                    start = -1;

                                    break;
                                case SGML_COMMENT:
                                case EOL:
                                case WS:
                                    break;
                                default:
                                    backup(1);
                                    declaration();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                }
                                break;

                            case S_DOCTYPE_PUBLIC_ID:
                                switch (id) {
                                case DECLARATION:
                                    // the public id is followed by the file
                                    doctype_public_id = token.text().toString();
                                    state = S_DOCTYPE_FILE;
                                    break;
                                case SGML_COMMENT:
                                case EOL:
                                case WS:
                                    break;
                                default:
                                    backup(1);
                                    declaration();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                }
                                break;

                            case S_DOCTYPE_FILE:
                                switch (id) {
                                case DECLARATION:
                                    doctype_file = token.text().toString();
                                    //jump to the simple sgml declaration state so that
                                    //any further declaration tokens are included
                                    state = S_DECLARATION;
                                    break;
                                case SGML_COMMENT:
                                case EOL:
                                case WS:
                                    break;
                                default:
                                    backup(1);
                                    declaration();
                                    state = S_INIT;
                                    start = -1;
                                    break;
                                }
                                break;

                            }
                        }
                    }

                    if (state != S_INIT) {
                        //an incomplete syntax element at the end of the file;
                        //only comments and declarations are emitted, an unfinished
                        //tag (S_TAG_OPEN_SYMBOL, S_TAG, S_TAG_ATTR, S_TAG_VALUE) is dropped
                        switch (state) {
                        case S_COMMENT:
                            comment();
                            break;
                        case S_DECLARATION:
                        case S_DOCTYPE_AFTER_ROOT_ELEMENT:
                        case S_DOCTYPE_DECLARATION:
                        case S_DOCTYPE_FILE:
                        case S_DOCTYPE_PUBLIC_ID:
                            declaration();
                            break;
                        }

                    }

                    return elements;

                }
617:
618:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.