001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.html.editor.coloring;
043:
044: import java.util.Iterator;
045: import java.util.List;
046: import java.util.logging.Level;
047: import java.util.logging.Logger;
048: import javax.swing.text.Document;
049: import org.netbeans.api.lexer.Language;
050: import org.netbeans.api.lexer.LanguagePath;
051: import org.netbeans.api.lexer.Token;
052: import org.netbeans.api.lexer.TokenHierarchy;
053: import org.netbeans.api.lexer.TokenSequence;
054: import org.netbeans.editor.BaseDocument;
055: import org.netbeans.editor.BaseDocument;
056: import org.netbeans.editor.ext.html.parser.SyntaxElement;
057: import org.netbeans.editor.ext.html.parser.SyntaxElement.TagAttribute;
058: import org.netbeans.editor.ext.html.parser.SyntaxParserListener;
059: import org.netbeans.modules.editor.NbEditorUtilities;
060:
061: /**
062: * This class creates lexer embeddings of CSS or JAVASCRIPT language in HTML code.
063: * The HTML code may be either the top level language (.html file) or
064: * may be embedded as FIRST level embedding language in other language like JSP, RHTML.
065: *
066: *
067: * Note: Dynamic embedding creation for <script> and <style> tags has been removed temporarily
068: * due to problems described in following issues:
069: * [Issue 117450] Provide unified LexerInput across multiple joined embedded sections
070: * [Issue 118892] Allow Schlieman lexer to continuously lex embedded language over more tokens of its parent language
071: *
072: * Once Issue 117450 is fixed the old way of dynamic embeddings creation should be used
073: *
074: * @author Marek.Fukala@Sun.com
075: */
076: public class EmbeddingUpdater implements SyntaxParserListener {
077:
078: private static final String JAVASCRIPT_MIMETYPE = "text/javascript";//NOI18N
079: private static final String JAVASCRIPT_HREF_PREFIX = "javascript:"; //NOI18N
080:
081: //XXX update mimetype once Hanz fixes the mimetype in CSS editor module
082: // private static final String CSS_MIMETYPE = "text/x-css"; //NOI18N
083: private static final String CSS_INLINED_MIMETYPE = "text/x-css"; //NOI18N
084: // private static final String CSS_SCRIPT_TAG_NAME = "style"; //NOI18N
085:
086: private static final Logger LOGGER = Logger
087: .getLogger(EmbeddingUpdater.class.getName());
088:
089: private final Document doc;
090:
091: // private int styleStart = -1;
092:
093: private LanguagePath languagePath;
094:
095: @SuppressWarnings("unchecked")
096: public EmbeddingUpdater(Document doc) {
097: this .doc = doc;
098:
099: String topLevelLanguageMimeType = NbEditorUtilities
100: .getMimeType(doc);
101: if (topLevelLanguageMimeType == null) {
102: throw new IllegalArgumentException(
103: "Cannot determine document mimetype " + doc);
104: }
105:
106: Language lang = Language.find(topLevelLanguageMimeType);
107: if (lang == null) {
108: throw new IllegalArgumentException("Cannot find language "
109: + topLevelLanguageMimeType);
110: }
111:
112: if ("text/html".equals(topLevelLanguageMimeType)) {
113: languagePath = LanguagePath.get(lang);
114: } else {
115: languagePath = LanguagePath.get(LanguagePath.get(lang),
116: Language.find("text/html"));
117: }
118:
119: }
120:
121: public void parsingFinished(List<SyntaxElement> elements) {
122: for (SyntaxElement sel : elements) {
123: if (sel.type() == SyntaxElement.TYPE_TAG) {
124: startTag((SyntaxElement.Tag) sel);
125: }
126: }
127: }
128:
129: private void startTag(SyntaxElement.Tag sel) {
130: // if(CSS_SCRIPT_TAG_NAME.equalsIgnoreCase(sel.getName()) && declaresCSS(sel)) {
131: // styleStart = sel.getElementOffset() + sel.getElementLength();
132: // } else if("a".equalsIgnoreCase(sel.getName())) {
133: if ("a".equalsIgnoreCase(sel.getName())) {
134: //check whether the href attribute value contains the javascript: prefix
135: TagAttribute hrefAttr = sel.getAttribute("href"); //NOI18N
136: if (hrefAttr != null) {
137: String hrefVal = unquote(hrefAttr.getValue());
138: boolean quoted = isQuotationChar(hrefAttr.getValue()
139: .charAt(0));
140: if (hrefVal.startsWith(JAVASCRIPT_HREF_PREFIX)) {
141: createEmbedding(JAVASCRIPT_MIMETYPE, hrefAttr
142: .getValueOffset(), hrefAttr
143: .getValueOffset()
144: + hrefAttr.getValue().length(),
145: JAVASCRIPT_HREF_PREFIX.length()
146: + (quoted ? 1 : 0), quoted ? 1 : 0);
147: }
148: }
149: }
150:
151: //various attributes values embedding
152: for (SyntaxElement.TagAttribute tagattr : sel.getAttributes()) {
153: if ("style".equalsIgnoreCase(tagattr.getName())) { //NOI18N
154: //XXX we need to look for it just in certain html tags
155: createEmbedding(CSS_INLINED_MIMETYPE, tagattr);
156: } else if (tagattr.getName().startsWith("on")
157: || tagattr.getName().startsWith("ON")) { //NOI18N
158: //XXX very simple algorithm for finding "onclick" like attributes
159: //should be restricted according to the html specification
160: createEmbedding(JAVASCRIPT_MIMETYPE, tagattr);
161: }
162: }
163: }
164:
165: // private boolean declaresCSS(SyntaxElement.Tag sel) {
166: // TagAttribute type = sel.getAttribute("type"); //NOI18N
167: //
168: // if(type == null) {
169: // return true; //default is css
170: // } else if(unquote(type.getValue()).equalsIgnoreCase("text/css")) { //NOI18N
171: // return true;
172: // }
173: //
174: // return false;
175: // }
176:
177: private String unquote(String s) {
178: if (s.length() == 0) {
179: //nothing to unquote
180: return s;
181: }
182: if (isQuotationChar(s.charAt(0))) {
183: s = s.substring(1);
184: }
185: if (s.length() == 0) {
186: //nothing more to unquote
187: return s;
188: }
189: if (isQuotationChar(s.charAt(s.length() - 1))) {
190: s = s.substring(0, s.length() - 1);
191: }
192: return s;
193: }
194:
195: private boolean isQuotationChar(char ch) {
196: return ch == '"' || ch == '\'';
197: }
198:
199: // private void endTag(SyntaxElement.Named sel) {
200: // if(CSS_SCRIPT_TAG_NAME.equalsIgnoreCase(sel.getName())) {
201: // if(styleStart != -1) {
202: // createEmbedding(CSS_MIMETYPE, styleStart, sel.getElementOffset(), 0,0);
203: // styleStart = -1;
204: // }
205: // }
206: // }
207:
208: // //I need to specially handle the case where the javascript block contains
209: // //html comments.
210: // private void createJavascriptEmbedding(SyntaxElement.Named sel, int from, int to) {
211: // ((BaseDocument)doc).readLock();
212: // try {
213: // TokenHierarchy th = TokenHierarchy.get(doc);
214: // TokenSequence ts = tokenSequence(th, from);
215: // if(ts == null) {
216: // //no html token sequence there - weird
217: // return ;
218: // }
219: //
220: // ts.move(from);
221: // if(!ts.moveNext() && !ts.movePrevious()) {
222: // return ; //no token
223: // }
224: //
225: // int jsStart = from; int jsStartSkipLength = 0;
226: // int jsEnd = to; int jsEndSkipLength = 0;
227: // while(ts.moveNext() && (ts.token().offset(th) <= to)) {
228: // Token t = ts.token();
229: // if(t.id() == HTMLTokenId.BLOCK_COMMENT) {
230: // if(t.text().toString().startsWith("<!--")) { //NOI18N
231: // jsStart = t.offset(th);
232: // jsStartSkipLength = "<!--".length(); //NOI18N
233: // } else if(t.text().toString().endsWith("-->")) { //NOI18N
234: // jsEnd = t.offset(th) + t.length();
235: // jsEndSkipLength = "-->".length(); //NOI18N
236: // }
237: // }
238: // }
239: // createEmbedding(JAVASCRIPT_MIMETYPE, jsStart, jsEnd, jsStartSkipLength, jsEndSkipLength);
240: // } finally {
241: // ((BaseDocument)doc).readUnlock();
242: // }
243: // }
244:
245: private void createEmbedding(String mimeType,
246: SyntaxElement.TagAttribute tagAttr) {
247: if (tagAttr.getValue().charAt(0) == '\''
248: || tagAttr.getValue().charAt(0) == '"') {
249: //cut off the qutation marks
250: createEmbedding(mimeType, tagAttr.getValueOffset(), tagAttr
251: .getValueOffset()
252: + tagAttr.getValueLength(), 1, 1);
253: } else {
254: createEmbedding(mimeType, tagAttr.getValueOffset(), tagAttr
255: .getValueOffset()
256: + tagAttr.getValueLength(), 0, 0);
257: }
258: }
259:
260: @SuppressWarnings("unchecked")
261: private void createEmbedding(String mimeType, int startOffset,
262: int endOffset, int startSkipLength, int endSkipLength) {
263: if (startOffset >= endOffset) {
264: LOGGER.log(Level.WARNING, "startOffset >= endOffset: "
265: + startOffset + " >= " + endOffset); //NOI18N
266: return;
267: }
268:
269: Language lang = Language.find(mimeType);
270: if (lang == null) {
271: LOGGER.log(Level.WARNING, "No " + mimeType
272: + " language found! (" + startOffset + " - "
273: + endOffset + ")"); //NOI18N
274: return; //no language found
275: }
276:
277: ((BaseDocument) doc).extWriteLock(); //writeLock is required since we create embedding what is kind of document change
278: try {
279: TokenHierarchy th = TokenHierarchy.get(doc);
280: List<TokenSequence> tokenSequenceList = th
281: .tokenSequenceList(languagePath, startOffset,
282: endOffset);
283:
284: //use the startSkipLength and endSkipLength only on the first and last token
285: //in the sequence of tokens we create the embedding.
286: boolean iAmFirstToken = true;
287: boolean iAmLastToken = false;
288:
289: //find all token sequences of the language in the given range
290: Iterator<TokenSequence> sequences = tokenSequenceList
291: .iterator();
292: while (sequences.hasNext()) {
293: TokenSequence ts = sequences.next();
294: ts.move(startOffset);
295: if (!ts.moveNext() && !ts.movePrevious()) {
296: return; //no token
297: }
298: if (ts.offset() < startOffset
299: || ts.offset() > endOffset) {
300: //a bug #121045 in lexer, we got a token sequence outside of the specified area
301: //just ignore...
302: continue;
303: }
304: do {
305: Token item = ts.token();
306: //test if the token != null, according to the API doc shouldn't happen
307: //once moveNext/Previous() has been called
308: if (item == null) {
309: LOGGER
310: .log(
311: Level.FINE,
312: "Please refer to issue #126628: tokenSequence.token() returned null after tokenSequence.moveNext/Previous() called! This seems to be a bug in lexer. Please attach the info dumped into the log, the document and possibly steps to reproduce."); //NOI18N
313: LOGGER.log(Level.FINE, "TokenSequence:\n"
314: + ts.toString()); //NOI18N
315: }
316:
317: //test if we are last token
318: boolean hasNextToken = ts.moveNext();
319: iAmLastToken = !(hasNextToken && ts.offset() < endOffset);
320: boolean iAmLastSequence = !sequences.hasNext();
321: if (hasNextToken) {
322: //rewind the tokenSequence back so the
323: //embedding is created on a proper token
324: ts.movePrevious();
325: }
326: if (ts.embedded(lang) == null) {
327: //the embedding doesn't exist, try to create
328: if (!ts
329: .createEmbedding(
330: lang,
331: iAmFirstToken ? startSkipLength
332: : 0,
333: iAmLastToken && iAmLastSequence ? endSkipLength
334: : 0, true)) {
335: CharSequence text = item.text();
336: if (text == null) {
337: //according to the Token.text() javadoc this shouldn't happen =>
338: //notify user about the situation and provide some debug info.
339: LOGGER
340: .log(
341: Level.FINE,
342: null,
343: new IllegalStateException(
344: "Please refer to issue #126628: Token.text() of "
345: + item
346: .toString()
347: + " == null without any previous modification of the underlying document! This seems to be a bug in lexer. Please attach the info dumped into the log, the document and possibly steps to reproduce.")); //NOI18N
348: LOGGER.log(Level.FINE,
349: "TokenSequence:\n"
350: + ts.toString()); //NOI18N
351: } else {
352: LOGGER.log(Level.FINE,
353: "Cannot create embedding for "
354: + mimeType + " ["
355: + startOffset + " - "
356: + endOffset + "] ("
357: + text + ")\n"); //NOI18N
358: }
359: } else {
360: CharSequence text = item.text();
361: if (text == null) {
362: //according to the Token.text() javadoc this shouldn't happen =>
363: //notify user about the situation and provide some debug info.
364: LOGGER
365: .log(
366: Level.FINE,
367: null,
368: new IllegalStateException(
369: "Token.text() of "
370: + item
371: .toString()
372: + " == null without any previous modification of the underlying document! This seems to be a bug in lexer. Please report the issue to lexer module and attach the info dumped into the log, the document and possibly steps to reproduce.")); //NOI18N
373: LOGGER.log(Level.FINE,
374: "TokenSequence:\n"
375: + ts.toString()); //NOI18N
376: } else {
377: LOGGER
378: .log(
379: Level.FINE,
380: "Embedding for "
381: + mimeType
382: + " created ["
383: + startOffset
384: + " - "
385: + endOffset
386: + "] ("
387: + printEmbeddedText(
388: text,
389: iAmFirstToken ? startSkipLength
390: : 0,
391: iAmLastToken
392: && iAmLastSequence ? endSkipLength
393: : 0)
394: + ")\n"); //NOI18N
395: }
396: }
397: }
398: iAmFirstToken = false;
399: if (!iAmLastToken) {
400: ts.moveNext();
401: }
402: } while (!iAmLastToken);
403: }
404: } finally {
405: ((BaseDocument) doc).extWriteUnlock();
406: }
407: }
408:
409: private CharSequence printEmbeddedText(CharSequence text,
410: int startSkipLength, int endSkipLength) {
411: StringBuffer sb = new StringBuffer(text);
412: if (startSkipLength >= 0) {
413: sb.insert(startSkipLength, '[');
414: }
415: if (endSkipLength >= 0) {
416: sb.insert(sb.length() - endSkipLength, ']');
417: }
418: return sb;
419: }
420:
421: }
|