001: /*
002: * HTMLTokenMarker.java - HTML token marker
003: * Copyright (C) 1998, 1999 Slava Pestov
004: *
005: * You may use and modify this package for any purpose. Redistribution is
006: * permitted, in both source and binary form, provided that this notice
007: * remains intact in all source distributions of this package.
008: */
009:
010: package org.syntax.jedit.tokenmarker;
011:
012: import org.syntax.jedit.*;
013: import javax.swing.text.Segment;
014:
015: /**
016: * HTML token marker.
017: *
018: * @author Slava Pestov
019: * @version $Id: HTMLTokenMarker.java 3074 2004-11-08 04:24:58Z bquig $
020: */
021: public class HTMLTokenMarker extends TokenMarker {
022: public static final byte JAVASCRIPT = Token.INTERNAL_FIRST;
023:
024: public HTMLTokenMarker() {
025: this (true);
026: }
027:
028: public HTMLTokenMarker(boolean js) {
029: this .js = js;
030: keywords = JavaScriptTokenMarker.getKeywords();
031: }
032:
033: public byte markTokensImpl(byte token, Segment line, int lineIndex) {
034: char[] array = line.array;
035: int offset = line.offset;
036: lastOffset = offset;
037: lastKeyword = offset;
038: int length = line.count + offset;
039: boolean backslash = false;
040:
041: loop: for (int i = offset; i < length; i++) {
042: int i1 = (i + 1);
043:
044: char c = array[i];
045: if (c == '\\') {
046: backslash = !backslash;
047: continue;
048: }
049:
050: switch (token) {
051: case Token.NULL: // HTML text
052: backslash = false;
053: switch (c) {
054: case '<':
055: addToken(i - lastOffset, token);
056: lastOffset = lastKeyword = i;
057: if (SyntaxUtilities.regionMatches(false, line, i1,
058: "!--")) {
059: i += 3;
060: token = Token.COMMENT1;
061: } else if (js
062: && SyntaxUtilities.regionMatches(true,
063: line, i1, "script>")) {
064: addToken(8, Token.KEYWORD1);
065: lastOffset = lastKeyword = (i += 8);
066: token = JAVASCRIPT;
067: } else {
068: token = Token.KEYWORD1;
069: }
070: break;
071: case '&':
072: addToken(i - lastOffset, token);
073: lastOffset = lastKeyword = i;
074: token = Token.KEYWORD2;
075: break;
076: }
077: break;
078: case Token.KEYWORD1: // Inside a tag
079: backslash = false;
080: if (c == '>') {
081: addToken(i1 - lastOffset, token);
082: lastOffset = lastKeyword = i1;
083: token = Token.NULL;
084: }
085: break;
086: case Token.KEYWORD2: // Inside an entity
087: backslash = false;
088: if (c == ';') {
089: addToken(i1 - lastOffset, token);
090: lastOffset = lastKeyword = i1;
091: token = Token.NULL;
092: break;
093: }
094: break;
095: case Token.COMMENT1: // Inside a comment
096: backslash = false;
097: if (SyntaxUtilities
098: .regionMatches(false, line, i, "-->")) {
099: addToken((i + 3) - lastOffset, token);
100: lastOffset = lastKeyword = i + 3;
101: token = Token.NULL;
102: }
103: break;
104: case JAVASCRIPT: // Inside a JavaScript
105: switch (c) {
106: case '<':
107: backslash = false;
108: doKeyword(line, i, c);
109: if (SyntaxUtilities.regionMatches(true, line, i1,
110: "/script>")) {
111: addToken(i - lastOffset, Token.NULL);
112: addToken(9, Token.KEYWORD1);
113: lastOffset = lastKeyword = (i += 9);
114: token = Token.NULL;
115: }
116: break;
117: case '"':
118: if (backslash)
119: backslash = false;
120: else {
121: doKeyword(line, i, c);
122: addToken(i - lastOffset, Token.NULL);
123: lastOffset = lastKeyword = i;
124: token = Token.LITERAL1;
125: }
126: break;
127: case '\'':
128: if (backslash)
129: backslash = false;
130: else {
131: doKeyword(line, i, c);
132: addToken(i - lastOffset, Token.NULL);
133: lastOffset = lastKeyword = i;
134: token = Token.LITERAL2;
135: }
136: break;
137: case '/':
138: backslash = false;
139: doKeyword(line, i, c);
140: if (length - i > 1) {
141: addToken(i - lastOffset, Token.NULL);
142: lastOffset = lastKeyword = i;
143: if (array[i1] == '/') {
144: addToken(length - i, Token.COMMENT2);
145: lastOffset = lastKeyword = length;
146: break loop;
147: } else if (array[i1] == '*') {
148: token = Token.COMMENT2;
149: }
150: }
151: break;
152: default:
153: backslash = false;
154: if (!Character.isLetterOrDigit(c) && c != '_')
155: doKeyword(line, i, c);
156: break;
157: }
158: break;
159: case Token.LITERAL1: // JavaScript "..."
160: if (backslash)
161: backslash = false;
162: else if (c == '"') {
163: addToken(i1 - lastOffset, Token.LITERAL1);
164: lastOffset = lastKeyword = i1;
165: token = JAVASCRIPT;
166: }
167: break;
168: case Token.LITERAL2: // JavaScript '...'
169: if (backslash)
170: backslash = false;
171: else if (c == '\'') {
172: addToken(i1 - lastOffset, Token.LITERAL1);
173: lastOffset = lastKeyword = i1;
174: token = JAVASCRIPT;
175: }
176: break;
177: case Token.COMMENT2: // Inside a JavaScript comment
178: backslash = false;
179: if (c == '*' && length - i > 1 && array[i1] == '/') {
180: addToken((i += 2) - lastOffset, Token.COMMENT2);
181: lastOffset = lastKeyword = i;
182: token = JAVASCRIPT;
183: }
184: break;
185: default:
186: throw new InternalError("Invalid state: " + token);
187: }
188: }
189:
190: switch (token) {
191: case Token.LITERAL1:
192: case Token.LITERAL2:
193: addToken(length - lastOffset, Token.INVALID);
194: token = JAVASCRIPT;
195: break;
196: case Token.KEYWORD2:
197: addToken(length - lastOffset, Token.INVALID);
198: token = Token.NULL;
199: break;
200: case JAVASCRIPT:
201: doKeyword(line, length, '\0');
202: addToken(length - lastOffset, Token.NULL);
203: break;
204: default:
205: addToken(length - lastOffset, token);
206: break;
207: }
208:
209: return token;
210: }
211:
212: // private members
213: private KeywordMap keywords;
214: private boolean js;
215: private int lastOffset;
216: private int lastKeyword;
217:
218: private boolean doKeyword(Segment line, int i, char c) {
219: int i1 = i + 1;
220:
221: int len = i - lastKeyword;
222: byte id = keywords.lookup(line, lastKeyword, len);
223: if (id != Token.NULL) {
224: if (lastKeyword != lastOffset)
225: addToken(lastKeyword - lastOffset, Token.NULL);
226: addToken(len, id);
227: lastOffset = i;
228: }
229: lastKeyword = i1;
230: return false;
231: }
232: }
|