001: /*
002: * HTMLTokenMarker.java - HTML token marker
003: * Copyright (C) 1998, 1999 Slava Pestov
004: *
005: * You may use and modify this package for any purpose. Redistribution is
006: * permitted, in both source and binary form, provided that this notice
007: * remains intact in all source distributions of this package.
008: */
009:
010: package org.syntax.jedit.tokenmarker;
011:
012: import javax.swing.text.Segment;
013:
014: import org.syntax.jedit.KeywordMap;
015: import org.syntax.jedit.SyntaxUtilities;
016:
017: /**
018: * HTML token marker.
019: *
020: * @author Slava Pestov
021: * @version $Id: HTMLTokenMarker.java,v 1.34 1999/12/13 03:40:29 sp Exp $
022: */
023: public class HTMLTokenMarker extends TokenMarker {
024: public static final byte JAVASCRIPT = Token.INTERNAL_FIRST;
025:
026: public HTMLTokenMarker() {
027: this (true);
028: }
029:
030: public HTMLTokenMarker(boolean js) {
031: this .js = js;
032: keywords = JavaScriptTokenMarker.getKeywords();
033: }
034:
035: public byte markTokensImpl(byte token, Segment line, int lineIndex) {
036: char[] array = line.array;
037: int offset = line.offset;
038: lastOffset = offset;
039: lastKeyword = offset;
040: int length = line.count + offset;
041: boolean backslash = false;
042:
043: loop: for (int i = offset; i < length; i++) {
044: int i1 = (i + 1);
045:
046: char c = array[i];
047: if (c == '\\') {
048: backslash = !backslash;
049: continue;
050: }
051:
052: switch (token) {
053: case Token.NULL: // HTML text
054: backslash = false;
055: switch (c) {
056: case '<':
057: addToken(i - lastOffset, token);
058: lastOffset = lastKeyword = i;
059: if (SyntaxUtilities.regionMatches(false, line, i1,
060: "!--")) {
061: i += 3;
062: token = Token.COMMENT1;
063: } else if (js
064: && SyntaxUtilities.regionMatches(true,
065: line, i1, "script>")) {
066: addToken(8, Token.KEYWORD1);
067: lastOffset = lastKeyword = (i += 8);
068: token = JAVASCRIPT;
069: } else {
070: token = Token.KEYWORD1;
071: }
072: break;
073: case '&':
074: addToken(i - lastOffset, token);
075: lastOffset = lastKeyword = i;
076: token = Token.KEYWORD2;
077: break;
078: }
079: break;
080: case Token.KEYWORD1: // Inside a tag
081: backslash = false;
082: if (c == '>') {
083: addToken(i1 - lastOffset, token);
084: lastOffset = lastKeyword = i1;
085: token = Token.NULL;
086: }
087: break;
088: case Token.KEYWORD2: // Inside an entity
089: backslash = false;
090: if (c == ';') {
091: addToken(i1 - lastOffset, token);
092: lastOffset = lastKeyword = i1;
093: token = Token.NULL;
094: break;
095: }
096: break;
097: case Token.COMMENT1: // Inside a comment
098: backslash = false;
099: if (SyntaxUtilities
100: .regionMatches(false, line, i, "-->")) {
101: addToken((i + 3) - lastOffset, token);
102: lastOffset = lastKeyword = i + 3;
103: token = Token.NULL;
104: }
105: break;
106: case JAVASCRIPT: // Inside a JavaScript
107: switch (c) {
108: case '<':
109: backslash = false;
110: doKeyword(line, i, c);
111: if (SyntaxUtilities.regionMatches(true, line, i1,
112: "/script>")) {
113: addToken(i - lastOffset, Token.NULL);
114: addToken(9, Token.KEYWORD1);
115: lastOffset = lastKeyword = (i += 9);
116: token = Token.NULL;
117: }
118: break;
119: case '"':
120: if (backslash)
121: backslash = false;
122: else {
123: doKeyword(line, i, c);
124: addToken(i - lastOffset, Token.NULL);
125: lastOffset = lastKeyword = i;
126: token = Token.LITERAL1;
127: }
128: break;
129: case '\'':
130: if (backslash)
131: backslash = false;
132: else {
133: doKeyword(line, i, c);
134: addToken(i - lastOffset, Token.NULL);
135: lastOffset = lastKeyword = i;
136: token = Token.LITERAL2;
137: }
138: break;
139: case '/':
140: backslash = false;
141: doKeyword(line, i, c);
142: if (length - i > 1) {
143: addToken(i - lastOffset, Token.NULL);
144: lastOffset = lastKeyword = i;
145: if (array[i1] == '/') {
146: addToken(length - i, Token.COMMENT2);
147: lastOffset = lastKeyword = length;
148: break loop;
149: } else if (array[i1] == '*') {
150: token = Token.COMMENT2;
151: }
152: }
153: break;
154: default:
155: backslash = false;
156: if (!Character.isLetterOrDigit(c) && c != '_')
157: doKeyword(line, i, c);
158: break;
159: }
160: break;
161: case Token.LITERAL1: // JavaScript "..."
162: if (backslash)
163: backslash = false;
164: else if (c == '"') {
165: addToken(i1 - lastOffset, Token.LITERAL1);
166: lastOffset = lastKeyword = i1;
167: token = JAVASCRIPT;
168: }
169: break;
170: case Token.LITERAL2: // JavaScript '...'
171: if (backslash)
172: backslash = false;
173: else if (c == '\'') {
174: addToken(i1 - lastOffset, Token.LITERAL1);
175: lastOffset = lastKeyword = i1;
176: token = JAVASCRIPT;
177: }
178: break;
179: case Token.COMMENT2: // Inside a JavaScript comment
180: backslash = false;
181: if (c == '*' && length - i > 1 && array[i1] == '/') {
182: addToken((i += 2) - lastOffset, Token.COMMENT2);
183: lastOffset = lastKeyword = i;
184: token = JAVASCRIPT;
185: }
186: break;
187: default:
188: throw new InternalError("Invalid state: " + token);
189: }
190: }
191:
192: switch (token) {
193: case Token.LITERAL1:
194: case Token.LITERAL2:
195: addToken(length - lastOffset, Token.INVALID);
196: token = JAVASCRIPT;
197: break;
198: case Token.KEYWORD2:
199: addToken(length - lastOffset, Token.INVALID);
200: token = Token.NULL;
201: break;
202: case JAVASCRIPT:
203: doKeyword(line, length, '\0');
204: addToken(length - lastOffset, Token.NULL);
205: break;
206: default:
207: addToken(length - lastOffset, token);
208: break;
209: }
210:
211: return token;
212: }
213:
214: // private members
215: private KeywordMap keywords;
216: private boolean js;
217: private int lastOffset;
218: private int lastKeyword;
219:
220: private boolean doKeyword(Segment line, int i, char c) {
221: int i1 = i + 1;
222:
223: int len = i - lastKeyword;
224: byte id = keywords.lookup(line, lastKeyword, len);
225: if (id != Token.NULL) {
226: if (lastKeyword != lastOffset)
227: addToken(lastKeyword - lastOffset, Token.NULL);
228: addToken(len, id);
229: lastOffset = i;
230: }
231: lastKeyword = i1;
232: return false;
233: }
234: }
|