001: /*
002: * Copyright 2006, 2007 Odysseus Software GmbH
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package de.odysseus.el.tree.impl;
017:
018: import java.util.HashMap;
019:
020: import de.odysseus.el.misc.LocalMessages;
021:
022: /**
023: * Handcrafted scanner.
024: *
025: * @author Christoph Beck
026: */
027: final class Scanner {
028: /**
029: * Scan exception type
030: */
031: @SuppressWarnings("serial")
032: static class ScanException extends Exception {
033: ScanException(int position, String encountered, Object expected) {
034: super (LocalMessages.get("error.scan", position,
035: encountered, expected));
036: }
037: }
038:
039: /**
040: * Symbol type
041: */
042: static enum Symbol {
043: EOF, PLUS("'+'"), MINUS("'-'"), MUL("'*'"), DIV("'/'|'div'"), MOD(
044: "'%'|'mod'"), LPAREN("'('"), RPAREN("')'"), IDENTIFIER, NOT(
045: "'!'|'not'"), AND("'&&'|'and'"), OR("'||'|'or'"), EMPTY(
046: "'empty'"), INSTANCEOF("'instanceof'"), INTEGER, FLOAT, TRUE(
047: "'true'"), FALSE("'false'"), STRING, NULL("'null'"), LE(
048: "'<='|'le'"), LT("'<'|'lt'"), GE("'>='|'ge'"), GT(
049: "'>'|'gt'"), EQ("'=='|'eq'"), NE("'!='|'ne'"), QUESTION(
050: "'?'"), COLON("':'"), TEXT, DOT("'.'"), LBRACK("'['"), RBRACK(
051: "']'"), COMMA("','"), START_EVAL_DEFERRED("'#{'"), START_EVAL_DYNAMIC(
052: "'${'"), END_EVAL("'}'");
053: private final String string;
054:
055: private Symbol() {
056: this (null);
057: }
058:
059: private Symbol(String string) {
060: this .string = string;
061: }
062:
063: @Override
064: public String toString() {
065: return string == null ? "<" + name() + ">" : string;
066: }
067: }
068:
069: private static final HashMap<String, Symbol> KEYMAP = new HashMap<String, Symbol>(
070: 16);
071:
072: static {
073: KEYMAP.put("null", Symbol.NULL);
074: KEYMAP.put("true", Symbol.TRUE);
075: KEYMAP.put("false", Symbol.FALSE);
076: KEYMAP.put("empty", Symbol.EMPTY);
077: KEYMAP.put("div", Symbol.DIV);
078: KEYMAP.put("mod", Symbol.MOD);
079: KEYMAP.put("not", Symbol.NOT);
080: KEYMAP.put("and", Symbol.AND);
081: KEYMAP.put("or", Symbol.OR);
082: KEYMAP.put("le", Symbol.LE);
083: KEYMAP.put("lt", Symbol.LT);
084: KEYMAP.put("eq", Symbol.EQ);
085: KEYMAP.put("ne", Symbol.NE);
086: KEYMAP.put("ge", Symbol.GE);
087: KEYMAP.put("gt", Symbol.GT);
088: KEYMAP.put("instanceof", Symbol.INSTANCEOF);
089: }
090:
091: private String image; // original image of the current token
092: private String value; // unescaped string for text/string token
093: private int position; // start position of current token
094: private boolean eval; // inside eval expression flag
095:
096: private final String input;
097: private final StringBuilder builder = new StringBuilder();
098:
099: /**
100: * Constructor.
101: * @param input expression string
102: */
103: Scanner(String input) {
104: this .input = input;
105: }
106:
107: /**
108: * @return <code>true</code> iff the specified character is a digit
109: */
110: private boolean isDigit(char c) {
111: return c >= '0' && c <= '9';
112: }
113:
114: /**
115: * text token
116: */
117: private Symbol nextText() throws ScanException {
118: builder.setLength(0);
119: int i = position;
120: int l = input.length();
121: boolean escaped = false;
122: while (i < l) {
123: char c = input.charAt(i);
124: switch (c) {
125: case '\\':
126: if (escaped) {
127: builder.append('\\');
128: } else {
129: escaped = true;
130: }
131: break;
132: case '#':
133: case '$':
134: if (i + 1 < l && input.charAt(i + 1) == '{') {
135: if (escaped) {
136: builder.append(c);
137: } else {
138: value = builder.toString();
139: image = input.substring(position, i);
140: return Symbol.TEXT;
141: }
142: } else {
143: if (escaped) {
144: builder.append('\\');
145: }
146: builder.append(c);
147: }
148: escaped = false;
149: break;
150: default:
151: if (escaped) {
152: builder.append('\\');
153: }
154: builder.append(c);
155: escaped = false;
156: }
157: i++;
158: }
159: if (escaped) {
160: builder.append('\\');
161: }
162: value = builder.toString();
163: image = input.substring(position, i);
164: return Symbol.TEXT;
165: }
166:
167: /**
168: * string token
169: */
170: private Symbol nextString() throws ScanException {
171: builder.setLength(0);
172: char quote = input.charAt(position);
173: int i = position + 1;
174: int l = input.length();
175: while (i < l) {
176: char c = input.charAt(i++);
177: if (c == '\\') {
178: if (i == l) {
179: throw new ScanException(position,
180: "unterminated string", quote + " or \\");
181: } else {
182: c = input.charAt(i++);
183: if (c == '\\' || c == quote) {
184: builder.append(c);
185: } else {
186: throw new ScanException(position,
187: "invalid escape sequence \\" + c, "\\"
188: + quote + " or \\\\");
189: }
190: }
191: } else if (c == quote) {
192: value = builder.toString();
193: image = input.substring(position, i);
194: return Symbol.STRING;
195: } else {
196: builder.append(c);
197: }
198: }
199: throw new ScanException(position, "unterminated string", quote);
200: }
201:
202: /**
203: * number token
204: */
205: private Symbol nextNumber() throws ScanException {
206: int i = position;
207: int l = input.length();
208: while (i < l && isDigit(input.charAt(i))) {
209: i++;
210: }
211: Symbol symbol = Symbol.INTEGER;
212: if (i < l && input.charAt(i) == '.') {
213: i++;
214: while (i < l && isDigit(input.charAt(i))) {
215: i++;
216: }
217: symbol = Symbol.FLOAT;
218: }
219: if (i < l && (input.charAt(i) == 'e' || input.charAt(i) == 'E')) {
220: int e = i;
221: i++;
222: if (i < l
223: && (input.charAt(i) == '+' || input.charAt(i) == '-')) {
224: i++;
225: }
226: if (i < l && isDigit(input.charAt(i))) {
227: i++;
228: while (i < l && isDigit(input.charAt(i))) {
229: i++;
230: }
231: symbol = Symbol.FLOAT;
232: } else {
233: i = e;
234: }
235: }
236: image = input.substring(position, i);
237: return symbol;
238: }
239:
240: /**
241: * token inside an eval expression
242: */
243: private Symbol nextEval() throws ScanException {
244: char c = input.charAt(position);
245: int p1 = position + 1;
246: switch (c) {
247: case '*':
248: image = "*";
249: return Symbol.MUL;
250: case '/':
251: image = "/";
252: return Symbol.DIV;
253: case '%':
254: image = "%";
255: return Symbol.MOD;
256: case '+':
257: image = "+";
258: return Symbol.PLUS;
259: case '-':
260: image = "-";
261: return Symbol.MINUS;
262: case '?':
263: image = "?";
264: return Symbol.QUESTION;
265: case ':':
266: image = ":";
267: return Symbol.COLON;
268: case '[':
269: image = "[";
270: return Symbol.LBRACK;
271: case ']':
272: image = "]";
273: return Symbol.RBRACK;
274: case '(':
275: image = "(";
276: return Symbol.LPAREN;
277: case ')':
278: image = ")";
279: return Symbol.RPAREN;
280: case ',':
281: image = ",";
282: return Symbol.COMMA;
283: case '.':
284: if (p1 == input.length() || !isDigit(input.charAt(p1))) {
285: image = ".";
286: return Symbol.DOT;
287: }
288: break;
289: case '=':
290: if (p1 < input.length() && input.charAt(p1) == '=') {
291: image = "==";
292: return Symbol.EQ;
293: }
294: break;
295: case '&':
296: if (p1 < input.length() && input.charAt(p1) == '&') {
297: image = "&&";
298: return Symbol.AND;
299: }
300: break;
301: case '|':
302: if (p1 < input.length() && input.charAt(p1) == '|') {
303: image = "||";
304: return Symbol.OR;
305: }
306: break;
307: case '!':
308: if (p1 < input.length() && input.charAt(p1) == '=') {
309: image = "!=";
310: return Symbol.NE;
311: }
312: image = "!";
313: return Symbol.NOT;
314: case '<':
315: if (p1 < input.length() && input.charAt(p1) == '=') {
316: image = "<=";
317: return Symbol.LE;
318: }
319: image = "<";
320: return Symbol.LT;
321: case '>':
322: if (p1 < input.length() && input.charAt(p1) == '=') {
323: image = "<=";
324: return Symbol.GE;
325: }
326: image = ">";
327: return Symbol.GT;
328: case '"':
329: case '\'':
330: return nextString();
331: }
332:
333: if (isDigit(c) || c == '.') {
334: return nextNumber();
335: }
336:
337: if (Character.isJavaIdentifierStart(c)) {
338: int i = p1;
339: int l = input.length();
340: while (i < l
341: && Character.isJavaIdentifierPart(input.charAt(i))) {
342: i++;
343: }
344: image = input.substring(position, i);
345: return KEYMAP.containsKey(image) ? KEYMAP.get(image)
346: : Symbol.IDENTIFIER;
347: }
348:
349: throw new ScanException(position, "invalid character '" + c
350: + "'", "expression token");
351: }
352:
353: /**
354: * expose current token image. If the current token is of type text or string,
355: * this method returns the unescaped value instead of the original image.
356: */
357: String getImage() {
358: return value == null ? image : value;
359: }
360:
361: /**
362: * current position
363: */
364: int getPosition() {
365: return position;
366: }
367:
368: /**
369: * Scan next token.
370: * After calling this method, {@link #getImage()} and {@link #getPosition()}
371: * can be used to retreive the token's image and input position.
372: * @return symbol describing the token type.
373: */
374: Symbol next() throws ScanException {
375: if (image != null) {
376: position += image.length();
377: image = null;
378: value = null;
379: }
380:
381: int length = input.length();
382: if (eval) {
383: while (position < length
384: && Character.isWhitespace(input.charAt(position))) {
385: position++;
386: }
387: }
388:
389: if (position == length) {
390: return Symbol.EOF;
391: }
392:
393: if (eval) {
394: if (input.charAt(position) == '}') {
395: image = "}";
396: eval = false;
397: return Symbol.END_EVAL;
398: }
399: return nextEval();
400: } else {
401: if (position + 1 < length
402: && input.charAt(position + 1) == '{') {
403: switch (input.charAt(position)) {
404: case '#':
405: image = "#{";
406: eval = true;
407: return Symbol.START_EVAL_DEFERRED;
408: case '$':
409: image = "${";
410: eval = true;
411: return Symbol.START_EVAL_DYNAMIC;
412: }
413: }
414: return nextText();
415: }
416: }
417: }
|