001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: /* $Id: PropertyTokenizer.java 426576 2006-07-28 15:44:37Z jeremias $ */
019:
020: package org.apache.fop.fo.expr;
021:
022: /**
023: * Class to tokenize XSL FO property expression.
024: * This class is heavily based on the epxression tokenizer in James Clark's
025: * XT, an XSLT processor.
026: */
027: class PropertyTokenizer {
028:
029: static final int TOK_EOF = 0;
030: static final int TOK_NCNAME = TOK_EOF + 1;
031: static final int TOK_MULTIPLY = TOK_NCNAME + 1;
032: static final int TOK_LPAR = TOK_MULTIPLY + 1;
033: static final int TOK_RPAR = TOK_LPAR + 1;
034: static final int TOK_LITERAL = TOK_RPAR + 1;
035: static final int TOK_NUMBER = TOK_LITERAL + 1;
036: static final int TOK_FUNCTION_LPAR = TOK_NUMBER + 1;
037: static final int TOK_PLUS = TOK_FUNCTION_LPAR + 1;
038: static final int TOK_MINUS = TOK_PLUS + 1;
039: static final int TOK_MOD = TOK_MINUS + 1;
040: static final int TOK_DIV = TOK_MOD + 1;
041: static final int TOK_NUMERIC = TOK_DIV + 1;
042: static final int TOK_COMMA = TOK_NUMERIC + 1;
043: static final int TOK_PERCENT = TOK_COMMA + 1;
044: static final int TOK_COLORSPEC = TOK_PERCENT + 1;
045: static final int TOK_FLOAT = TOK_COLORSPEC + 1;
046: static final int TOK_INTEGER = TOK_FLOAT + 1;
047:
048: protected int currentToken = TOK_EOF;
049: protected String currentTokenValue = null;
050: protected int currentUnitLength = 0;
051:
052: private int currentTokenStartIndex = 0;
053: private/* final */String expr;
054: private int exprIndex = 0;
055: private int exprLength;
056: private boolean recognizeOperator = false;
057:
058: /**
059: * Construct a new PropertyTokenizer object to tokenize the passed
060: * String.
061: * @param s The Property expressio to tokenize.
062: */
063: PropertyTokenizer(String s) {
064: this .expr = s;
065: this .exprLength = s.length();
066: }
067:
068: /**
069: * Return the next token in the expression string.
070: * This sets the following package visible variables:
071: * currentToken An enumerated value identifying the recognized token
072: * currentTokenValue A String containing the token contents
073: * currentUnitLength If currentToken = TOK_NUMERIC, the number of
074: * characters in the unit name.
075: * @throws PropertyException If un unrecognized token is encountered.
076: */
077: void next() throws PropertyException {
078: currentTokenValue = null;
079: currentTokenStartIndex = exprIndex;
080: boolean currentMaybeOperator = recognizeOperator;
081: boolean bSawDecimal;
082: recognizeOperator = true;
083: for (;;) {
084: if (exprIndex >= exprLength) {
085: currentToken = TOK_EOF;
086: return;
087: }
088: char c = expr.charAt(exprIndex++);
089: switch (c) {
090: case ' ':
091: case '\t':
092: case '\r':
093: case '\n':
094: currentTokenStartIndex = exprIndex;
095: break;
096: case ',':
097: recognizeOperator = false;
098: currentToken = TOK_COMMA;
099: return;
100: case '+':
101: recognizeOperator = false;
102: currentToken = TOK_PLUS;
103: return;
104: case '-':
105: recognizeOperator = false;
106: currentToken = TOK_MINUS;
107: return;
108: case '(':
109: currentToken = TOK_LPAR;
110: recognizeOperator = false;
111: return;
112: case ')':
113: currentToken = TOK_RPAR;
114: return;
115: case '"':
116: case '\'':
117: exprIndex = expr.indexOf(c, exprIndex);
118: if (exprIndex < 0) {
119: exprIndex = currentTokenStartIndex + 1;
120: throw new PropertyException("missing quote");
121: }
122: currentTokenValue = expr.substring(
123: currentTokenStartIndex + 1, exprIndex++);
124: currentToken = TOK_LITERAL;
125: return;
126: case '*':
127: /*
128: * if (currentMaybeOperator) {
129: * recognizeOperator = false;
130: */
131: currentToken = TOK_MULTIPLY;
132: /*
133: * }
134: * else
135: * throw new PropertyException("illegal operator *");
136: */
137: return;
138: case '0':
139: case '1':
140: case '2':
141: case '3':
142: case '4':
143: case '5':
144: case '6':
145: case '7':
146: case '8':
147: case '9':
148: scanDigits();
149: if (exprIndex < exprLength
150: && expr.charAt(exprIndex) == '.') {
151: exprIndex++;
152: bSawDecimal = true;
153: if (exprIndex < exprLength
154: && isDigit(expr.charAt(exprIndex))) {
155: exprIndex++;
156: scanDigits();
157: }
158: } else {
159: bSawDecimal = false;
160: }
161: if (exprIndex < exprLength
162: && expr.charAt(exprIndex) == '%') {
163: exprIndex++;
164: currentToken = TOK_PERCENT;
165: } else {
166: // Check for possible unit name following number
167: currentUnitLength = exprIndex;
168: scanName();
169: currentUnitLength = exprIndex - currentUnitLength;
170: currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
171: : (bSawDecimal ? TOK_FLOAT : TOK_INTEGER);
172: }
173: currentTokenValue = expr.substring(
174: currentTokenStartIndex, exprIndex);
175: return;
176:
177: case '.':
178: nextDecimalPoint();
179: return;
180:
181: case '#': // Start of color value
182: nextColor();
183: return;
184:
185: default:
186: --exprIndex;
187: scanName();
188: if (exprIndex == currentTokenStartIndex) {
189: throw new PropertyException("illegal character");
190: }
191: currentTokenValue = expr.substring(
192: currentTokenStartIndex, exprIndex);
193: // if (currentMaybeOperator) {
194: if (currentTokenValue.equals("mod")) {
195: currentToken = TOK_MOD;
196: return;
197: } else if (currentTokenValue.equals("div")) {
198: currentToken = TOK_DIV;
199: return;
200: }
201: /*
202: * else
203: * throw new PropertyException("unrecognized operator name");
204: * recognizeOperator = false;
205: * return;
206: * }
207: */
208: if (followingParen()) {
209: currentToken = TOK_FUNCTION_LPAR;
210: recognizeOperator = false;
211: } else {
212: currentToken = TOK_NCNAME;
213: recognizeOperator = false;
214: }
215: return;
216: }
217: }
218: }
219:
220: private void nextDecimalPoint() throws PropertyException {
221: if (exprIndex < exprLength && isDigit(expr.charAt(exprIndex))) {
222: ++exprIndex;
223: scanDigits();
224: if (exprIndex < exprLength && expr.charAt(exprIndex) == '%') {
225: exprIndex++;
226: currentToken = TOK_PERCENT;
227: } else {
228: // Check for possible unit name following number
229: currentUnitLength = exprIndex;
230: scanName();
231: currentUnitLength = exprIndex - currentUnitLength;
232: currentToken = (currentUnitLength > 0) ? TOK_NUMERIC
233: : TOK_FLOAT;
234: }
235: currentTokenValue = expr.substring(currentTokenStartIndex,
236: exprIndex);
237: return;
238: }
239: throw new PropertyException("illegal character '.'");
240: }
241:
242: private void nextColor() throws PropertyException {
243: if (exprIndex < exprLength
244: && isHexDigit(expr.charAt(exprIndex))) {
245: ++exprIndex;
246: scanHexDigits();
247: currentToken = TOK_COLORSPEC;
248: currentTokenValue = expr.substring(currentTokenStartIndex,
249: exprIndex);
250: // Probably should have some multiple of 3 for length!
251: return;
252: } else {
253: throw new PropertyException("illegal character '#'");
254: }
255: }
256:
257: /**
258: * Attempt to recognize a valid NAME token in the input expression.
259: */
260: private void scanName() {
261: if (exprIndex < exprLength
262: && isNameStartChar(expr.charAt(exprIndex))) {
263: while (++exprIndex < exprLength
264: && isNameChar(expr.charAt(exprIndex))) {
265: }
266: }
267: }
268:
269: /**
270: * Attempt to recognize a valid sequence of decimal DIGITS in the
271: * input expression.
272: */
273: private void scanDigits() {
274: while (exprIndex < exprLength
275: && isDigit(expr.charAt(exprIndex))) {
276: exprIndex++;
277: }
278: }
279:
280: /**
281: * Attempt to recognize a valid sequence of hexadecimal DIGITS in the
282: * input expression.
283: */
284: private void scanHexDigits() {
285: while (exprIndex < exprLength
286: && isHexDigit(expr.charAt(exprIndex))) {
287: exprIndex++;
288: }
289: }
290:
291: /**
292: * Return a boolean value indicating whether the following non-whitespace
293: * character is an opening parenthesis.
294: */
295: private boolean followingParen() {
296: for (int i = exprIndex; i < exprLength; i++) {
297: switch (expr.charAt(i)) {
298: case '(':
299: exprIndex = i + 1;
300: return true;
301: case ' ':
302: case '\r':
303: case '\n':
304: case '\t':
305: break;
306: default:
307: return false;
308: }
309: }
310: return false;
311: }
312:
313: private static final String NAME_START_CHARS = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
314: private static final String NAME_CHARS = ".-0123456789";
315: private static final String DIGITS = "0123456789";
316: private static final String HEX_CHARS = DIGITS + "abcdefABCDEF";
317:
318: /**
319: * Return a boolean value indicating whether the argument is a
320: * decimal digit (0-9).
321: * @param c The character to check
322: */
323: private static final boolean isDigit(char c) {
324: return DIGITS.indexOf(c) >= 0;
325: }
326:
327: /**
328: * Return a boolean value indicating whether the argument is a
329: * hexadecimal digit (0-9, A-F, a-f).
330: * @param c The character to check
331: */
332: private static final boolean isHexDigit(char c) {
333: return HEX_CHARS.indexOf(c) >= 0;
334: }
335:
336: /**
337: * Return a boolean value indicating whether the argument is whitespace
338: * as defined by XSL (space, newline, CR, tab).
339: * @param c The character to check
340: */
341: private static final boolean isSpace(char c) {
342: switch (c) {
343: case ' ':
344: case '\r':
345: case '\n':
346: case '\t':
347: return true;
348: }
349: return false;
350: }
351:
352: /**
353: * Return a boolean value indicating whether the argument is a valid name
354: * start character, ie. can start a NAME as defined by XSL.
355: * @param c The character to check
356: */
357: private static final boolean isNameStartChar(char c) {
358: return NAME_START_CHARS.indexOf(c) >= 0 || c >= 0x80;
359: }
360:
361: /**
362: * Return a boolean value indicating whether the argument is a valid name
363: * character, ie. can occur in a NAME as defined by XSL.
364: * @param c The character to check
365: */
366: private static final boolean isNameChar(char c) {
367: return NAME_START_CHARS.indexOf(c) >= 0
368: || NAME_CHARS.indexOf(c) >= 0 || c >= 0x80;
369: }
370:
371: }
|