001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: Lexer.java,v 1.16 2004/12/15 17:35:55 jycli Exp $
018: */
019: package org.apache.xpath.compiler;
020:
021: import java.util.Vector;
022:
023: import org.apache.xml.utils.PrefixResolver;
024: import org.apache.xpath.res.XPATHErrorResources;
025:
026: /**
027: * This class is in charge of lexical processing of the XPath
028: * expression into tokens.
029: */
030: class Lexer {
031:
032: /**
033: * The target XPath.
034: */
035: private Compiler m_compiler;
036:
037: /**
038: * The prefix resolver to map prefixes to namespaces in the XPath.
039: */
040: PrefixResolver m_namespaceContext;
041:
042: /**
043: * The XPath processor object.
044: */
045: XPathParser m_processor;
046:
047: /**
048: * This value is added to each element name in the TARGETEXTRA
049: * that is a 'target' (right-most top-level element name).
050: */
051: static final int TARGETEXTRA = 10000;
052:
053: /**
054: * Ignore this, it is going away.
055: * This holds a map to the m_tokenQueue that tells where the top-level elements are.
056: * It is used for pattern matching so the m_tokenQueue can be walked backwards.
057: * Each element that is a 'target', (right-most top level element name) has
058: * TARGETEXTRA added to it.
059: *
060: */
061: private int m_patternMap[] = new int[100];
062:
063: /**
064: * Ignore this, it is going away.
065: * The number of elements that m_patternMap maps;
066: */
067: private int m_patternMapSize;
068:
069: /**
070: * Create a Lexer object.
071: *
072: * @param compiler The owning compiler for this lexer.
073: * @param resolver The prefix resolver for mapping qualified name prefixes
074: * to namespace URIs.
075: * @param xpathProcessor The parser that is processing strings to opcodes.
076: */
077: Lexer(Compiler compiler, PrefixResolver resolver,
078: XPathParser xpathProcessor) {
079:
080: m_compiler = compiler;
081: m_namespaceContext = resolver;
082: m_processor = xpathProcessor;
083: }
084:
085: /**
086: * Walk through the expression and build a token queue, and a map of the top-level
087: * elements.
088: * @param pat XSLT Expression.
089: *
090: * @throws javax.xml.transform.TransformerException
091: */
092: void tokenize(String pat)
093: throws javax.xml.transform.TransformerException {
094: tokenize(pat, null);
095: }
096:
097: /**
098: * Walk through the expression and build a token queue, and a map of the top-level
099: * elements.
100: * @param pat XSLT Expression.
101: * @param targetStrings Vector to hold Strings, may be null.
102: *
103: * @throws javax.xml.transform.TransformerException
104: */
105: void tokenize(String pat, Vector targetStrings)
106: throws javax.xml.transform.TransformerException {
107:
108: m_compiler.m_currentPattern = pat;
109: m_patternMapSize = 0;
110:
111: // This needs to grow too.
112: m_compiler.m_opMap = new OpMapVector(
113: OpMap.MAXTOKENQUEUESIZE * 5,
114: OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
115:
116: int nChars = pat.length();
117: int startSubstring = -1;
118: int posOfNSSep = -1;
119: boolean isStartOfPat = true;
120: boolean isAttrName = false;
121: boolean isNum = false;
122:
123: // Nesting of '[' so we can know if the given element should be
124: // counted inside the m_patternMap.
125: int nesting = 0;
126:
127: // char[] chars = pat.toCharArray();
128: for (int i = 0; i < nChars; i++) {
129: char c = pat.charAt(i);
130:
131: switch (c) {
132: case '\"': {
133: if (startSubstring != -1) {
134: isNum = false;
135: isStartOfPat = mapPatternElemPos(nesting,
136: isStartOfPat, isAttrName);
137: isAttrName = false;
138:
139: if (-1 != posOfNSSep) {
140: posOfNSSep = mapNSTokens(pat, startSubstring,
141: posOfNSSep, i);
142: } else {
143: addToTokenQueue(pat
144: .substring(startSubstring, i));
145: }
146: }
147:
148: startSubstring = i;
149:
150: for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++)
151: ;
152:
153: if (c == '\"' && i < nChars) {
154: addToTokenQueue(pat
155: .substring(startSubstring, i + 1));
156:
157: startSubstring = -1;
158: } else {
159: m_processor
160: .error(
161: XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
162: null); //"misquoted literal... expected double quote!");
163: }
164: }
165: break;
166: case '\'':
167: if (startSubstring != -1) {
168: isNum = false;
169: isStartOfPat = mapPatternElemPos(nesting,
170: isStartOfPat, isAttrName);
171: isAttrName = false;
172:
173: if (-1 != posOfNSSep) {
174: posOfNSSep = mapNSTokens(pat, startSubstring,
175: posOfNSSep, i);
176: } else {
177: addToTokenQueue(pat
178: .substring(startSubstring, i));
179: }
180: }
181:
182: startSubstring = i;
183:
184: for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++)
185: ;
186:
187: if (c == '\'' && i < nChars) {
188: addToTokenQueue(pat
189: .substring(startSubstring, i + 1));
190:
191: startSubstring = -1;
192: } else {
193: m_processor
194: .error(
195: XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
196: null); //"misquoted literal... expected single quote!");
197: }
198: break;
199: case 0x0A:
200: case 0x0D:
201: case ' ':
202: case '\t':
203: if (startSubstring != -1) {
204: isNum = false;
205: isStartOfPat = mapPatternElemPos(nesting,
206: isStartOfPat, isAttrName);
207: isAttrName = false;
208:
209: if (-1 != posOfNSSep) {
210: posOfNSSep = mapNSTokens(pat, startSubstring,
211: posOfNSSep, i);
212: } else {
213: addToTokenQueue(pat
214: .substring(startSubstring, i));
215: }
216:
217: startSubstring = -1;
218: }
219: break;
220: case '@':
221: isAttrName = true;
222:
223: // fall-through on purpose
224: case '-':
225: if ('-' == c) {
226: if (!(isNum || (startSubstring == -1))) {
227: break;
228: }
229:
230: isNum = false;
231: }
232:
233: // fall-through on purpose
234: case '(':
235: case '[':
236: case ')':
237: case ']':
238: case '|':
239: case '/':
240: case '*':
241: case '+':
242: case '=':
243: case ',':
244: case '\\': // Unused at the moment
245: case '^': // Unused at the moment
246: case '!': // Unused at the moment
247: case '$':
248: case '<':
249: case '>':
250: if (startSubstring != -1) {
251: isNum = false;
252: isStartOfPat = mapPatternElemPos(nesting,
253: isStartOfPat, isAttrName);
254: isAttrName = false;
255:
256: if (-1 != posOfNSSep) {
257: posOfNSSep = mapNSTokens(pat, startSubstring,
258: posOfNSSep, i);
259: } else {
260: addToTokenQueue(pat
261: .substring(startSubstring, i));
262: }
263:
264: startSubstring = -1;
265: } else if (('/' == c) && isStartOfPat) {
266: isStartOfPat = mapPatternElemPos(nesting,
267: isStartOfPat, isAttrName);
268: } else if ('*' == c) {
269: isStartOfPat = mapPatternElemPos(nesting,
270: isStartOfPat, isAttrName);
271: isAttrName = false;
272: }
273:
274: if (0 == nesting) {
275: if ('|' == c) {
276: if (null != targetStrings) {
277: recordTokenString(targetStrings);
278: }
279:
280: isStartOfPat = true;
281: }
282: }
283:
284: if ((')' == c) || (']' == c)) {
285: nesting--;
286: } else if (('(' == c) || ('[' == c)) {
287: nesting++;
288: }
289:
290: addToTokenQueue(pat.substring(i, i + 1));
291: break;
292: case ':':
293: if (i > 0) {
294: if (posOfNSSep == (i - 1)) {
295: if (startSubstring != -1) {
296: if (startSubstring < (i - 1))
297: addToTokenQueue(pat.substring(
298: startSubstring, i - 1));
299: }
300:
301: isNum = false;
302: isAttrName = false;
303: startSubstring = -1;
304: posOfNSSep = -1;
305:
306: addToTokenQueue(pat.substring(i - 1, i + 1));
307:
308: break;
309: } else {
310: posOfNSSep = i;
311: }
312: }
313:
314: // fall through on purpose
315: default:
316: if (-1 == startSubstring) {
317: startSubstring = i;
318: isNum = Character.isDigit(c);
319: } else if (isNum) {
320: isNum = Character.isDigit(c);
321: }
322: }
323: }
324:
325: if (startSubstring != -1) {
326: isNum = false;
327: isStartOfPat = mapPatternElemPos(nesting, isStartOfPat,
328: isAttrName);
329:
330: if ((-1 != posOfNSSep)
331: || ((m_namespaceContext != null) && (m_namespaceContext
332: .handlesNullPrefixes()))) {
333: posOfNSSep = mapNSTokens(pat, startSubstring,
334: posOfNSSep, nChars);
335: } else {
336: addToTokenQueue(pat.substring(startSubstring, nChars));
337: }
338: }
339:
340: if (0 == m_compiler.getTokenQueueSize()) {
341: m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION,
342: null); //"Empty expression!");
343: } else if (null != targetStrings) {
344: recordTokenString(targetStrings);
345: }
346:
347: m_processor.m_queueMark = 0;
348: }
349:
350: /**
351: * Record the current position on the token queue as long as
352: * this is a top-level element. Must be called before the
353: * next token is added to the m_tokenQueue.
354: *
355: * @param nesting The nesting count for the pattern element.
356: * @param isStart true if this is the start of a pattern.
357: * @param isAttrName true if we have determined that this is an attribute name.
358: *
359: * @return true if this is the start of a pattern.
360: */
361: private boolean mapPatternElemPos(int nesting, boolean isStart,
362: boolean isAttrName) {
363:
364: if (0 == nesting) {
365: if (m_patternMapSize >= m_patternMap.length) {
366: int patternMap[] = m_patternMap;
367: int len = m_patternMap.length;
368: m_patternMap = new int[m_patternMapSize + 100];
369: System.arraycopy(patternMap, 0, m_patternMap, 0, len);
370: }
371: if (!isStart) {
372: m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
373: }
374: m_patternMap[m_patternMapSize] = (m_compiler
375: .getTokenQueueSize() - (isAttrName ? 1 : 0))
376: + TARGETEXTRA;
377:
378: m_patternMapSize++;
379:
380: isStart = false;
381: }
382:
383: return isStart;
384: }
385:
386: /**
387: * Given a map pos, return the corresponding token queue pos.
388: *
389: * @param i The index in the m_patternMap.
390: *
391: * @return the token queue position.
392: */
393: private int getTokenQueuePosFromMap(int i) {
394:
395: int pos = m_patternMap[i];
396:
397: return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
398: }
399:
400: /**
401: * Reset token queue mark and m_token to a
402: * given position.
403: * @param mark The new position.
404: */
405: private final void resetTokenMark(int mark) {
406:
407: int qsz = m_compiler.getTokenQueueSize();
408:
409: m_processor.m_queueMark = (mark > 0) ? ((mark <= qsz) ? mark - 1
410: : mark)
411: : 0;
412:
413: if (m_processor.m_queueMark < qsz) {
414: m_processor.m_token = (String) m_compiler.getTokenQueue()
415: .elementAt(m_processor.m_queueMark++);
416: m_processor.m_tokenChar = m_processor.m_token.charAt(0);
417: } else {
418: m_processor.m_token = null;
419: m_processor.m_tokenChar = 0;
420: }
421: }
422:
423: /**
424: * Given a string, return the corresponding keyword token.
425: *
426: * @param key The keyword.
427: *
428: * @return An opcode value.
429: */
430: final int getKeywordToken(String key) {
431:
432: int tok;
433:
434: try {
435: Integer itok = (Integer) Keywords.getKeyWord(key);
436:
437: tok = (null != itok) ? itok.intValue() : 0;
438: } catch (NullPointerException npe) {
439: tok = 0;
440: } catch (ClassCastException cce) {
441: tok = 0;
442: }
443:
444: return tok;
445: }
446:
447: /**
448: * Record the current token in the passed vector.
449: *
450: * @param targetStrings Vector of string.
451: */
452: private void recordTokenString(Vector targetStrings) {
453:
454: int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
455:
456: resetTokenMark(tokPos + 1);
457:
458: if (m_processor.lookahead('(', 1)) {
459: int tok = getKeywordToken(m_processor.m_token);
460:
461: switch (tok) {
462: case OpCodes.NODETYPE_COMMENT:
463: targetStrings
464: .addElement(PsuedoNames.PSEUDONAME_COMMENT);
465: break;
466: case OpCodes.NODETYPE_TEXT:
467: targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
468: break;
469: case OpCodes.NODETYPE_NODE:
470: targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
471: break;
472: case OpCodes.NODETYPE_ROOT:
473: targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
474: break;
475: case OpCodes.NODETYPE_ANYELEMENT:
476: targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
477: break;
478: case OpCodes.NODETYPE_PI:
479: targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
480: break;
481: default:
482: targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
483: }
484: } else {
485: if (m_processor.tokenIs('@')) {
486: tokPos++;
487:
488: resetTokenMark(tokPos + 1);
489: }
490:
491: if (m_processor.lookahead(':', 1)) {
492: tokPos += 2;
493: }
494:
495: targetStrings.addElement(m_compiler.getTokenQueue()
496: .elementAt(tokPos));
497: }
498: }
499:
500: /**
501: * Add a token to the token queue.
502: *
503: *
504: * @param s The token.
505: */
506: private final void addToTokenQueue(String s) {
507: m_compiler.getTokenQueue().addElement(s);
508: }
509:
510: /**
511: * When a seperator token is found, see if there's a element name or
512: * the like to map.
513: *
514: * @param pat The XPath name string.
515: * @param startSubstring The start of the name string.
516: * @param posOfNSSep The position of the namespace seperator (':').
517: * @param posOfScan The end of the name index.
518: *
519: * @throws javax.xml.transform.TransformerException
520: *
521: * @return -1 always.
522: */
523: private int mapNSTokens(String pat, int startSubstring,
524: int posOfNSSep, int posOfScan)
525: throws javax.xml.transform.TransformerException {
526:
527: String prefix = "";
528:
529: if ((startSubstring >= 0) && (posOfNSSep >= 0)) {
530: prefix = pat.substring(startSubstring, posOfNSSep);
531: }
532: String uName;
533:
534: if ((null != m_namespaceContext) && !prefix.equals("*")
535: && !prefix.equals("xmlns")) {
536: try {
537: if (prefix.length() > 0)
538: uName = ((PrefixResolver) m_namespaceContext)
539: .getNamespaceForPrefix(prefix);
540: else {
541:
542: // Assume last was wildcard. This is not legal according
543: // to the draft. Set the below to true to make namespace
544: // wildcards work.
545: if (false) {
546: addToTokenQueue(":");
547:
548: String s = pat.substring(posOfNSSep + 1,
549: posOfScan);
550:
551: if (s.length() > 0)
552: addToTokenQueue(s);
553:
554: return -1;
555: } else {
556: uName = ((PrefixResolver) m_namespaceContext)
557: .getNamespaceForPrefix(prefix);
558: }
559: }
560: } catch (ClassCastException cce) {
561: uName = m_namespaceContext
562: .getNamespaceForPrefix(prefix);
563: }
564: } else {
565: uName = prefix;
566: }
567:
568: if ((null != uName) && (uName.length() > 0)) {
569: addToTokenQueue(uName);
570: addToTokenQueue(":");
571:
572: String s = pat.substring(posOfNSSep + 1, posOfScan);
573:
574: if (s.length() > 0)
575: addToTokenQueue(s);
576: } else {
577: // To older XPath code it doesn't matter if
578: // error() is called or errorForDOM3().
579: m_processor.errorForDOM3(
580: XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
581: new String[] { prefix }); //"Prefix must resolve to a namespace: {0}";
582:
583: /** old code commented out 17-Sep-2004
584: // error("Could not locate namespace for prefix: "+prefix);
585: // m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
586: // new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
587: */
588:
589: /*** Old code commented out 10-Jan-2001
590: addToTokenQueue(prefix);
591: addToTokenQueue(":");
592:
593: String s = pat.substring(posOfNSSep + 1, posOfScan);
594:
595: if (s.length() > 0)
596: addToTokenQueue(s);
597: ***/
598: }
599:
600: return -1;
601: }
602: }
|