001: /*******************************************************************************
002: * Copyright (c) 2000, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.jdt.core.compiler;
011:
012: import org.eclipse.jdt.core.compiler.InvalidInputException;
013:
014: /**
015: * Definition of a Java scanner, as returned by the <code>ToolFactory</code>.
016: * The scanner is responsible for tokenizing a given source, providing information about
017: * the nature of the token read, its positions and source equivalent.
018: * <p>
019: * When the scanner has finished tokenizing, it answers an EOF token (<code>
020: * ITerminalSymbols#TokenNameEOF</code>).
021: * </p><p>
022: * When encountering lexical errors, an <code>InvalidInputException</code> is thrown.
023: * </p><p>
024: * This interface is not intended to be implemented by clients.
025: * </p>
026: *
027: * @see org.eclipse.jdt.core.ToolFactory
028: * @see ITerminalSymbols
029: * @since 2.0
030: */
031: public interface IScanner {
032:
033: /**
034: * Answers the current identifier source, after unicode escape sequences have
035: * been translated into unicode characters.
036: * For example, if the original source was <code>\\u0061bc</code> then it will answer <code>abc</code>.
037: *
038: * @return the current identifier source, after unicode escape sequences have
039: * been translated into unicode characters
040: */
041: char[] getCurrentTokenSource();
042:
043: /**
044: * Answers the current identifier source, before unicode escape sequences have
045: * been translated into unicode characters.
046: * For example, if the original source was <code>\\u0061bc</code> then it will answer <code>\\u0061bc</code>.
047: *
048: * @return the current identifier source, before unicode escape sequences have
049: * been translated into unicode characters
050: * @since 2.1
051: */
052: char[] getRawTokenSource();
053:
054: /**
055: * Answers the starting position of the current token inside the original source.
056: * This position is zero-based and inclusive. It corresponds to the position of the first character
057: * which is part of this token. If this character was a unicode escape sequence, it points at the first
058: * character of this sequence.
059: *
060: * @return the zero-based, inclusive starting position of the current token inside the original source
061: */
062: int getCurrentTokenStartPosition();
063:
064: /**
065: * Answers the ending position of the current token inside the original source.
066: * This position is zero-based and inclusive. It corresponds to the position of the last character
067: * which is part of this token. If this character was a unicode escape sequence, it points at the last
068: * character of this sequence.
069: *
070: * @return the zero-based, inclusive ending position of the current token inside the original source
071: */
072: int getCurrentTokenEndPosition();
073:
074: /**
075: * Answers the starting position of a given line number. This line has to have been encountered
076: * already in the tokenization process (in other words, it cannot be used to compute positions of lines beyond
077: * the current token). Once the entire source has been processed, it can be used without any limit.
078: * Line starting positions are zero-based, and start immediately after the previous line separator (if any).
079: *
080: * @param lineNumber the given line number
081: * @return the zero-based starting position of the given line number
082: */
083: int getLineStart(int lineNumber);
084:
085: /**
086: * Answers the ending position of a given line number. This line has to have been encountered
087: * already in the tokenization process (in other words, it cannot be used to compute positions of lines beyond
088: * the current token). Once the entire source has been processed, it can be used without any limit.
089: * Line ending positions are zero-based, and correspond to the last character of the line separator
090: * (in case of multi-character line separators).
091: *
092: * @param lineNumber the given line number
093: * @return the zero-based ending position of the given line number
094: */
095: int getLineEnd(int lineNumber);
096:
097: /**
098: * Answers an array of the ending positions of the lines encountered so far. Line ending positions
099: * are zero-based, and correspond to the last character of the line separator (in case of multi-character
100: * line separators).
101: *
102: * @return an array of the ending positions of the lines encountered so far
103: */
104: int[] getLineEnds();
105:
106: /**
107: * Answers a 1-based line number using the lines which have been encountered so far. If the position
108: * is located beyond the current scanned line, then the last line number will be answered.
109: *
110: * @param charPosition the given character position
111: * @return a 1-based line number using the lines which have been encountered so far
112: */
113: int getLineNumber(int charPosition);
114:
115: /**
116: * Reads the next token in the source, and answers its ID as specified by <code>ITerminalSymbols</code>.
117: * Note that the actual token ID values are subject to change if new keywords are added to the language
118: * (for instance, 'assert' is a keyword in 1.4).
119: *
120: * @return the ID of the next token, as specified by <code>ITerminalSymbols</code>
121: * @throws InvalidInputException in case a lexical error was detected while reading the current token
122: */
123: int getNextToken() throws InvalidInputException;
124:
125: /**
126: * Answers the original source being processed (the array itself, not a copy of it).
127: *
128: * @return the original source being processed
129: */
130: char[] getSource();
131:
132: /**
133: * Repositions the scanner on some portion of the original source. The given endPosition is the last valid position.
134: * Beyond this position, the scanner will answer EOF tokens (<code>ITerminalSymbols.TokenNameEOF</code>).
135: *
136: * @param startPosition the given start position
137: * @param endPosition the given end position, i.e. the last valid position
138: */
139: void resetTo(int startPosition, int endPosition);
140:
141: /**
142: * Sets the scanner source to process. By default, the scanner will consider starting at the beginning of the
143: * source until it reaches its end.
144: * If the given source is <code>null</code>, this clears the source.
145: *
146: * @param source the given source, or <code>null</code> to clear the source
147: */
148: void setSource(char[] source);
149: }
|