001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.spi.lexer;
043:
044: import org.netbeans.lib.editor.util.AbstractCharSequence;
045: import org.netbeans.lib.lexer.CharProvider;
046: import org.netbeans.lib.lexer.LexerUtilsConstants;
047:
048: /**
049: * Provides characters to feed the {@link Lexer}.
050: * It logically corresponds to <CODE>java.io.Reader</CODE> but its {@link #read()} method
051: * does not throw any checked exception.
052: * <br>
053: * It allows to backup one or more characters that were already read
054: * by {@link #read()} so that they can be re-read again later.
055: * <br>
056: * It supports viewing of the previously read characters as <CODE>java.lang.CharSequence</CODE>
057: * by {@link #readText(int, int)}.
058: *
059: * <p>
060: * The <code>LexerInput</code> can only be used safely by a single thread.
061: *
062: * <p>The following picture shows an example of java identifier recognition:
063: *
064: * <p><IMG src="doc-files/lexer-input.gif">.
065: *
066: * @author Miloslav Metelka
067: * @version 1.00
068: */
069:
070: public final class LexerInput {
071:
072: /**
073: * Integer constant -1 returned by {@link #read()} to signal
074: * that there are no more characters available on input.
075: * <br/>
076: * It cannot be a part of any token's text but it is counted
077: * as a single character in {@link #backup(int)} operations.
078: * <br/>
079: * Translates to <code>0xFFFF</code> when casted to <code>char</code>.
080: */
081: public static final int EOF = -1;
082:
083: /**
084: * Character provider to which this lexer input delegates
085: * its operation.
086: */
087: private CharProvider charProvider;
088:
089: /**
090: * Character sequence that corresponds
091: * to the text that was read after past the end
092: * of the last returned token.
093: */
094: private ReadText readText;
095:
096: /**
097: * 1 if after EOF was just read or 0 otherwise.
098: */
099: private int eof;
100:
101: /**
102: * Construct instance of the lexer input.
103: *
104: * @param charProvider non-null character provider for this lexer input.
105: */
106: LexerInput(CharProvider charProvider) {
107: this .charProvider = charProvider;
108: }
109:
110: /**
111: * Read a single character from input or return {@link #EOF}.
112: *
113: * @return valid character from input
114: * or {@link #EOF} when there are no more characters available
115: * on input. It's allowed to repeat the reads once EOF was returned
116: * - all of them will return EOF.
117: */
118: public int read() {
119: int c = charProvider.read();
120: if (c == EOF) {
121: eof = 1;
122: }
123: return c;
124: }
125:
126: /**
127: * Undo last <code>count</code> of {@link #read()} operations.
128: * <br>
129: * The operation moves back read-offset (from which {@link #read()}
130: * reads characters) so that subsequent read operations
131: * will re-read the characters that were backed up.
132: * <br/>
133: * If {@link LexerInput#EOF} was returned by {@link #read()} then
134: * it will count as a single character in the backup operation
135: * (even if returned multiple times)
136: * i.e backup(1) will undo reading of (previously read) EOF.
137: *
138: * <p/>
139: * <i>Example:</i><pre>
140: * // backup last character that was read - either regular char or EOF
141: * lexerInput.backup(1);
142: *
143: * // Backup all characters read during recognition of current token
144: * lexerInput.backup(readLengthEOF());
145: * </pre>
146: *
147: * @param count >=0 amount of characters to return back to the input.
148: * @throws IndexOutOfBoundsException in case
149: * the <code>count > readLengthEOF()</code>.
150: */
151: public void backup(int count) {
152: if (count < 0) {
153: throw new IndexOutOfBoundsException("count=" + count
154: + " <0"); // NOI18N
155: }
156: // count >= 0
157: LexerUtilsConstants.checkValidBackup(count, readLengthEOF());
158: if (eof != 0) {
159: eof = 0; // backup EOF
160: count--;
161: }
162: charProvider.backup(count);
163: }
164:
165: /**
166: * Get distance between the current reading point and the begining of a token
167: * being currently recognized (excluding possibly read EOF).
168: *
169: * @return >=0 number of characters obtained from the input
170: * by subsequent {@link #read()} operations since
171: * the last token was returned. The {@link #backup(int)}
172: * operations with positive argument decrease that value
173: * while those with negative argument increase it.
174: * <p>
175: * Once a token gets created by
176: * {@link TokenFactory#createToken(TokenId)}
177: * the value returned by <CODE>readLength()</CODE> becomes zero.
178: * <br>
179: * If {@link LexerInput#EOF} was read then it is not counted into read length.
180: */
181: public int readLength() {
182: return charProvider.readIndex();
183: }
184:
185: /**
186: * Read length that includes EOF as a single character
187: * if it was just read from this input.
188: */
189: public int readLengthEOF() {
190: return readLength() + eof;
191: }
192:
193: /**
194: * Get character sequence that corresponds to characters
195: * that were read by previous {@link #read()} operations in the current token.
196: * <br><i>Example:</i><pre>
197: *
198: * private static final Map kwdStr2id = new HashMap();
199: *
200: * static {
201: * String[] keywords = new String[] { "private", "protected", ... };
202: * TokenId[] ids = new TokenId[] { JavaLanguage.PRIVATE, JavaLanguage.PROTECTED, ... };
203: * for (int i = keywords.length - 1; i >= 0; i--) {
204: * kwdStr2id.put(keywords[i], ids[i]);
205: * }
206: * }
207: *
208: * public Token nextToken() {
209: * ... read characters of identifier/keyword by lexerInput.read() ...
210: *
211: * // Now decide between keyword or identifier
212: * CharSequence text = lexerInput.readText(0, lexerInput.readLength());
213: * TokenId id = (TokenId)kwdStr2id.get(text);
214: * return (id != null) ? id : JavaLanguage.IDENTIFIER;
215: * }
216: *
217: * </pre>
218: *
219: * <p>
220: * If {@link LexerInput#EOF} was previously returned by {@link #read()}
221: * then it will not be a part of the returned charcter sequence
222: * (it also does not count into {@link #readLength()}.
223: *
224: * <p>
225: * Subsequent invocations of this method are cheap as the returned
226: * CharSequence instance is reused and just reinitialized.
227: *
228: * @param start >=0 and =<{@link #readLength()}
229: * is the starting index of the character sequence in the previously read characters.
230: * @param end >=start and =<{@link #readLength()}
231: * is the starting index of the character sequence in the previously read characters.
232: * @return character sequence corresponding to read characters.
233: * <P>The returned character sequence is only valid
234: * until any of <CODE>read()</CODE>, <CODE>backup()</CODE>,
235: * <CODE>createToken()</CODE> or another <CODE>readText()</CODE> is called.
236: * <P>The <CODE>length()</CODE> of the returned
237: * character sequence will be equal
238: * to the <CODE>end - start</CODE>.
239: * <BR>The <CODE>hashCode()</CODE> method of the returned
240: * character sequence works in the same way like
241: * {@link String#hashCode()}.
242: * <BR>The <CODE>equals()</CODE> method
243: * attempts to cast the compared object to {@link CharSequence}
244: * and compare the lengths and if they match
245: * then compare every character of the given
246: * character sequence i.e. the same way like <CODE>String.equals()</CODE> works.
247: * @throws IndexOutOfBoundsException in case the parameters are not in the
248: * required bounds.
249: */
250: public CharSequence readText(int start, int end) {
251: assert (start >= 0 && end >= start && end <= readLength()) : "start="
252: + start
253: + ", end="
254: + end
255: + ", readLength()="
256: + readLength(); // NOI18N
257:
258: if (readText == null) {
259: readText = new ReadText();
260: }
261: readText.reinit(start, end);
262: return readText;
263: }
264:
265: /**
266: * Return the read text for all the characters consumed from the input
267: * for the current token recognition.
268: */
269: public CharSequence readText() {
270: return readText(0, readLength());
271: }
272:
273: /**
274: * Read the next character and check whether it's '\n'
275: * and if not backup it (otherwise leave it consumed).
276: *
277: * <p>
278: * This method is useful in the following scenario:
279: * <pre>
280: * switch (ch) {
281: * case 'x':
282: * ...
283: * break;
284: * case 'y':
285: * ...
286: * break;
287: * case '\r': input.consumeNewline();
288: * case '\n':
289: * // Line separator recognized
290: * }
291: * </pre>
292: *
293: * @return true if newline was consumed or false otherwise.
294: */
295: public boolean consumeNewline() {
296: if (read() == '\n') {
297: return true;
298: } else {
299: backup(1);
300: return false;
301: }
302: }
303:
304: // /**
305: // * Lexer may call this method to get cached <code>java.lang.Integer</code> instance.
306: // * <br/>
307: // * The caching is only guaranteed if the given int value is below or equal to certain value
308: // * - the present implementation uses 127.
309: // * <br/>
310: // * If the value is above this constant a new value will be constructed
311: // * during each call. In such case the clients could possibly
312: // * implement their own caching.
313: // */
314: // public static Integer integerState(int state) {
315: // return IntegerCache.integer(state);
316: // }
317:
318: /**
319: * Helper character sequence being returned from <code>readText()</code>.
320: */
321: private final class ReadText extends
322: AbstractCharSequence.StringLike {
323:
324: private int start;
325:
326: private int length;
327:
328: private void reinit(int start, int end) {
329: this .start = start;
330: this .length = (end - start);
331: }
332:
333: public int length() {
334: return length;
335: }
336:
337: public char charAt(int index) {
338: if (index < 0 || index >= length) {
339: throw new IndexOutOfBoundsException("index=" + index
340: + ", length=" + length); // NOI18N
341: }
342: return charProvider.readExisting(index);
343: }
344:
345: }
346:
347: }
|