001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.api.lexer;
043:
044: import java.util.ConcurrentModificationException;
045: import org.netbeans.lib.lexer.EmbeddingContainer;
046: import org.netbeans.lib.lexer.LexerUtilsConstants;
047: import org.netbeans.lib.lexer.SubSequenceTokenList;
048: import org.netbeans.lib.lexer.LexerUtilsConstants;
049: import org.netbeans.lib.lexer.TokenList;
050: import org.netbeans.lib.lexer.token.AbstractToken;
051:
052: /**
053: * Token sequence allows to iterate between tokens
054: * of a token hierarchy.
055: * <br/>
056: * Token sequence for top-level language of a token hierarchy
057: * may be obtained by {@link TokenHierarchy#tokenSequence()}.
058: *
059: * <p>
060: * Use of token sequence is a two-step operation:
061: * <ol>
062: * <li>
063: * Position token sequence before token that should first be retrieved
064: * (or behind desired token when iterating backwards).
065: * <br/>
066: * One of the following ways may be used:
067: * <ul>
068: * <li> {@link #move(int)} positions TS before token that either starts
069: * at the given offset or "contains" it.
070: * </li>
071: * <li> {@link #moveIndex(int)} positions TS before n-th token in the underlying
072: * token list.
073: * </li>
074: * <li> {@link #moveStart()} positions TS before the first token. </li>
075: * <li> {@link #moveEnd()} positions TS behind the last token. </li>
076: * <li> Do nothing - TS is positioned before the first token automatically by default. </li>
077: * </ul>
078: * Token sequence will always be positioned between tokens
079: * when using one of the operations above
080: * ({@link #token()} will return <code>null</code> to signal between-tokens location).
081: * <br/>
082: * </li>
083: *
084: * <li>
085: * Start iterating through the tokens in forward/backward direction
086: * by using {@link #moveNext()} or {@link #movePrevious()}.
087: * <br/>
088: * If <code>moveNext()</code> or <code>movePrevious()</code> returned
089: * <code>true</code> then TS is positioned
090: * over a concrete token retrievable by {@link #token()}.
091: * <br/>
092: * Its offset can be retrieved by {@link #offset()}.
093: * </li>
094: * </ol>
095: * </p>
096: *
097: * <p>
098: * An example of forward iteration through the tokens:
099: * <pre>
100: * TokenSequence ts = tokenHierarchy.tokenSequence();
101: * // Possible positioning by ts.move(offset) or ts.moveIndex(index)
102: * while (ts.moveNext()) {
103: * Token t = ts.token();
104: * if (t.id() == ...) { ... }
105: * if (TokenUtilities.equals(t.text(), "mytext")) { ... }
106: * if (ts.offset() == ...) { ... }
107: * }
108: * </pre>
109: * </p>
110: *
111: * <p>
112: * This object should be used by a single thread only. For token hierarchies
113: * over mutable input sources the obtaining and using of the token sequence
114: * needs to be done under a read-lock of the input source.
115: * </p>
116: *
117: * @author Miloslav Metelka
118: * @version 1.00
119: */
120:
121: public final class TokenSequence<T extends TokenId> {
122:
123: private TokenList<T> tokenList; // 8 + 4 = 12 bytes
124:
125: private AbstractToken<T> token; // 16 bytes
126:
127: private int tokenIndex; // 20 bytes
128:
129: /**
130: * Offset in the input at which the current token is located
131: * or <code>-1</code> if the offset needs to be computed.
132: */
133: private int tokenOffset = -1; // 24 bytes
134:
135: /**
136: * Copy of the modCount of the token list. If the token list's modCount
137: * changes (by modification) this token sequence will become invalid.
138: */
139: private final int modCount; // 28 bytes
140:
141: /**
142: * Package-private constructor used by API accessor.
143: */
144: TokenSequence(TokenList<T> tokenList) {
145: this .tokenList = tokenList;
146: this .modCount = tokenList.modCount();
147: }
148:
149: /**
150: * Get the language describing token ids
151: * used by tokens in this token sequence.
152: */
153: public Language<T> language() {
154: return LexerUtilsConstants.innerLanguage(languagePath());
155: }
156:
157: /**
158: * Get the complete language path of the tokens contained
159: * in this token sequence.
160: */
161: public LanguagePath languagePath() {
162: return tokenList.languagePath();
163: }
164:
165: /**
166: * Get token to which this token sequence points to or null
167: * if TS is positioned between tokens
168: * ({@link #moveNext()} or {@link #movePrevious()} were not called yet).
169: * <br/>
170: * A typical iteration usage:
171: * <pre>
172: * TokenSequence ts = tokenHierarchy.tokenSequence();
173: * // Possible positioning by ts.move(offset) or ts.moveIndex(index)
174: * while (ts.moveNext()) {
175: * Token t = ts.token();
176: * if (t.id() == ...) { ... }
177: * if (TokenUtilities.equals(t.text(), "mytext")) { ... }
178: * if (ts.offset() == ...) { ... }
179: * }
180: * </pre>
181: *
182: * The returned token instance may be flyweight
183: * ({@link Token#isFlyweight()} returns true)
184: * which means that its {@link Token#offset(TokenHierarchy)} will return -1.
185: * <br/>
186: * To find a correct offset use {@link #offset()}.
187: * <br/>
188: * Or if its necessary to revert to a regular non-flyweigt token
189: * the {@link #offsetToken()} may be used.
190: * </p>
191: *
192: * <p>
193: * The lifetime of the returned token instance may be limited for mutable inputs.
194: * The token instance should not be held across the input source modifications.
195: * </p>
196: *
197: * @return token instance to which this token sequence is currently positioned
198: * or null if this token sequence is not positioned to any token which may
199: * happen after TS creation or after use of {@link #move(int)} or {@link #moveIndex(int)}.
200: *
201: * @see #offsetToken()
202: */
203: public Token<T> token() {
204: return token;
205: }
206:
207: /**
208: * Similar to {@link #token()} but always returns a non-flyweight token
209: * with the appropriate offset.
210: * <br/>
211: * If the current token is flyweight then this method replaces it
212: * with the corresponding non-flyweight token which it then returns.
213: * <br/>
214: * Subsequent calls to {@link #token()} will also return this non-flyweight token.
215: *
216: * <p>
217: * This method may be handy if the token instance is referenced in a standalone way
218: * (e.g. in an expression node of a parse tree) and it's necessary
219: * to get the appropriate offset from the token itself
220: * later when a token sequence will not be available.
221: * </p>
222: * @throws IllegalStateException if {@link #token()} returns null.
223: */
224: public Token<T> offsetToken() {
225: checkTokenNotNull();
226: if (token.isFlyweight()) {
227: token = tokenList.replaceFlyToken(tokenIndex, token,
228: offset());
229: }
230: return token;
231: }
232:
233: /**
234: * Get the offset of the current token in the underlying input.
235: * <br>
236: * The token's offset should never be computed by a client of the token sequence
237: * by adding/subtracting tokens' length to a client's variable because
238: * in case of the immutable token sequences there can be gaps
239: * between tokens if some tokens get filtered out.
240: * <br>
241: * Instead this method should always be used because it offers
242: * best performance with a constant time complexity.
243: *
244: * @return >=0 absolute offset of the current token in the underlying input.
245: * @throws IllegalStateException if {@link #token()} returns null.
246: */
247: public int offset() {
248: checkTokenNotNull();
249: if (tokenOffset == -1) {
250: tokenOffset = tokenList.tokenOffset(tokenIndex);
251: }
252: return tokenOffset;
253: }
254:
255: /**
256: * Get an index of token to which (or before which) this TS is currently positioned.
257: * <br/>
258: * <p>
259: * Initially or after {@link #move(int)} or {@link #moveIndex(int)}
260: * token sequence is positioned between tokens:
261: * <pre>
262: * Token[0] Token[1] ... Token[n]
263: * ^ ^ ^
264: * Index: 0 1 n
265: * </pre>
266: * </p>
267: *
268: * <p>
269: * After use of {@link #moveNext()} or {@link #movePrevious()}
270: * the token sequence is positioned over one of the actual tokens:
271: * <pre>
272: * Token[0] Token[1] ... Token[n]
273: * ^ ^ ^
274: * Index: 0 1 n
275: * </pre>
276: * </p>
277: *
278: * @return >=0 index of token to which (or before which) this TS is currently positioned.
279: */
280: public int index() {
281: return tokenIndex;
282: }
283:
284: /**
285: * Get embedded token sequence if the token
286: * to which this token sequence is currently positioned
287: * has a language embedding.
288: * <br/>
289: * If there is a custom embedding created by
290: * {@link #createEmbedding(Language,int,int)} it will be returned
291: * instead of the default embedding
292: * (the one created by <code>LanguageHierarchy.embedding()</code>
293: * or <code>LanguageProvider</code>).
294: *
295: * @return embedded sequence or null if no embedding exists for this token.
296: * @throws IllegalStateException if {@link #token()} returns null.
297: */
298: public TokenSequence<?> embedded() {
299: checkTokenNotNull();
300: return embeddedImpl(null);
301: }
302:
303: private <ET extends TokenId> TokenSequence<ET> embeddedImpl(
304: Language<ET> embeddedLanguage) {
305: if (token.isFlyweight())
306: return null;
307: TokenList<ET> embeddedTokenList = LexerUtilsConstants
308: .embeddedTokenList(tokenList, tokenIndex,
309: embeddedLanguage);
310: return (embeddedTokenList != null) ? new TokenSequence<ET>(
311: embeddedTokenList) : null;
312: }
313:
314: /**
315: * Get embedded token sequence if the token
316: * to which this token sequence is currently positioned
317: * has a language embedding.
318: *
319: * @throws IllegalStateException if {@link #token()} returns null.
320: */
321: public <ET extends TokenId> TokenSequence<ET> embedded(
322: Language<ET> embeddedLanguage) {
323: checkTokenNotNull();
324: return embeddedImpl(embeddedLanguage);
325: }
326:
327: /**
328: * Create language embedding without joining of the embedded sections.
329: *
330: * @throws IllegalStateException if {@link #token()} returns null.
331: * @see #createEmbedding(Language, int, int, boolean)
332: */
333: public boolean createEmbedding(Language<?> embeddedLanguage,
334: int startSkipLength, int endSkipLength) {
335: return createEmbedding(embeddedLanguage, startSkipLength,
336: endSkipLength, false);
337: }
338:
339: /**
340: * Create language embedding described by the given parameters.
341: * <br/>
342: * If the underying text input is mutable then this method should only be called
343: * within a write lock over the text input.
344: *
345: * @param embeddedLanguage non-null embedded language
346: * @param startSkipLength >=0 number of characters in an initial part of the token
347: * for which the language embedding is defined that should be excluded
348: * from the embedded section. The excluded characters will not be lexed
349: * and there will be no tokens created for them.
350: * @param endSkipLength >=0 number of characters at the end of the token
351: * for which the language embedding is defined that should be excluded
352: * from the embedded section. The excluded characters will not be lexed
353: * and there will be no tokens created for them.
354: * @param joinSections whether sections with this embedding should be joined
355: * across the input source or whether they should stay separate.
356: * <br/>
357: * For example for HTML sections embedded in JSP this flag should be true:
358: * <pre>
359: * <!-- HTML comment start
360: * <% System.out.println("Hello"); %>
361: still in HTML comment --<
362: * </pre>
363: * <br/>
364: * Only the embedded sections with the same language path can be joined.
365: * @return true if the embedding was created successfully or false if an embedding
366: * with the given language already exists for this token.
367: * @throws IllegalStateException if {@link #token()} returns null.
368: */
369: public boolean createEmbedding(Language<?> embeddedLanguage,
370: int startSkipLength, int endSkipLength, boolean joinSections) {
371: checkTokenNotNull();
372: // Write-lock presence checked in the impl
373: return EmbeddingContainer.createEmbedding(tokenList,
374: tokenIndex, embeddedLanguage, startSkipLength,
375: endSkipLength, joinSections);
376: }
377:
378: /**
379: * Remove previously created language embedding.
380: * <br/>
381: * If the underying text input is mutable then this method should only be called
382: * within a write lock over the text input.
383: */
384: public boolean removeEmbedding(Language<?> embeddedLanguage) {
385: checkTokenNotNull();
386: // Write-lock presence checked in the impl
387: return EmbeddingContainer.removeEmbedding(tokenList,
388: tokenIndex, embeddedLanguage);
389: }
390:
391: /**
392: * Move to the next token in this token sequence.
393: *
394: * <p>
395: * The next token may not necessarily start at the offset where
396: * the previous token ends (there may be gaps between tokens
397: * caused by token filtering). {@link #offset()} should be used
398: * for offset retrieval.
399: * </p>
400: *
401: * @return true if the sequence was successfully moved to the next token
402: * or false if it was not moved before there are no more tokens
403: * in the forward direction.
404: * @throws ConcurrentModificationException if this token sequence
405: * is no longer valid because of an underlying mutable input source modification.
406: */
407: public boolean moveNext() {
408: checkModCount();
409: if (token != null) // Token already fetched
410: tokenIndex++;
411: Object tokenOrEmbeddingContainer = tokenList
412: .tokenOrEmbeddingContainer(tokenIndex);
413: if (tokenOrEmbeddingContainer != null) {
414: AbstractToken origToken = token;
415: token = LexerUtilsConstants
416: .token(tokenOrEmbeddingContainer);
417: // If origToken == null then the right offset might already be pre-computed from move()
418: if (tokenOffset != -1) {
419: if (origToken != null) {
420: // If the token list is continuous or the fetched token
421: // is flyweight (there cannot be a gap before flyweight token)
422: // the original offset can be just increased
423: // by the original token's length.
424: if (tokenList.isContinuous() || token.isFlyweight()) {
425: tokenOffset += origToken.length(); // advance by previous token's length
426: } else
427: // Offset must be recomputed
428: tokenOffset = -1; // mark the offset to be recomputed
429: } else
430: // Not valid token previously
431: tokenOffset = -1;
432: }
433: return true;
434: }
435: if (token != null) // Unsuccessful move from existing token
436: tokenIndex--;
437: return false;
438: }
439:
440: /**
441: * Move to a previous token in this token sequence.
442: *
443: * <p>
444: * The previous token may not necessarily end at the offset where
445: * the previous token started (there may be gaps between tokens
446: * caused by token filtering). {@link #offset()} should be used
447: * for offset retrieval.
448: * </p>
449: *
450: * @return true if the sequence was successfully moved to the previous token
451: * or false if it was not moved because there are no more tokens
452: * in the backward direction.
453: * @throws ConcurrentModificationException if this token sequence
454: * is no longer valid because of an underlying mutable input source modification.
455: */
456: public boolean movePrevious() {
457: checkModCount();
458: if (tokenIndex > 0) {
459: AbstractToken origToken = token;
460: tokenIndex--;
461: token = LexerUtilsConstants.token(tokenList
462: .tokenOrEmbeddingContainer(tokenIndex));
463: if (tokenOffset != -1) {
464: // If the token list is continuous or the original token
465: // is flyweight (there cannot be a gap before flyweight token)
466: // the original offset can be just decreased
467: // by the fetched token's length.
468: if (tokenList.isContinuous() || origToken.isFlyweight()) {
469: tokenOffset -= token.length(); // decrease by the fetched's token length
470: } else { // mark the offset to be computed upon call to offset()
471: tokenOffset = -1;
472: }
473: }
474: return true;
475:
476: } // no tokens below index zero
477: return false;
478: }
479:
480: /**
481: * Position token sequence between <code>index-1</code>
482: * and <code>index</code> tokens.
483: * <br/>
484: * TS will be positioned in the following way:
485: * <pre>
486: * Token[0] ... Token[index-1] Token[index] ...
487: * ^ ^ ^
488: * Index: 0 index-1 index
489: * </pre>
490: *
491: * <p>
492: * Subsequent {@link #moveNext()} or {@link #movePrevious()} is needed to fetch
493: * a concrete token in the desired direction.
494: * <br/>
495: * Subsequent {@link #moveNext()} will position TS over <code>Token[index]</code>
496: * (or {@link #movePrevious()} will position TS over <code>Token[index-1]</code>)
497: * so that <code>{@link #token()} != null</code>.
498: *
499: * @param index index of the token to which this sequence
500: * should be positioned.
501: * <br/>
502: * If <code>index >= {@link #tokenCount()}</code>
503: * then the TS will be positioned to {@link #tokenCount()}.
504: * <br/>
505: * If <code>index < 0</code> then the TS will be positioned to index 0.
506: *
507: * @return difference between requested index and the index to which TS
508: * is really set.
509: * @throws ConcurrentModificationException if this token sequence
510: * is no longer valid because of an underlying mutable input source modification.
511: */
512: public int moveIndex(int index) {
513: checkModCount();
514: if (index >= 0) {
515: Object tokenOrEmbeddingContainer = tokenList
516: .tokenOrEmbeddingContainer(index);
517: if (tokenOrEmbeddingContainer != null) { // enough tokens
518: resetTokenIndex(index);
519: } else
520: // Token at the requested index does not exist - leave orig. index
521: resetTokenIndex(tokenCount());
522: } else
523: // index < 0
524: resetTokenIndex(0);
525: return index - tokenIndex;
526: }
527:
528: /**
529: * Move the token sequence to be positioned before the first token.
530: * <br/>
531: * This is equivalent to <code>moveIndex(0)</code>.
532: */
533: public void moveStart() {
534: moveIndex(0);
535: }
536:
537: /**
538: * Move the token sequence to be positioned behind the last token.
539: * <br/>
540: * This is equivalent to <code>moveIndex(tokenCount())</code>.
541: */
542: public void moveEnd() {
543: moveIndex(tokenCount());
544: }
545:
546: /**
547: * Move token sequence to be positioned between <code>index-1</code>
548: * and <code>index</code> tokens where Token[index] either starts at offset
549: * or "contains" the offset.
550: * <br/>
551: * <pre>
552: * +----------+-----+----------------+--------------+------
553: * | Token[0] | ... | Token[index-1] | Token[index] | ...
554: * | "public" | ... | "static" | "int" | ...
555: * +----------+-----+----------------+--------------+------
556: * ^ ^ ^
557: * Index: 0 index-1 index
558: * Offset: ---^ (if offset points to 'i','n' or 't')
559: * </pre>
560: *
561: * <p>
562: * Subsequent {@link #moveNext()} or {@link #movePrevious()} is needed to fetch
563: * a concrete token.
564: * <br/>
565: * If the offset is too big then the token sequence will be positioned
566: * behind the last token.
567: * </p>
568: *
569: * <p>
570: * If token filtering is used there may be gaps that are not covered
571: * by any tokens and if the offset is contained in such gap then
572: * the token sequence will be positioned before the token that follows the gap.
573: * </p>
574: *
575: *
576: * @param offset absolute offset to which the token sequence should be moved.
577: * @return difference between the reqeuested offset
578: * and the start offset of the token
579: * before which the the token sequence gets positioned.
580: *
581: * @throws ConcurrentModificationException if this token sequence
582: * is no longer valid because of an underlying mutable input source modification.
583: */
584: public int move(int offset) {
585: checkModCount();
586: // Token count in the list may change as possibly other threads
587: // keep asking for tokens. Root token list impls create tokens lazily
588: // when asked by clients.
589: int tokenCount = tokenList.tokenCountCurrent(); // presently created token count
590: if (tokenCount == 0) { // no tokens yet -> attempt to create at least one
591: if (tokenList.tokenOrEmbeddingContainer(0) == null) { // really no tokens at all
592: // In this case the token sequence could not be positioned yet
593: // so no need to reset "index" or other vars
594: resetTokenIndex(0);
595: return offset;
596: }
597: // Re-get the present token count (could be created a chunk of tokens at once)
598: tokenCount = tokenList.tokenCountCurrent();
599: }
600:
601: // tokenCount surely >0
602: int prevTokenOffset = tokenList.tokenOffset(tokenCount - 1);
603: if (offset > prevTokenOffset) { // may need to create further tokens if they do not exist
604: // Force token list to create subsequent tokens
605: // Cannot subtract offset by each token's length because
606: // there may be gaps between tokens due to token id filter use.
607: int tokenLength = LexerUtilsConstants.token(tokenList,
608: tokenCount - 1).length();
609: while (offset >= prevTokenOffset + tokenLength) { // above present token
610: Object tokenOrEmbeddingContainer = tokenList
611: .tokenOrEmbeddingContainer(tokenCount);
612: if (tokenOrEmbeddingContainer != null) {
613: AbstractToken t = LexerUtilsConstants
614: .token(tokenOrEmbeddingContainer);
615: if (t.isFlyweight()) { // need to use previous tokenLength
616: prevTokenOffset += tokenLength;
617: } else { // non-flyweight token - retrieve offset
618: prevTokenOffset = tokenList
619: .tokenOffset(tokenCount);
620: }
621: tokenLength = t.length();
622: tokenCount++;
623:
624: } else { // no more tokens => position behind last token
625: resetTokenIndex(tokenCount);
626: tokenOffset = prevTokenOffset + tokenLength; // May assign the token's offset in advance
627: return offset - tokenOffset;
628: }
629: }
630: resetTokenIndex(tokenCount - 1);
631: tokenOffset = prevTokenOffset; // May assign the token's offset in advance
632: return offset - prevTokenOffset;
633: }
634:
635: // The offset is within the currently recognized tokens
636: // Use binary search
637: int low = 0;
638: int high = tokenCount - 1;
639:
640: while (low <= high) {
641: int mid = (low + high) / 2;
642: int midStartOffset = tokenList.tokenOffset(mid);
643:
644: if (midStartOffset < offset) {
645: low = mid + 1;
646: } else if (midStartOffset > offset) {
647: high = mid - 1;
648: } else {
649: // Token starting exactly at offset found
650: resetTokenIndex(mid);
651: tokenOffset = midStartOffset;
652: return 0; // right at the token begining
653: }
654: }
655:
656: // Not found exactly and high + 1 == low => high < low
657: // BTW there may be gaps between tokens; if offset is in gap then position to higher token
658: if (high >= 0) { // could be -1
659: AbstractToken t = LexerUtilsConstants
660: .token(tokenList, high);
661: prevTokenOffset = tokenList.tokenOffset(high);
662: // If gaps allowed check whether the token at "high" contains the offset
663: if (!tokenList.isContinuous()
664: && offset > prevTokenOffset + t.length()) {
665: // Offset in the gap above the "high" token
666: high++;
667: prevTokenOffset += t.length();
668: }
669: } else { // at least one token exists => use token at index 0
670: high = 0;
671: prevTokenOffset = tokenList.tokenOffset(0); // result may differ from 0
672: }
673: resetTokenIndex(high);
674: tokenOffset = prevTokenOffset;
675: return offset - prevTokenOffset;
676: }
677:
678: /**
679: * Check whether this TS contains zero tokens.
680: * <br/>
681: * This check is strongly preferred over <code>tokenCount() == 0</code>.
682: *
683: * @see #tokenCount()
684: */
685: public boolean isEmpty() {
686: return (tokenIndex == 0 && tokenList
687: .tokenOrEmbeddingContainer(0) == null);
688: }
689:
690: /**
691: * Return total count of tokens in this sequence.
692: * <br>
693: * <b>Note:</b> Calling this method will lead
694: * to creation of all the remaining tokens in the sequence
695: * if they were not yet created.
696: *
697: * @return total number of tokens in this token sequence.
698: */
699: public int tokenCount() {
700: checkModCount();
701: return tokenList.tokenCount();
702: }
703:
704: /**
705: * Create sub sequence of this token sequence that only returns
706: * tokens above the given offset.
707: *
708: * @param startOffset only tokens satisfying
709: * <code>tokenStartOffset + tokenLength > startOffset</code>
710: * will be present in the returned sequence.
711: * @return non-null sub sequence of this token sequence.
712: */
713: public TokenSequence<T> subSequence(int startOffset) {
714: return subSequence(startOffset, Integer.MAX_VALUE);
715: }
716:
717: /**
718: * Create sub sequence of this token sequence that only returns
719: * tokens between the given offsets.
720: *
721: * @param startOffset only tokens satisfying
722: * <code>tokenStartOffset + tokenLength > startOffset</code>
723: * will be present in the returned sequence.
724: * @param endOffset >=startOffset only tokens satisfying
725: * <code>tokenStartOffset < endOffset</code>
726: * will be present in the returned sequence.
727: * @return non-null sub sequence of this token sequence.
728: */
729: public TokenSequence<T> subSequence(int startOffset, int endOffset) {
730: checkModCount(); // Ensure subsequences on valid token sequences only
731: TokenList<T> tl;
732: if (tokenList.getClass() == SubSequenceTokenList.class) {
733: SubSequenceTokenList<T> stl = (SubSequenceTokenList<T>) tokenList;
734: tl = stl.delegate();
735: startOffset = Math.max(startOffset, stl.limitStartOffset());
736: endOffset = Math.min(endOffset, stl.limitEndOffset());
737: } else
738: // Regular token list
739: tl = tokenList;
740: return new TokenSequence<T>(new SubSequenceTokenList<T>(tl,
741: startOffset, endOffset));
742: }
743:
744: /**
745: * Check whether this token sequence is valid and can be iterated.
746: * <br/>
747: * If this method returns false then the underlying token hierarchy was modified
748: * and this token sequence should be abandoned.
749: *
750: * @return true if this token sequence is ready for use or false if it should be abandoned.
751: */
752: public boolean isValid() {
753: return (tokenList.modCount() == this .modCount);
754: }
755:
756: @Override
757: public String toString() {
758: return LexerUtilsConstants.appendTokenList(null, tokenList,
759: tokenIndex, 0, Integer.MAX_VALUE, true, 0).toString();
760: }
761:
762: private void resetTokenIndex(int index) {
763: // Position to the given index e.g. by move() and moveIndex()
764: tokenIndex = index;
765: token = null;
766: tokenOffset = -1;
767: }
768:
769: private void checkTokenNotNull() {
770: if (token == null) {
771: throw new IllegalStateException(
772: "Caller of TokenSequence forgot to call moveNext/Previous() "
773: + "or it returned false (no more tokens): tokenIndex="
774: + tokenIndex); // NOI18N
775: }
776: }
777:
778: private void checkModCount() {
779: if (tokenList.modCount() != this .modCount) {
780: throw new ConcurrentModificationException(
781: "Caller uses obsolete token sequence which is no longer valid. Underlying token hierarchy"
782: + // NOI18N
783: " has been modified: modCount="
784: + this .modCount
785: + // NOI18N
786: " != upToDateModCount="
787: + tokenList.modCount() + // NOI18N
788: "\nPlease report against caller's module which needs to be fixed (not the lexer module)." // NOI18N
789: );
790: }
791: }
792:
793: }
|