001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.api.lexer;
043:
044: import java.io.Reader;
045: import java.util.ArrayList;
046: import java.util.List;
047: import java.util.Set;
048: import javax.swing.text.Document;
049: import org.netbeans.lib.lexer.TokenHierarchyOperation;
050: import org.netbeans.lib.lexer.inc.DocumentInput;
051:
052: /**
053: * Token hierarchy represents a given input source as a browsable hierarchy of tokens.
054: * <br>
055: * It's is an entry point into the Lexer API.
056: * <br/>
057: * It allows to create token sequences for hierarchy exploration
058: * and watching for token changes by attaching the token hierarchy listeners.
059: * <br>
060: * The hierarchy may either be flat or it can be a tree if the
061: * corresponding language hierarchy contains language embeddings.
062: *
063: * @author Miloslav Metelka
064: * @version 1.00
065: */
066:
067: public final class TokenHierarchy<I> { // "I" stands for mutable input source
068:
069: /**
070: * Get or create mutable token hierarchy for the given swing document.
071: * <br/>
072: * The document may define a top language by doing
073: * <code>doc.putProperty("mimeType", mimeType)</code>
074: * (a language defined for the given mime type will be searched and used)
075: * or by doing <code>putProperty(Language.class, language)</code>.
076: * Otherwise the returned hierarchy will be inactive and {@link #tokenSequence()}
077: * will return null.
078: * <br/>
079: * All the operations with the obtained token hierarchy
080: * must be done under document's read lock (or write lock).
081: *
082: * @param doc non-null swing text document for which the token hiearchy should be obtained.
083: * @return non-null token hierarchy.
084: */
085: public static <D extends Document> TokenHierarchy<D> get(D doc) {
086: return DocumentInput.get(doc).tokenHierarchyControl()
087: .tokenHierarchy();
088: }
089:
090: /**
091: * Create token hierarchy for the given non-mutating input text (for example
092: * java.lang.String).
093: *
094: * @see #create(CharSequence,boolean,Language,Set,InputAttributes)
095: */
096: public static <I extends CharSequence> TokenHierarchy<I> create(
097: I inputText, Language<?> language) {
098: return create(inputText, false, language, null, null);
099: }
100:
101: /**
102: * Create token hierarchy for the given input text.
103: *
104: * @param inputText input text containing the characters to tokenize.
105: * @param copyInputText <code>true</code> in case the content of the input
106: * will not be modified in the future so the created tokens can reference it.
107: * <br>
108: * <code>false</code> means that the text can change in the future
109: * and the tokens should not directly reference it. Instead copy of the necessary text
110: * from the input should be made and the original text should not be referenced.
111: * @param language language defining how the input
112: * will be tokenized.
113: * @param skipTokenIds set containing the token ids for which the tokens
114: * should not be created in the created token hierarchy.
115: * <br/>
116: * <code>null</code> may be passed which means that no tokens will be skipped.
117: * <br/>
118: * This applies to top level of the token hierarchy only (not to embedded tokens).
119: * <br/>
120: * The provided set should be efficient enough - ideally created by e.g.
121: * {@link Language#tokenCategoryMembers(String)}
122: * or {@link Language#merge(Collection,Collection)}.
123: *
124: * @param inputAttributes additional properties related to the input
125: * that may influence token creation or lexer operation
126: * for the particular language (such as version of the language to be used).
127: * @return non-null token hierarchy.
128: */
129: public static <I extends CharSequence, T extends TokenId> TokenHierarchy<I> create(
130: I inputText, boolean copyInputText, Language<T> language,
131: Set<T> skipTokenIds, InputAttributes inputAttributes) {
132:
133: return new TokenHierarchyOperation<I, T>(inputText,
134: copyInputText, language, skipTokenIds, inputAttributes)
135: .tokenHierarchy();
136: }
137:
138: /**
139: * Create token hierarchy for the given reader.
140: *
141: * @param inputReader input reader containing the characters to tokenize.
142: * @param language language defining how the input
143: * will be tokenized.
144: * @param skipTokenIds set containing the token ids for which the tokens
145: * should not be created in the created token hierarchy.
146: * <br/>
147: * <code>null</code> may be passed which means that no tokens will be skipped.
148: * <br/>
149: * This applies to top level of the token hierarchy only (not to embedded tokens).
150: * <br/>
151: * The provided set should be efficient enough - ideally created by e.g.
152: * {@link Language#tokenCategoryMembers(String)}
153: * or {@link Language#merge(Collection,Collection)}.
154: *
155: * @param inputAttributes additional properties related to the input
156: * that may influence token creation or lexer operation
157: * for the particular language (such as version of the language to be used).
158: * @return non-null token hierarchy.
159: */
160: public static <I extends Reader, T extends TokenId> TokenHierarchy<I> create(
161: I inputReader, Language<T> language, Set<T> skipTokenIds,
162: InputAttributes inputAttributes) {
163:
164: return new TokenHierarchyOperation<I, T>(inputReader, language,
165: skipTokenIds, inputAttributes).tokenHierarchy();
166: }
167:
168: private TokenHierarchyOperation<I, ?> operation;
169:
170: TokenHierarchy(TokenHierarchyOperation<I, ?> operation) {
171: this .operation = operation;
172: }
173:
174: /**
175: * Get token sequence of the top level language of the token hierarchy.
176: * <br/>
177: * For token hierarchies over mutable input sources the input source must be read-locked.
178: * <br/>
179: * The token sequences for inner levels of the token hierarchy can be
180: * obtained by calling {@link TokenSequence#embedded()}.
181: *
182: * @return token sequence of the top level of the token hierarchy
183: * or null if the token hierarchy is currently inactive ({@link #isActive()} returns false).
184: */
185: public TokenSequence<?> tokenSequence() {
186: return operation.tokenSequence();
187: }
188:
189: /**
190: * Get token sequence of the top level of the language hierarchy
191: * only if it's of the given language.
192: *
193: * @return non-null token sequence or null if the hierarchy is active
194: * and its top level token sequence satisfies the condition
195: * <code>(tokenSequence().language() == language)</code>.
196: * <br/>
197: * Null is returned otherwise.
198: */
199: public <T extends TokenId> TokenSequence<T> tokenSequence(
200: Language<T> language) {
201: @SuppressWarnings("unchecked")
202: TokenSequence<T> ts = (TokenSequence<T>) operation
203: .tokenSequence(language);
204: return ts;
205: }
206:
207: /**
208: * Get immutable list of token sequences with the given language path
209: * from this hierarchy.
210: * <br/>
211: * For mutable token hierarchies the method should only be invoked
212: * within read-locked input source. A new list should be
213: * obtained after each modification.
214: * {@link java.util.ConcurrentModificationException} may be thrown
215: * when iterating over (or retrieving items) from the obsolete list.
216: * <br/>
217: * For forward exploration of the list the iterator is preferred over
218: * index-based iteration because the list contents can be constructed lazily.
219: *
220: * @param languagePath non-null language path that the obtained token sequences
221: * will all have.
222: * @param startOffset starting offset of the TSs to get. Use 0 for no limit.
223: * If the particular TS ends after this offset then it will be returned.
224: * @param endOffset ending offset of the TS to get. Use Integer.MAX_VALUE for no limit.
225: * If the particular TS starts before this offset then it will be returned.
226: * @return non-null list of <code>TokenSequence</code>s or null if the token hierarchy
227: * is inactive ({@link #isActive()} returns false).
228: */
229: public List<TokenSequence<?>> tokenSequenceList(
230: LanguagePath languagePath, int startOffset, int endOffset) {
231: return operation.tokenSequenceList(languagePath, startOffset,
232: endOffset);
233: }
234:
235: /**
236: * Gets the list of all embedded <code>TokenSequence</code>s at the given offset.
237: * This method will use the top level <code>TokenSequence</code> in this
238: * hierarchy to drill down through the token at the specified <code>offset</code>
239: * and all its possible embedded sub-sequences.
240: *
241: * <p>If the <code>offset</code>
242: * lies at the border between two tokens the <code>backwardBias</code>
243: * parameter will be used to choose either the token on the left hand side
244: * (<code>backwardBias == true</code>) of the <code>offset</code> or
245: * on the right hand side (<code>backwardBias == false</code>).
246: *
247: * <p>
248: * For token hierarchies over mutable input sources this method must only be invoked
249: * within a read-lock over the mutable input source.
250: * </p>
251: *
252: * @param offset The offset to look at.
253: * @param backwardBias If <code>true</code> the backward lying token will
254: * be used in case that the <code>offset</code> specifies position between
255: * two tokens. If <code>false</code> the forward lying token will be used.
256: *
257: * @return The list of all sequences embedded at the given offset. The list
258: * may be empty if there are no tokens in the top level <code>TokenSequence</code>
259: * at the given offset and in the specified direction or if the token hierarchy
260: * is inactive ({@link #isActive()} returns false).
261: * The sequences in the list are ordered from the top level sequence to the bottom one.
262: *
263: * @since 1.20
264: */
265: public List<TokenSequence<?>> embeddedTokenSequences(int offset,
266: boolean backwardBias) {
267: TokenSequence<?> seq = tokenSequence();
268: List<TokenSequence<?>> sequences = new ArrayList<TokenSequence<?>>();
269:
270: while (seq != null) {
271: seq.move(offset);
272: if (seq.moveNext()) {
273: if (seq.offset() == offset && backwardBias) {
274: if (seq.movePrevious()) {
275: sequences.add(seq);
276: seq = seq.embedded();
277: } else {
278: seq = null;
279: }
280: } else {
281: sequences.add(seq);
282: seq = seq.embedded();
283: }
284: } else if (backwardBias && seq.movePrevious()) {
285: sequences.add(seq);
286: seq = seq.embedded();
287: } else {
288: seq = null;
289: }
290: }
291:
292: return sequences;
293: }
294:
295: /**
296: * Get a set of language paths used by this token hierarchy.
297: * <br/>
298: * The set includes "static" paths that are those reachable by traversing
299: * token ids of the top language and searching for the default embeddings
300: * that could be created by
301: * {@link org.netbeans.spi.lexer.LanguageHierarchy#embedding(Token,LanguagePath,InputAttributes)}.
302: *
303: * <p>
304: * For token hierarchies over mutable input sources this method must only be invoked
305: * within a read-lock over the mutable input source.
306: * </p>
307: *
308: *
309: * @return non-null set of language paths. The set will be empty
310: * if the token hierarchy is inactive ({@link #isActive()} returns false).
311: */
312: public Set<LanguagePath> languagePaths() {
313: return operation.languagePaths();
314: }
315:
316: /**
317: * Whether input text of this token hierarchy is mutable or not.
318: *
319: * @return true if the input text is mutable or false otherwise.
320: */
321: public boolean isMutable() {
322: return operation.isMutable();
323: }
324:
325: /**
326: * Get input source providing text over which
327: * this token hierarchy was constructed.
328: * <br/>
329: * It may be {@link java.lang.CharSequence} or {@link java.io.Reader}
330: * or a mutable input source such as swing text document
331: * {@link javax.swing.text.Document}.
332: *
333: * @return non-null input source.
334: */
335: public I inputSource() {
336: return operation.inputSource();
337: }
338:
339: /**
340: * Token hierarchy may be set inactive to release resources consumed
341: * by tokens.
342: * <br/>
343: * Only token hierarchies over a mutable input can become inactive.
344: * <br/>
345: * When inactive the hierarchy does not hold any tokens and
346: * {@link #tokenSequence()} return null.
347: *
348: * <p>
349: * For token hierarchies over mutable input sources this method must only be invoked
350: * within a read-lock over the mutable input source.
351: * </p>
352: *
353: * @return true if valid tokens exist for this hierarchy
354: * or false if the token hierarchy is inactive and there are currently
355: * no active tokens to represent it.
356: */
357: public boolean isActive() {
358: return operation.isActive();
359: }
360:
361: /**
362: * Add listener for token changes inside this hierarchy.
363: *
364: * @param listener token change listener to be added.
365: */
366: public void addTokenHierarchyListener(
367: TokenHierarchyListener listener) {
368: operation.addTokenHierarchyListener(listener);
369: }
370:
371: /**
372: * Remove listener for token changes inside this hierarchy.
373: *
374: * @param listener token change listener to be removed.
375: */
376: public void removeTokenHierarchyListener(
377: TokenHierarchyListener listener) {
378: operation.removeTokenHierarchyListener(listener);
379: }
380:
381: /**
382: * Obtaining of token hierarchy operation is only intended to be done
383: * by package accessor.
384: */
385: TokenHierarchyOperation<I, ?> operation() {
386: return operation;
387: }
388:
389: @Override
390: public String toString() {
391: return operation.toString();
392: }
393:
394: }
|