001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.api.lexer;
043:
044: /**
045: * Token describes a lexical element of input text.
046: * <br/>
047: * It mainly provides an identification by {@link #id()}
048: * and a textual body (aka token's image) by {@link #text()}.
049: * <br/>
050: * Only lexers should produce token instances and they should do it
051: * solely by using methods of {@link org.netbeans.spi.lexer.TokenFactory}.
052: *
053: * <p>
054: * <b>Note:</b><font color="red">
055: * Do not create custom extensions of this class - lexers may only return
056: * implementations produced by <code>TokenFactory</code>.
057: * Creation of any other token implementations will be refused.
058: * </font>
059: * </p>
060: *
061: * <p>
062: * Token guarantees stability of the {@link #id()} and {@link #length()} methods.
063: * The {@link #hashCode()} and {@link #equals(Object)} methods
064: * use the default implementations from <code>java.lang.Object</code>.
065: * <br/>
066: * The two tokens are only equal if they are the same object.
067: * </p>
068: *
069: * @author Miloslav Metelka
070: * @version 1.00
071: */
072:
073: public abstract class Token<T extends TokenId> {
074:
075: /**
076: * Create token instance.
077: * @throws IllegalStateException if a non-lexer-module-implementation token
078: * is attempted to be created.
079: */
080: protected Token() {
081: if (!(this instanceof org.netbeans.lib.lexer.token.AbstractToken)) {
082: throw new IllegalStateException(
083: "Custom token implementations prohibited."); // NOI18N
084: }
085: }
086:
087: /**
088: * Get identification of this token.
089: *
090: * @return non-null identification of this token.
091: */
092: public abstract T id();
093:
094: /**
095: * Get text of this token (aka token's image) as a character sequence.
096: * <br/>
097: * This text usually corresponds to the characters present in the lexed text input
098: * unless {@link #isCustomText()} returns true.
099: *
100: * <p>
101: * <b>Note for mutable input sources:</b>
102: * <br/>
103: * This method should only be called
104: * within a readonly (or read-write) transaction
105: * over the underlying input source
106: * (such as <code>javax.swing.text.Document.render()</code>
107: * for Swing documents).
108: * <br/>
109: * The result returned by this method
110: * is only valid within a readonly (or read-write) transaction
111: * over the input source (method must be re-called
112: * during the next readonly transaction).
113: * </p>
114: *
115: * @return non-null, non-empty text of this token.
116: * It may be <code>null</code> in case the token was used
117: * for a mutable input and it was removed
118: * from the token list for the given input (but even in such case
119: * the text can be retained in certain cases).
120: *
121: * <p>
122: * The behavior of <code>equals()</code> and <code>hashCode()</code>
123: * of the returned character sequence is generally undefined.
124: * <br/>
125: * The returned character sequence can NOT be compared to another
126: * character sequence by using its <code>equals()</code> method.
127: * <br/>
128: * {@link org.netbeans.api.lexer.TokenUtilities} contains
129: * utility methods related to token text comparing.
130: * </p>
131: *
132: * <p>
133: * The returned text is just a pointer to the primary source of the data
134: * e.g. a swing document. The character data are not duplicated in the tokens.
135: * </p>
136: */
137: public abstract CharSequence text();
138:
139: /**
140: * Check whether {@link #text()} returns a custom value that may differ
141: * from the original content of the text input.
142: * <br/>
143: * Using custom text may be useful in case when only certain part of the token
144: * is useful for the particular use and the token's text can be shrinked
145: * and possibly a flyweight text can be used.
146: * <br/>
147: * Also this is useful when using lexers generated by various lexer generators
148: * that generally allow to use a custom text in the produced tokens.
149: *
150: * @return true if the text of the token does not correspond
151: * to the original characters present in the text input being lexed.
152: */
153: public abstract boolean isCustomText();
154:
155: /**
156: * Get number of characters in the original text input
157: * that the token spans.
158: * <br/>
159: * Usually this is the same value like {@link #text()}</code>.length()</code>
160: * unless {@link #isCustomText()} returns true.
161: * <br/>
162: * Also this method will return valid length in all cases even
163: * when the text of the token could become <code>null</code>.
164: *
165: * @return >=0 length of the token.
166: */
167: public abstract int length();
168:
169: /**
170: * Get the offset at which this token is present in the input
171: * or <code>-1</code> if this token is flyweight (and therefore does not store offset).
172: * <br/>
173: * <b>Note:</b> Use of {@link TokenSequence#offset()} is usually preferred over
174: * this method because it returns actual offset even for the flyweight tokens.
175: * <br/>
176: * If necessary the flyweight token may be replaced by regular token
177: * by using {@link TokenSequence#offsetToken()}.
178: *
179: * <p>
180: * The complexity of the method should generally be constant
181: * regardless of the level of the language embedding.
182: * </p>
183: *
184: * @param tokenHierarchy <code>null</code> should be passed
185: * (the parameter is reserved for future use when token hierarchy snapshots will be implemented).
186: *
187: * @return >=0 offset of the token in the input or <code>-1</code>
188: * if this token is flyweight.
189: */
190: public abstract int offset(TokenHierarchy<?> tokenHierarchy);
191:
192: /**
193: * Checks whether this token instance is used for multiple occurrences
194: * of this token in this or other inputs.
195: * <br/>
196: * For example keywords or operators are typically flyweight tokens
197: * while e.g. identifiers are not flyweight as their text generally varies.
198: * <br/>
199: * Flyweight tokens may decrease the memory consumption for the tokens
200: * considerably for frequently used tokens. For example a single space ' '
201: * may be a useful flyweight token as it's used very often throughout a source.
202: * The decision of what tokens are made flyweight is upon the implementor
203: * of the particular language.
204: *
205: * <p>
206: * If the token is flyweight its {@link #offset(TokenHierarchy)} returns -1.
207: *
208: * @return true if the token is flyweight or false otherwise.
209: */
210: public abstract boolean isFlyweight();
211:
212: /**
213: * Check whether this token represents a complete token
214: * or whether it's a part of a complete token.
215: */
216: public abstract PartType partType();
217:
218: /**
219: * Quickly determine whether this token has any extra properties.
220: */
221: public abstract boolean hasProperties();
222:
223: /**
224: * Get extra property of this token.
225: * <br/>
226: * The token properties are defined by the lexer upon token creation.
227: * The clients of the API cannot set any property of the token.
228: *
229: * @param key non-null key of the property to get.
230: * @return non-null value of the property or null if the property does not
231: * have any value.
232: *
233: * @see #hasProperties()
234: */
235: public abstract Object getProperty(Object key);
236:
237: /**
238: * Make sure the default implementation of <code>hashCode()</code> is used
239: * and the token can safely be used in maps.
240: */
241: @Override
242: public final int hashCode() {
243: return super .hashCode();
244: }
245:
246: /**
247: * Make sure the default implementation of <code>equals()</code> is used
248: * and the token can safely be used in maps.
249: */
250: @Override
251: public final boolean equals(Object o) {
252: return super.equals(o);
253: }
254:
255: }
|