001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.api.lexer;
043:
044: import java.util.ArrayList;
045: import java.util.Collection;
046: import java.util.Collections;
047: import java.util.EnumSet;
048: import java.util.Map;
049: import java.util.HashMap;
050: import java.util.List;
051: import java.util.Set;
052: import org.netbeans.lib.lexer.LanguageManager;
053: import org.netbeans.lib.lexer.LanguageOperation;
054: import org.netbeans.lib.lexer.LexerApiPackageAccessor;
055: import org.netbeans.lib.lexer.LexerSpiPackageAccessor;
056: import org.netbeans.lib.lexer.TokenIdSet;
057: import org.netbeans.lib.lexer.TokenHierarchyOperation;
058: import org.netbeans.lib.lexer.TokenList;
059: import org.netbeans.lib.lexer.inc.TokenChangeInfo;
060: import org.netbeans.lib.lexer.inc.TokenHierarchyEventInfo;
061: import org.netbeans.lib.lexer.inc.TokenListChange;
062: import org.netbeans.spi.lexer.LanguageHierarchy;
063:
064: /**
065: * Language describes a set of token ids
066: * that comprise the given language.
067: * <br/>
068: * Each language corresponds to a certain mime-type.
069: * <br/>
070: * An input source may be lexed by using an existing language
071: * - see {@link TokenHierarchy} which is an entry point into the Lexer API.
072: * <br>
073: * Language hierarchy is represented by an unmodifiable set of {@link TokenId}s
074: * that can be retrieved by {@link #tokenIds()} and token categories
075: * {@link #tokenCategories()}.
076: *
077: * <p>
078: * The language cannot be instantiated directly.
079: * <br/>
080: * Instead it should be obtained from {@link LanguageHierarchy#language()}
081: * on an existing language hierarchy.
082: *
083: * @see LanguageHierarchy
084: * @see TokenId
085: *
086: * @author Miloslav Metelka
087: * @version 1.00
088: */
089:
090: public final class Language<T extends TokenId> {
091:
092: static {
093: LexerApiPackageAccessor.register(new Accessor());
094: }
095:
096: private final LanguageHierarchy<T> languageHierarchy;
097:
098: private final LanguageOperation<T> languageOperation;
099:
100: private String mimeType;
101:
102: private final int maxOrdinal;
103:
104: private final Set<T> ids;
105:
106: /** Lazily inited indexed ids for quick translation of ordinal to token id. */
107: private TokenIdSet<T> indexedIds;
108:
109: private final Map<String, T> idName2id;
110:
111: /**
112: * Map of category to ids that it contains.
113: */
114: private final Map<String, Set<T>> cat2ids;
115:
116: /**
117: * Lists of token categories for particular id.
118: * <br/>
119: * It's a list because it is ordered (primary category is first).
120: */
121: private List<String>[] id2cats;
122:
123: /**
124: * Lists of non-primary token categories for particular id.
125: * <br/>
126: * It's a list because the order might be important
127: * (e.g. for syntax coloring information resolving) although
128: * the present SPI does not utilize that.
129: */
130: private List<String>[] id2nonPrimaryCats;
131:
132: /**
133: * Finds a language by its mime type.
134: *
135: * <p>This method uses information from <code>LanguageProvider</code>s registered
136: * in the default lookup to find <code>Language</code> for a given
137: * mime type.
138: *
139: * <div class="nonnormative">
140: * <p>Netbeans provide an implementation of <code>LanguageProvider</code>
141: * that reads data from the <code>Editors</code> folder on the system filesystem.
142: * Therefore Netbeans modules can register their <code>Language</code>s
143: * in MimeLookup as any other mime-type related service.
144: *
145: * @param mimeType The mime type of a language that you want to find.
146: * @return The <code>Language</code> registered
147: * for the given <code>mimeType</code>.
148: */
149: public static Language<?> find(String mimeType) {
150: return LanguageManager.getInstance().findLanguage(mimeType);
151: }
152:
153: /**
154: * Construct language by providing a collection of token ids
155: * that comprise the language and extra categories into which the token ids belong.
156: *
157: * @param languageHierarchy non-null language hierarchy is in one-to-one relationship
158: * with the language and represents it on SPI side.
159: * @throws IndexOutOfBoundsException if any token id's ordinal is < 0.
160: */
161: Language(LanguageHierarchy<T> languageHierarchy) {
162: this .languageHierarchy = languageHierarchy;
163: this .languageOperation = new LanguageOperation<T>(
164: languageHierarchy, this );
165: mimeType = LexerSpiPackageAccessor.get().mimeType(
166: languageHierarchy);
167: checkMimeTypeValid(mimeType);
168: // Create ids and find max ordinal
169: Collection<T> createdIds = LexerSpiPackageAccessor.get()
170: .createTokenIds(languageHierarchy);
171: if (createdIds == null)
172: throw new IllegalArgumentException("Ids cannot be null"); // NOI18N
173: maxOrdinal = TokenIdSet.findMaxOrdinal(createdIds);
174:
175: // Convert collection of ids to efficient indexed Set<T>
176: if (createdIds instanceof EnumSet) {
177: ids = (Set<T>) createdIds;
178: } else { // not EnumSet
179: ids = new TokenIdSet<T>(createdIds, maxOrdinal, true);
180: }
181:
182: // Create TokenIdSet instances for token categories
183: Map<String, Collection<T>> createdCat2ids = LexerSpiPackageAccessor
184: .get().createTokenCategories(languageHierarchy);
185: if (createdCat2ids == null) {
186: createdCat2ids = Collections.emptyMap();
187: }
188: cat2ids = new HashMap<String, Set<T>>((int) (createdCat2ids
189: .size() / 0.73f));
190: for (Map.Entry<String, Collection<T>> entry : createdCat2ids
191: .entrySet()) {
192: Collection<T> createdCatIds = entry.getValue();
193: TokenIdSet.checkIdsFromLanguage(createdCatIds, ids);
194: // Do not use the original createdCatIds set because of the following:
195: // 1) Two token categories will have the same sets of contained ids
196: // in the createdCatIds map (the same physical Set instance).
197: // 2) At least one token id will have one of the two categories
198: // as its primary category.
199: // 3) If the original Set instance from the createdCatIds would be used
200: // then both categories would incorrectly contain the extra id(s).
201: Set<T> catIds = new TokenIdSet<T>(createdCatIds,
202: maxOrdinal, false);
203: cat2ids.put(entry.getKey(), catIds);
204: }
205:
206: // Walk through all ids and check duplicate names and primary categories
207: idName2id = new HashMap<String, T>((int) (ids.size() / 0.73f));
208: for (T id : ids) {
209: T sameNameId = idName2id.put(id.name(), id);
210: if (sameNameId != null && sameNameId != id) { // token ids with same name
211: throw new IllegalArgumentException(id
212: + " has duplicate name with " + sameNameId);
213: }
214:
215: String cat = id.primaryCategory();
216: if (cat != null) {
217: Set<T> catIds = cat2ids.get(cat);
218: if (catIds == null) {
219: catIds = new TokenIdSet<T>(null, maxOrdinal, false);
220: cat2ids.put(cat, catIds);
221: }
222: catIds.add(id);
223: }
224: }
225: }
226:
227: /**
228: * Get unmodifiable set of ids contained in this language.
229: * <br/>
230: * An iterator over the set returns the ids sorted by their ordinals.
231: *
232: * @return unmodifiable set of ids contained in this language.
233: */
234: public Set<T> tokenIds() {
235: return ids;
236: }
237:
238: /**
239: * Get tokenId for the given ordinal. This method
240: * can be used by lexers to quickly translate ordinal
241: * to tokenId.
242: * @param ordinal ordinal to be translated to corresponding tokenId.
243: * @return valid tokenId or null if there's no corresponding
244: * tokenId for the given int-id. It's possible because intIds
245: * of the language's token ids do not need to be continuous.
246: * If the ordinal is <0 or higher than the highest
247: * ordinal of all the token ids of this language the method
248: * throws {@link IndexOutOfBoundsException}.
249: * @throws IndexOutOfBoundsException if the ordinal is
250: * <0 or higher than {@link #maxOrdinal()}.
251: */
252: public T tokenId(int ordinal) {
253: synchronized (idName2id) {
254: if (indexedIds == null) {
255: if (ids instanceof EnumSet) {
256: indexedIds = new TokenIdSet<T>(ids, maxOrdinal,
257: false);
258: } else { // not EnumSet
259: indexedIds = (TokenIdSet<T>) ids;
260: }
261: }
262: return indexedIds.indexedIds()[ordinal];
263: }
264: }
265:
266: /**
267: * Similar to {@link #tokenId(int)} however it guarantees
268: * that it will always return non-null tokenId. Typically for a lexer
269: * just being developed it's possible that there are some integer
270: * token ids defined in the generated lexer for which there is
271: * no correspondence in the language. The lexer wrapper should
272: * always call this method if it expects to find a valid
273: * counterpart for given integer id.
274: * @param ordinal ordinal to translate to token id.
275: * @return always non-null tokenId that corresponds to the given integer id.
276: * @throws IndexOutOfBoundsException if the ordinal is
277: * <0 or higher than {@link #maxOrdinal()} or when there is no corresponding
278: * token id for it.
279: */
280: public T validTokenId(int ordinal) {
281: T id = tokenId(ordinal);
282: if (id == null) {
283: throw new IndexOutOfBoundsException(
284: "No tokenId for ordinal=" + ordinal
285: + " in language " + this );
286: }
287: return id;
288: }
289:
290: /**
291: * Find the tokenId from its name.
292: * @param name name of the tokenId to find.
293: * @return tokenId with the requested name or null if it does not exist.
294: */
295: public T tokenId(String name) {
296: return idName2id.get(name);
297: }
298:
299: /**
300: * Similar to {@link #tokenId(String)} but guarantees a valid tokenId to be returned.
301: * @throws IllegalArgumentException if no token in this language has the given name.
302: */
303: public T validTokenId(String name) {
304: T id = tokenId(name);
305: if (id == null) {
306: throw new IllegalArgumentException("No tokenId for name=\""
307: + name + "\" in language " + this );
308: }
309: return id;
310: }
311:
312: /**
313: * Get maximum ordinal of all the token ids that this language contains.
314: * @return maximum integer ordinal of all the token ids that this language contains
315: * or <code>-1</code> if the language contains no token ids.
316: */
317: public int maxOrdinal() {
318: return maxOrdinal;
319: }
320:
321: /**
322: * Get names of all token categories of this language.
323: *
324: * @return unmodifiable set containing names of all token categories
325: * contained in this language.
326: */
327: public Set<String> tokenCategories() {
328: return Collections.unmodifiableSet(cat2ids.keySet());
329: }
330:
331: /**
332: * Get members of the category with given name.
333: *
334: * @param tokenCategory non-null name of the category.
335: * @return set of token ids belonging to the given category.
336: */
337: public Set<T> tokenCategoryMembers(String tokenCategory) {
338: return Collections.unmodifiableSet(cat2ids.get(tokenCategory));
339: }
340:
341: /**
342: * Get list of all token categories for the particular token id.
343: *
344: * @return non-null unmodifiable list of all token categories for the particular token id.
345: * <br>
346: * Primary token's category (if defined for the token id) will be contained
347: * as first one in the list.
348: * @throws IllegalArgumentException if the given token id does not belong
349: * to this language.
350: */
351: public List<String> tokenCategories(T tokenId) {
352: checkMemberId(tokenId);
353: synchronized (idName2id) {
354: if (id2cats == null) {
355: buildTokenIdCategories();
356: }
357: return id2cats[tokenId.ordinal()];
358: }
359: }
360:
361: /**
362: * Get list of non-primary token categories (not containing the primary category)
363: * for the particular token id.
364: * <br/>
365: * If the token id has no primary category defined then the result
366: * of this method is equal to {@link #tokenCategories(TokenId)}.
367: *
368: * @return non-null unmodifiable list of secondary token categories for the particular token id.
369: * Primary token's category (if defined for the token id) will not be contained
370: * in the list.
371: * @throws IllegalArgumentException if the given token id does not belong
372: * to this language.
373: */
374: public List<String> nonPrimaryTokenCategories(T tokenId) {
375: checkMemberId(tokenId);
376: synchronized (idName2id) {
377: if (id2nonPrimaryCats == null) {
378: buildTokenIdCategories();
379: }
380: return id2nonPrimaryCats[tokenId.ordinal()];
381: }
382: }
383:
384: /**
385: * Merge two collections of token ids from this language
386: * into an efficient indexed set (the implementation similar
387: * to {@link java.util.EnumSet}).
388: *
389: * @param tokenIds1 non-null collection of token ids to be contained in the returned set.
390: * @param tokenIds2 collection of token ids to be contained in the returned set.
391: * @return set of token ids indexed by their ordinal number.
392: */
393: public Set<T> merge(Collection<T> tokenIds1, Collection<T> tokenIds2) {
394: TokenIdSet.checkIdsFromLanguage(tokenIds1, ids);
395: // Cannot retain EnumSet as tokenIds will already be wrapped
396: // by unmodifiableSet()
397: Set<T> ret = new TokenIdSet<T>(tokenIds1, maxOrdinal, false);
398: if (tokenIds2 != null) {
399: TokenIdSet.checkIdsFromLanguage(tokenIds2, ids);
400: ret.addAll(tokenIds2);
401: }
402: return ret;
403: }
404:
405: /**
406: * Gets the mime type of this language.
407: *
408: * @return non-null language's mime type.
409: */
410: public String mimeType() {
411: return mimeType;
412: }
413:
414: /** The languages are equal only if they are the same objects. */
415: public @Override
416: boolean equals(Object obj) {
417: return super .equals(obj);
418: }
419:
420: /** The hashCode of the language is the identity hashCode. */
421: public @Override
422: int hashCode() {
423: return super .hashCode();
424: }
425:
426: private void buildTokenIdCategories() {
427: assignCatArrays();
428: // List for decreasing of the number of created maps
429: // for tokenId2category mappings.
430: // List.get(0) is a Map[category, list-of-[category]].
431: // List.get(1) is a Map[category1, Map[category2, list-of-[category1;category2]]].
432: // etc.
433: List<Map<String, Object>> catMapsList = new ArrayList<Map<String, Object>>(
434: 4);
435: // All categories for a single token id
436: List<String> idCats = new ArrayList<String>(4);
437: for (T id : ids) {
438: // No extra sorting of the categories in which the particular id is contained
439: // - making explicit order of the categories could possibly be acomplished
440: // in the future if necessary by supporting some extra hints
441: // Add all the categories for the particular id into idCats
442: for (Map.Entry<String, Set<T>> e : cat2ids.entrySet()) {
443: if (e.getValue().contains(id)) {
444: idCats.add(e.getKey()); // Add this category to id's categories
445: }
446: }
447: // Assign both non-primary cats and all cats
448: id2cats[id.ordinal()] = findCatList(catMapsList, idCats, 0);
449: id2nonPrimaryCats[id.ordinal()] = findCatList(catMapsList,
450: idCats, 1);
451:
452: idCats.clear(); // reuse the list (is cloned if added to catMapsList)
453: }
454: }
455:
456: /**
457: * Find the cached list of categories from the catMapsList
458: * for the particular temporarily collected list of categories.
459: *
460: * @param catMapsList non-null list of cached maps.
461: * <br/>
462: * List.get(0) is a Map[category, list-containing-[category]].
463: * <br/>
464: * List.get(1) is a Map[category1, Map[category2, list-containing-[category1;category2]]].
465: * <br/>
466: * etc.
467: *
468: * @param idCats non-null temporarily collected list of categories for the particular id.
469: * It may be modified after this method gets finished.
470: * @param startIndex >=0 starting index in idCats - either 0 for returning
471: * of all categories or 1 for returning non-primary categories.
472: * @return non-null cached list of categories with contents equal to idCats.
473: */
474: private static List<String> findCatList(
475: List<Map<String, Object>> catMapsList, List<String> idCats,
476: int startIndex) {
477: int size = idCats.size() - startIndex;
478: if (size <= 0) {
479: return Collections.emptyList();
480: }
481: while (catMapsList.size() < size) {
482: catMapsList.add(new HashMap<String, Object>());
483: }
484: // Find the catList as the last item in the cascaded search through the maps
485: Map<String, Object> m = catMapsList.get(--size);
486: for (int i = startIndex; i < size; i++) {
487: @SuppressWarnings("unchecked")
488: Map<String, Object> catMap = (Map<String, Object>) m
489: .get(idCats.get(i));
490: if (catMap == null) {
491: catMap = new HashMap<String, Object>();
492: // Map<String,Map<String,Object>>
493: m.put(idCats.get(i), catMap);
494: }
495: m = catMap;
496: }
497:
498: @SuppressWarnings("unchecked")
499: List<String> catList = (List<String>) m.get(idCats.get(size));
500: if (catList == null) {
501: catList = new ArrayList<String>(idCats.size() - startIndex);
502: catList.addAll((startIndex > 0) ? idCats.subList(
503: startIndex, idCats.size()) : idCats);
504: m.put(idCats.get(size), catList);
505: }
506: return catList;
507: }
508:
509: @SuppressWarnings("unchecked")
510: private void assignCatArrays() {
511: id2cats = (List<String>[]) new List[maxOrdinal + 1];
512: id2nonPrimaryCats = (List<String>[]) new List[maxOrdinal + 1];
513: }
514:
515: /**
516: * Dump list of token ids for this language into string.
517: *
518: * @return dump of contents of this language.
519: */
520: public String dumpInfo() {
521: StringBuilder sb = new StringBuilder();
522: for (T id : ids) {
523: sb.append(id);
524: List<String> cats = tokenCategories(id);
525: if (cats.size() > 0) {
526: sb.append(": ");
527: for (int i = 0; i < cats.size(); i++) {
528: if (i > 0) {
529: sb.append(", ");
530: }
531: String cat = (String) cats.get(i);
532: sb.append('"');
533: sb.append(cat);
534: sb.append('"');
535: }
536: }
537: }
538: return ids.toString();
539: }
540:
541: public String toString() {
542: return mimeType + ", LH: " + languageHierarchy;
543: }
544:
545: private void checkMemberId(T id) {
546: if (!ids.contains(id)) {
547: throw new IllegalArgumentException(id
548: + " does not belong to language " + this ); // NOI18N
549: }
550: }
551:
552: private static void checkMimeTypeValid(String mimeType) {
553: if (mimeType == null) {
554: throw new IllegalStateException("mimeType cannot be null"); // NOI18N
555: }
556: int slashIndex = mimeType.indexOf('/');
557: if (slashIndex == -1) { // no slash
558: throw new IllegalStateException("mimeType=" + mimeType
559: + " does not contain '/'"); // NOI18N
560: }
561: if (mimeType.indexOf('/', slashIndex + 1) != -1) {
562: throw new IllegalStateException("mimeType=" + mimeType
563: + " contains more than one '/'"); // NOI18N
564: }
565: }
566:
567: /**
568: * Return language hierarchy associated with this language.
569: * <br>
570: * This method is for API package accessor only.
571: */
572: LanguageHierarchy<T> languageHierarchy() {
573: return languageHierarchy;
574: }
575:
576: LanguageOperation<T> languageOperation() {
577: return languageOperation;
578: }
579:
580: /**
581: * Accessor of package-private things in this package
582: * that need to be used by the lexer implementation classes.
583: */
584: private static final class Accessor extends LexerApiPackageAccessor {
585:
586: public <T extends TokenId> Language<T> createLanguage(
587: LanguageHierarchy<T> languageHierarchy) {
588: return new Language<T>(languageHierarchy);
589: }
590:
591: public <T extends TokenId> LanguageHierarchy<T> languageHierarchy(
592: Language<T> language) {
593: return language.languageHierarchy();
594: }
595:
596: public <T extends TokenId> LanguageOperation<T> languageOperation(
597: Language<T> language) {
598: return language.languageOperation();
599: }
600:
601: public <I> TokenHierarchy<I> createTokenHierarchy(
602: TokenHierarchyOperation<I, ?> tokenHierarchyOperation) {
603: return new TokenHierarchy<I>(tokenHierarchyOperation);
604: }
605:
606: public TokenHierarchyEvent createTokenChangeEvent(
607: TokenHierarchyEventInfo info) {
608: return new TokenHierarchyEvent(info);
609: }
610:
611: public <T extends TokenId> TokenChange<T> createTokenChange(
612: TokenChangeInfo<T> info) {
613: return new TokenChange<T>(info);
614: }
615:
616: public <T extends TokenId> TokenChangeInfo<T> tokenChangeInfo(
617: TokenChange<T> tokenChange) {
618: return tokenChange.info();
619: }
620:
621: public <I> TokenHierarchyOperation<I, ?> tokenHierarchyOperation(
622: TokenHierarchy<I> tokenHierarchy) {
623: return tokenHierarchy.operation();
624: }
625:
626: public <T extends TokenId> TokenSequence<T> createTokenSequence(
627: TokenList<T> tokenList) {
628: return new TokenSequence<T>(tokenList);
629: }
630:
631: }
632:
633: }
|