/*
 [The "BSD licence"]
 Copyright (c) 2005-2006 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;

import antlr.RecognitionException;
import antlr.Token;
import antlr.TokenStreamRewriteEngine;
import antlr.TokenWithIndex;
import antlr.collections.AST;
import org.antlr.Tool;
import org.antlr.analysis.*;
import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.Barrier;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.Utils;
import org.antlr.stringtemplate.StringTemplate;
import org.antlr.stringtemplate.language.AngleBracketTemplateLexer;

import java.io.*;
import java.util.*;
/** Represents a grammar in memory. */
public class Grammar {
    public static final String SYNPRED_RULE_PREFIX = "synpred";

    public static final String GRAMMAR_FILE_EXTENSION = ".g";

    /** used for generating lexer temp files */
    public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g";

    public static final int INITIAL_DECISION_LIST_SIZE = 300;
    public static final int INVALID_RULE_INDEX = -1;

    // the various kinds of labels. t=type, id=ID, types+=type, ids+=ID
    public static final int RULE_LABEL = 1;
    public static final int TOKEN_LABEL = 2;
    public static final int RULE_LIST_LABEL = 3;
    public static final int TOKEN_LIST_LABEL = 4;
    public static final int CHAR_LABEL = 5; // used in lexer for x='a'

    public static String[] LabelTypeToString =
        { "<invalid>", "rule", "token", "rule-list", "token-list" };

    public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens";
    public static final String FRAGMENT_RULE_MODIFIER = "fragment";

    public static final String SYNPREDGATE_ACTION_NAME = "synpredgate";

    /** When converting ANTLR char and string literals, this is the
     *  value set of escape chars; it maps an escape char to its char value.
     */
    public static int ANTLRLiteralEscapedCharValue[] = new int[255];

    /** Given a char, we need to be able to show it as an ANTLR literal.
     */
    public static String ANTLRLiteralCharValueEscape[] = new String[255];
    static {
        ANTLRLiteralEscapedCharValue['n'] = '\n';
        ANTLRLiteralEscapedCharValue['r'] = '\r';
        ANTLRLiteralEscapedCharValue['t'] = '\t';
        ANTLRLiteralEscapedCharValue['b'] = '\b';
        ANTLRLiteralEscapedCharValue['f'] = '\f';
        ANTLRLiteralEscapedCharValue['\\'] = '\\';
        ANTLRLiteralEscapedCharValue['\''] = '\'';
        ANTLRLiteralEscapedCharValue['"'] = '"';
        ANTLRLiteralCharValueEscape['\n'] = "\\n";
        ANTLRLiteralCharValueEscape['\r'] = "\\r";
        ANTLRLiteralCharValueEscape['\t'] = "\\t";
        ANTLRLiteralCharValueEscape['\b'] = "\\b";
        ANTLRLiteralCharValueEscape['\f'] = "\\f";
        ANTLRLiteralCharValueEscape['\\'] = "\\\\";
        ANTLRLiteralCharValueEscape['\''] = "\\'";
    }
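
    /* The two tables above are inverses of each other for the common
     * escapes; a minimal sketch of the relationship (values taken from
     * the static initializer above):
     *
     *   assert ANTLRLiteralEscapedCharValue['n'] == '\n';
     *   assert ANTLRLiteralCharValueEscape['\n'].equals("\\n");
     */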

    public static final int LEXER = 1;
    public static final int PARSER = 2;
    public static final int TREE_PARSER = 3;
    public static final int COMBINED = 4;
    public static final String[] grammarTypeToString = new String[] {
        "<invalid>", "lexer", "parser", "tree", "combined"
    };

    public static final String[] grammarTypeToFileNameSuffix = new String[] {
        "<invalid>", "Lexer", "Parser", "", // no suffix for tree grammars
        "Parser" // if combined grammar, gen Parser and Lexer will be done later
    };

    /** This is the buffer of *all* tokens found in the grammar file
     *  including whitespace tokens etc... I use this to extract
     *  lexer rules from combined grammars.
     */
    protected TokenStreamRewriteEngine tokenBuffer;
    public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__";

    public static class Decision {
        public int decision;
        public NFAState startState;
        public GrammarAST blockAST;
        public DFA dfa;
    }

    public class LabelElementPair {
        public antlr.Token label;
        public GrammarAST elementRef;
        public String referencedRuleName;
        /** Has an action referenced the label? Set by ActionAnalysis.g
         *  Currently only set for rule labels.
         */
        public boolean actionReferencesLabel;
        public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL}

        public LabelElementPair(antlr.Token label, GrammarAST elementRef) {
            this.label = label;
            this.elementRef = elementRef;
            this.referencedRuleName = elementRef.getText();
        }

        public Rule getReferencedRule() {
            return getRule(referencedRuleName);
        }

        public String toString() {
            return elementRef.toString();
        }
    }

    /** What name did the user provide for this grammar? */
    public String name;

    /** What type of grammar is this: lexer, parser, tree walker? */
    public int type;

    /** A list of options specified at the grammar level such as language=Java.
     *  The value can be an AST for complicated values such as character sets.
     *  There may be code generator specific options in here. I do no
     *  interpretation of the key/value pairs...they are simply available for
     *  whoever wants them.
     */
    protected Map options;

    public static final Set legalOptions = new HashSet() {
        {
            add("language");
            add("tokenVocab");
            add("output");
            add("rewrite");
            add("ASTLabelType");
            add("TokenLabelType");
            add("superClass");
            add("filter");
            add("k");
            add("backtrack");
            add("memoize");
        }
    };

    public static final Set doNotCopyOptionsToLexer = new HashSet() {
        {
            add("output");
            add("ASTLabelType");
            add("superClass");
            add("k");
            add("backtrack");
            add("memoize");
            add("rewrite");
        }
    };

    public static final Map defaultOptions = new HashMap() {
        {
            put("language", "Java");
        }
    };

    /** Is there a global fixed lookahead set for this grammar?
     *  If 0, nothing specified. -1 implies we have not looked at
     *  the options table yet to set k.
     */
    protected int global_k = -1;

    /** Map a scope to a map of name:action pairs.
     *  Map<String, Map<String,GrammarAST>>
     *  The code generator will use this to fill holes in the output files.
     *  I track the AST node for the action in case I need the line number
     *  for errors.
     */
    protected Map actions = new HashMap();

    /** The NFA that represents the grammar with edges labelled with tokens
     *  or epsilon. It is more suitable for analysis than an AST representation.
     */
    protected NFA nfa;

    /** Token names and literal tokens like "void" are uniquely indexed,
     *  with -1 implying EOF. Characters are different; they go from
     *  -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you
     *  want to lex. Labels of DFA/NFA transitions can be both tokens
     *  and characters. I use negative numbers for bookkeeping labels
     *  like EPSILON. Char/String literals and token types overlap in the same
     *  space, however.
     */
    protected int maxTokenType = Label.MIN_TOKEN_TYPE - 1;

    /** TODO: hook this to the charVocabulary option */
    protected IntSet charVocabulary = null;

    /** Map a token name like ID (but not a literal like "while") to its token type */
    protected Map tokenIDToTypeMap = new HashMap();

    /** Map token literals like "while" to their token type. It may be that
     *  WHILE="while"=35, in which case both tokenIDToTypeMap and this
     *  field will have entries mapped to 35.
     */
    protected Map stringLiteralToTypeMap = new HashMap();

    /** Map a token type to its token name.
     *  Must subtract MIN_TOKEN_TYPE from the index.
     */
    protected Vector typeToTokenList = new Vector();

    /** For interpreting and testing, you sometimes want to import token
     *  definitions from another grammar (instead of reading token defs from
     *  a file).
     */
    protected Grammar importTokenVocabularyFromGrammar;

    /** For ANTLRWorks, we want to be able to map a line:col to a specific
     *  decision DFA so it can display the DFA.
     */
    Map lineColumnToLookaheadDFAMap = new HashMap();

    public Tool tool;

    /** The unique set of all rule references in any rule; a set of Token
     *  objects so two refs to the same rule can exist at different
     *  line/column positions.
     */
    protected Set<antlr.Token> ruleRefs = new HashSet<antlr.Token>();

    /** The unique set of all token ID references in any rule */
    protected Set<antlr.Token> tokenIDRefs = new HashSet<antlr.Token>();

    /** If combined or lexer grammar, track the lexer rules; Set<String>.
     *  Track lexer rules so we can warn about undefined tokens.
     */
    protected Set<String> lexerRules = new HashSet<String>();

    /** Be able to assign a number to every decision in the grammar;
     *  decisions are numbered 1..n.
     */
    protected int decisionNumber = 0;

    /** Rules are uniquely labeled from 1..n */
    protected int ruleIndex = 1;

    /** A list of all rules that are in any left-recursive cycle. There
     *  could be multiple cycles, but this is a flat list of all problematic
     *  rules.
     */
    protected Set leftRecursiveRules;

    /** Set when an external tool requests that DFA analysis abort
     *  prematurely. Aborts at DFA granularity; each DFA is also limited
     *  by a size and computation-time failsafe.
     */
    protected boolean externalAnalysisAbort;

    /** When we read in a grammar, we track the list of syntactic predicates
     *  and build faux rules for them later. See my blog entry Dec 2, 2005:
     *  http://www.antlr.org/blog/antlr3/lookahead.tml
     *  This maps the name (we make up) for a pred to the AST grammar fragment.
     */
    protected LinkedHashMap nameToSynpredASTMap;

    /** Map a rule name to its Rule object
     */
    protected LinkedHashMap nameToRuleMap = new LinkedHashMap();

    /** Track the scopes defined outside of rules and the scopes associated
     *  with all rules (even if empty).
     */
    protected Map scopes = new HashMap();

    /** Map a rule index to its name; use a Vector on purpose as the new
     *  collections stuff won't let me setSize and make it grow. :(
     *  I need a specific guaranteed index, which the Collections stuff
     *  won't let me have.
     */
    protected Vector ruleIndexToRuleList = new Vector();

    /** An AST that records the entire input grammar with all rules. A simple
     *  grammar with one rule, "grammar t; a : A | B ;", looks like:
     *  ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) )
     */
    protected GrammarAST grammarTree = null;

    /** Each subrule/rule is a decision point and we must track them so we
     *  can go back later and build DFA predictors for them. This includes
     *  all the rules, subrules, optional blocks, ()+, ()*, etc... The
     *  elements in this list are NFAState objects.
     */
    protected Vector indexToDecision = new Vector(INITIAL_DECISION_LIST_SIZE);

    /** If non-null, this is the code generator we will use to generate
     *  recognizers in the target language.
     */
    protected CodeGenerator generator;

    NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this);

    /** Used during LOOK to detect computation cycles */
    protected Set lookBusy = new HashSet();

    /** The checkForLeftRecursion method needs to track what rules it has
     *  visited to detect infinite recursion.
     */
    protected Set visitedDuringRecursionCheck = null;

    protected boolean watchNFAConversion = false;

    /** For merged lexer/parsers, we must construct a separate lexer spec.
     *  This is the template for the lexer; put the literals first, then the
     *  regular rules. We don't need to specify a token vocab import as
     *  I make the new grammar import from the old one all in memory; I don't
     *  want to force it to read from disk. The lexer grammar will have the
     *  same name as the original grammar but live in a different file name.
     *  Foo.g with a combined grammar will have FooParser.java generated,
     *  plus Foo__.g with again Foo inside. The latter will however generate
     *  FooLexer.java, as it's a lexer grammar. A bit odd, but autogenerated.
     *  Can tweak later if we want.
     */
    protected StringTemplate lexerGrammarST = new StringTemplate(
        "lexer grammar <name>;\n" +
        "<if(options)>" +
        "options {\n" +
        " <options:{<it.name>=<it.value>;<\\n>}>\n" +
        "}<\\n>\n" +
        "<endif>\n" +
        "<actionNames,actions:{n,a|@<n> {<a>}\n}>\n" +
        "<literals:{<it.ruleName> : <it.literal> ;\n}>\n" +
        "<rules>",
        AngleBracketTemplateLexer.class);

    /** What file name holds this grammar? */
    protected String fileName;

    /** How long in ms did it take to build DFAs for this grammar?
     *  If this grammar is a combined grammar, it only records time for
     *  the parser grammar component. This only records the time to
     *  do the LL(*) work; NFA->DFA conversion.
     */
    public long DFACreationWallClockTimeInMS;

    public int numberOfSemanticPredicates = 0;
    public int numberOfManualLookaheadOptions = 0;
    public Set setOfNondeterministicDecisionNumbers = new HashSet();
    public Set setOfNondeterministicDecisionNumbersResolvedWithPredicates = new HashSet();
    public Set setOfDFAWhoseConversionTerminatedEarly = new HashSet();

    /** Track decisions with syn preds specified for reporting.
     *  This is a set of BLOCK-type AST nodes.
     */
    public Set<GrammarAST> blocksWithSynPreds = new HashSet();

    /** Track decisions whose DFAs actually use the syn preds.
     *  Computed during NFA-to-DFA conversion.
     */
    public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet();

    /** Track names of preds so we can avoid generating preds that aren't used.
     *  Computed during NFA-to-DFA conversion. Just walk accept states
     *  and look for synpreds because that is the only state target whose
     *  incident edges can have synpreds. The same is true for
     *  decisionsWhoseDFAsUsesSynPreds.
     */
    public Set<String> synPredNamesUsedInDFA = new HashSet();

    /** Track decisions with sem preds specified for reporting.
     *  This is a set of BLOCK-type AST nodes.
     */
    public Set<GrammarAST> blocksWithSemPreds = new HashSet();

    /** Track decisions whose DFAs actually use the sem preds. Set<DFA> */
    public Set decisionsWhoseDFAsUsesSemPreds = new HashSet();

    protected boolean allDecisionDFACreated = false;

    /** We need a way to detect when a lexer grammar is autogenerated from
     *  another grammar or we are just sending in a string representing a
     *  grammar. We don't want to generate a .tokens file, for example,
     *  in such cases.
     */
    protected boolean builtFromString = false;

    /** Factored out the sanity checking code; delegate to it. */
    GrammarSanity sanity = new GrammarSanity(this);

    public Grammar() {
        initTokenSymbolTables();
        builtFromString = true;
    }

    public Grammar(String grammarString)
        throws antlr.RecognitionException, antlr.TokenStreamException
    {
        builtFromString = true;
        initTokenSymbolTables();
        setFileName("<string>");
        setGrammarContent(new StringReader(grammarString));
    }

    public Grammar(String fileName, String grammarString)
        throws antlr.RecognitionException, antlr.TokenStreamException
    {
        this(null, fileName, new StringReader(grammarString));
    }

    /** Create a grammar from a Reader. Parse the grammar, building a tree
     *  and loading a symbol table of sorts here in Grammar. Then create
     *  an NFA and associated factory. Walk the AST representing the grammar,
     *  building the state clusters of the NFA.
     */
    public Grammar(Tool tool, String fileName, Reader r)
        throws antlr.RecognitionException, antlr.TokenStreamException
    {
        initTokenSymbolTables();
        setTool(tool);
        setFileName(fileName);
        setGrammarContent(r);
    }
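
    /* A typical construction sequence, sketched (file name "T.g" is
     * hypothetical; tool is an org.antlr.Tool instance):
     *
     *   Grammar g = new Grammar(tool, "T.g",
     *                           new BufferedReader(new FileReader("T.g")));
     *   g.createNFAs();          // build the NFA from the grammar AST
     *   g.createLookaheadDFAs(); // run LL(*) analysis per decision
     */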

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    public String getFileName() {
        return fileName;
    }

    public void setName(String name) {
        if (name == null) {
            return;
        }
        // strip any path to get the plain file name
        String saneFile = fileName.replace('\\', '/');
        int lastSlash = saneFile.lastIndexOf('/');
        String onlyFileName = saneFile.substring(lastSlash + 1);
        // don't error check autogenerated grammars (those built from strings,
        // such as '__' lexer files)
        if (!builtFromString) {
            int lastDot = onlyFileName.lastIndexOf('.');
            String onlyFileNameNoSuffix = null;
            if (lastDot < 0) {
                ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR,
                                   fileName);
                onlyFileNameNoSuffix = onlyFileName + GRAMMAR_FILE_EXTENSION;
            } else {
                onlyFileNameNoSuffix = onlyFileName.substring(0, lastDot);
            }
            if (!name.equals(onlyFileNameNoSuffix)) {
                ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
                                   name, fileName);
            }
        }
        this.name = name;
    }

    public void setGrammarContent(String grammarString)
        throws antlr.RecognitionException, antlr.TokenStreamException
    {
        setGrammarContent(new StringReader(grammarString));
    }

    public void setGrammarContent(Reader r)
        throws antlr.RecognitionException, antlr.TokenStreamException
    {
        ErrorManager.resetErrorState(); // reset in case > 1 grammar in same thread

        // BUILD AST FROM GRAMMAR
        ANTLRLexer lexer = new ANTLRLexer(r);
        lexer.setFilename(this.getFileName());
        // use the rewrite engine because we want to buffer up all tokens
        // in case they have a merged lexer/parser, send lexer rules to
        // new grammar.
        lexer.setTokenObjectClass("antlr.TokenWithIndex");
        tokenBuffer = new TokenStreamRewriteEngine(lexer);
        tokenBuffer.discard(ANTLRParser.WS);
        tokenBuffer.discard(ANTLRParser.ML_COMMENT);
        tokenBuffer.discard(ANTLRParser.COMMENT);
        tokenBuffer.discard(ANTLRParser.SL_COMMENT);
        ANTLRParser parser = new ANTLRParser(tokenBuffer);
        parser.getASTFactory().setASTNodeClass(GrammarAST.class);
        parser.setFilename(this.getFileName());
        parser.setASTNodeClass("org.antlr.tool.GrammarAST");
        parser.grammar(this);
        grammarTree = (GrammarAST) parser.getAST();
        setFileName(lexer.getFilename()); // the lexer #src might change name
        if (grammarTree.findFirstType(ANTLRParser.RULE) == null) {
            ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
            return;
        }

        // Get syn pred rules and add to existing tree
        List synpredRules =
            getArtificialRulesForSyntacticPredicates(parser, nameToSynpredASTMap);
        for (int i = 0; i < synpredRules.size(); i++) {
            GrammarAST rAST = (GrammarAST) synpredRules.get(i);
            grammarTree.addChild(rAST);
        }

        if (Tool.internalOption_PrintGrammarTree) {
            System.out.println(grammarTree.toStringList());
        }

        // ASSIGN TOKEN TYPES
        //System.out.println("### assign types");
        AssignTokenTypesWalker ttypesWalker = new AssignTokenTypesWalker();
        ttypesWalker.setASTNodeClass("org.antlr.tool.GrammarAST");
        try {
            ttypesWalker.grammar(grammarTree, this);
        } catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
        }

        // DEFINE RULES
        //System.out.println("### define rules");
        DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker();
        defineItemsWalker.setASTNodeClass("org.antlr.tool.GrammarAST");
        try {
            defineItemsWalker.grammar(grammarTree, this);
        } catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
        }

        // ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS
        examineAllExecutableActions();
        checkAllRulesForUselessLabels();

        nameSpaceChecker.checkConflicts();
    }

    /** If the grammar is a merged grammar, return the text of the implicit
     *  lexer grammar.
     */
    public String getLexerGrammar() {
        if (lexerGrammarST.getAttribute("literals") == null &&
            lexerGrammarST.getAttribute("rules") == null)
        {
            // if no rules, return nothing
            return null;
        }
        lexerGrammarST.setAttribute("name", name);
        // if there are any actions set for lexer, pass them in
        if (actions.get("lexer") != null) {
            lexerGrammarST.setAttribute("actionNames",
                ((Map) actions.get("lexer")).keySet());
            lexerGrammarST.setAttribute("actions",
                ((Map) actions.get("lexer")).values());
        }
        // make sure generated grammar has the same options
        if (options != null) {
            Iterator optionNames = options.keySet().iterator();
            while (optionNames.hasNext()) {
                String optionName = (String) optionNames.next();
                if (!doNotCopyOptionsToLexer.contains(optionName)) {
                    Object value = options.get(optionName);
                    lexerGrammarST.setAttribute("options.{name,value}",
                                                optionName, value);
                }
            }
        }
        return lexerGrammarST.toString();
    }

    public String getImplicitlyGeneratedLexerFileName() {
        return name + IGNORE_STRING_IN_GRAMMAR_FILE_NAME +
               LEXER_GRAMMAR_FILE_EXTENSION;
    }

    public File getImportedVocabFileName(String vocabName) {
        return new File(tool.getLibraryDirectory(),
                        File.separator + vocabName +
                        CodeGenerator.VOCAB_FILE_EXTENSION);
    }

    /** Parse a rule we add artificially that is a list of the other lexer
     *  rules, like this: "Tokens : ID | INT | SEMI ;" nextToken() will invoke
     *  this rule to set the current token. Add char literals before
     *  the rule references.
     *
     *  If in filter mode, we want every alt to backtrack and we need to
     *  set k=1 to force the "first token def wins" rule. Otherwise, the
     *  longest-match rule comes into play with LL(*).
     *
     *  The ANTLRParser antlr.g file now invokes this when parsing a lexer
     *  grammar, which I think is proper even though it peeks at the info
     *  that later phases will compute. It gets a list of lexer rules
     *  and builds a string representing the rule; then it creates a parser
     *  and adds the resulting tree to the grammar's tree.
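     *
     *  For example, given lexer rules ID, INT, and SEMI in filter mode, the
     *  template below yields rule text of the form (a sketch):
     *
     *    Tokens options {k=1; backtrack=true;} : ID|INT|SEMI;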
     */
    public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
                                                   List ruleNames,
                                                   boolean filterMode)
    {
        StringTemplate matchTokenRuleST = null;
        if (filterMode) {
            matchTokenRuleST = new StringTemplate(
                ARTIFICIAL_TOKENS_RULENAME +
                " options {k=1; backtrack=true;} : <rules; separator=\"|\">;",
                AngleBracketTemplateLexer.class);
        } else {
            matchTokenRuleST = new StringTemplate(
                ARTIFICIAL_TOKENS_RULENAME + " : <rules; separator=\"|\">;",
                AngleBracketTemplateLexer.class);
        }

        // Now add token rule references
        for (int i = 0; i < ruleNames.size(); i++) {
            String rname = (String) ruleNames.get(i);
            matchTokenRuleST.setAttribute("rules", rname);
        }
        //System.out.println("tokens rule: "+matchTokenRuleST.toString());

        ANTLRLexer lexer =
            new ANTLRLexer(new StringReader(matchTokenRuleST.toString()));
        lexer.setTokenObjectClass("antlr.TokenWithIndex");
        TokenStreamRewriteEngine tokbuf = new TokenStreamRewriteEngine(lexer);
        tokbuf.discard(ANTLRParser.WS);
        tokbuf.discard(ANTLRParser.ML_COMMENT);
        tokbuf.discard(ANTLRParser.COMMENT);
        tokbuf.discard(ANTLRParser.SL_COMMENT);
        ANTLRParser parser = new ANTLRParser(tokbuf);
        parser.grammar = this;
        parser.gtype = ANTLRParser.LEXER_GRAMMAR;
        parser.setASTNodeClass("org.antlr.tool.GrammarAST");
        try {
            parser.rule();
            if (Tool.internalOption_PrintGrammarTree) {
                System.out.println("Tokens rule: " +
                                   parser.getAST().toStringTree());
            }
            GrammarAST p = grammarAST;
            while (p.getType() != ANTLRParser.LEXER_GRAMMAR) {
                p = (GrammarAST) p.getNextSibling();
            }
            p.addChild(parser.getAST());
        } catch (Exception e) {
            ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE,
                               e);
        }
        return (GrammarAST) parser.getAST();
    }

    /** For any syntactic predicates, we need to define rules for them; they
     *  will get defined automatically like any other rule. :)
     */
    protected List getArtificialRulesForSyntacticPredicates(
        ANTLRParser parser, LinkedHashMap nameToSynpredASTMap)
    {
        List rules = new ArrayList();
        if (nameToSynpredASTMap == null) {
            return rules;
        }
        Set predNames = nameToSynpredASTMap.keySet();
        boolean isLexer = grammarTree.getType() == ANTLRParser.LEXER_GRAMMAR;
        for (Iterator it = predNames.iterator(); it.hasNext();) {
            String synpredName = (String) it.next();
            GrammarAST fragmentAST =
                (GrammarAST) nameToSynpredASTMap.get(synpredName);
            GrammarAST ruleAST =
                parser.createSimpleRuleAST(synpredName, fragmentAST, isLexer);
            rules.add(ruleAST);
        }
        return rules;
    }

    protected void initTokenSymbolTables() {
        // the faux token types take the first NUM_FAUX_LABELS positions;
        // then we must have room for the predefined runtime token types
        // like DOWN/UP used for tree parsing.
        typeToTokenList.setSize(Label.NUM_FAUX_LABELS + Label.MIN_TOKEN_TYPE - 1);
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.INVALID, "<INVALID>");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.EOT, "<EOT>");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.SEMPRED, "<SEMPRED>");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.SET, "<SET>");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.EPSILON, Label.EPSILON_STR);
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.EOF, "EOF");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.EOR_TOKEN_TYPE - 1, "<EOR>");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.DOWN - 1, "DOWN");
        typeToTokenList.set(Label.NUM_FAUX_LABELS + Label.UP - 1, "UP");
        tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID));
        tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT));
        tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED));
        tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET));
        tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON));
        tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF));
        tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE));
        tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN));
        tokenIDToTypeMap.put("UP", Utils.integer(Label.UP));
    }

    /** Walk the list of options, altering this Grammar object according
     *  to any I recognize.
    protected void processOptions() {
        Iterator optionNames = options.keySet().iterator();
        while (optionNames.hasNext()) {
            String optionName = (String) optionNames.next();
            Object value = options.get(optionName);
            if ( optionName.equals("tokenVocab") ) {

            }
        }
    }
    */

    public void createNFAs() {
        //System.out.println("### create NFAs");
        if (nfa != null) {
            // don't let it create more than once; has side-effects
            return;
        }
        if (getRules().size() == 0) {
            return;
        }
        nfa = new NFA(this); // create NFA that TreeToNFAConverter'll fill in
        NFAFactory factory = new NFAFactory(nfa);
        TreeToNFAConverter nfaBuilder =
            new TreeToNFAConverter(this, nfa, factory);
        try {
            nfaBuilder.grammar(grammarTree);
        } catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, name, re);
        }
        //System.out.println("NFA has "+factory.getNumberOfStates()+" states");
    }

    /** For each decision in this grammar, compute a single DFA using the
     *  NFA states associated with the decision. The DFA construction
     *  determines whether or not the alternatives in the decision are
     *  separable using a regular lookahead language.
     *
     *  Store the lookahead DFAs in the AST created from the user's grammar
     *  so the code generator or whoever can easily access them.
     *
     *  This is a separate method because you might want to create a
     *  Grammar without doing the expensive analysis.
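     *
     *  A typical sequence, sketched:
     *
     *    Grammar g = new Grammar(tool, fileName, reader);
     *    g.createLookaheadDFAs(); // calls createNFAs() first if needed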
     */
    public void createLookaheadDFAs() {
        if (nfa == null) {
            createNFAs();
        }

        long start = System.currentTimeMillis();

        //System.out.println("### create DFAs");
        int numDecisions = getNumberOfDecisions();
        if (NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION) {
            for (int decision = 1; decision <= numDecisions; decision++) {
                NFAState decisionStartState = getDecisionNFAStartState(decision);
                if (!externalAnalysisAbort &&
                    decisionStartState.getNumberOfTransitions() > 1)
                {
                    createLookaheadDFA(decision);
                }
            }
        } else {
            ErrorManager.info("two-threaded DFA conversion");
            // create a barrier expecting the two DFA threads and this main
            // creation thread
            Barrier barrier = new Barrier(3);
            // assume 2 CPUs for now
            int midpoint = numDecisions / 2;
            NFAConversionThread t1 =
                new NFAConversionThread(this, barrier, 1, midpoint);
            new Thread(t1).start();
            // thread 2 picks up right after thread 1's range
            midpoint++;
            NFAConversionThread t2 =
                new NFAConversionThread(this, barrier, midpoint, numDecisions);
            new Thread(t2).start();
            // wait for these two threads to finish
            try {
                barrier.waitForRelease();
            } catch (InterruptedException e) {
                ErrorManager.internalError("what the hell? DFA interruptus", e);
            }
        }

        long stop = System.currentTimeMillis();
        DFACreationWallClockTimeInMS = stop - start;

        // indicate that we've finished building DFA (even if #decisions==0)
        allDecisionDFACreated = true;
    }

    public void createLookaheadDFA(int decision) {
        Decision d = getDecision(decision);
        String enclosingRule = d.startState.getEnclosingRule();
        Rule r = getRule(enclosingRule);

        //System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA);
        if (r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule)) {
            return;
        }
        NFAState decisionStartState = getDecisionNFAStartState(decision);
        long startDFA = 0, stopDFA = 0;
        if (watchNFAConversion) {
            System.out.println("--------------------\nbuilding lookahead DFA (d="
                               + decisionStartState.getDecisionNumber()
                               + ") for "
                               + decisionStartState.getDescription());
            startDFA = System.currentTimeMillis();
        }
        DFA lookaheadDFA = new DFA(decision, decisionStartState);
        if ((lookaheadDFA.analysisAborted() && // did analysis bug out?
             lookaheadDFA.getUserMaxLookahead() != 1) || // either k=* or k>1
            (lookaheadDFA.probe.isNonLLStarDecision() && // >1 alt recurses, k=*
             lookaheadDFA.getAutoBacktrackMode()))
        {
            // set k=1 option if not already k=1 and try again
            // clean up tracking stuff
            decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA);
            // TODO: clean up synPredNamesUsedInDFA also (harder)
            lookaheadDFA = null; // make sure other memory is "free" before redoing
            d.blockAST.setOption(this, "k", Utils.integer(1));
            //System.out.println("trying decision "+decision+" again with k=1");
            lookaheadDFA = new DFA(decision, decisionStartState);
            if (lookaheadDFA.analysisAborted()) { // did analysis bug out?
                ErrorManager.internalError("could not even do k=1 for decision "
                                           + decision);
            }
        }

        setLookaheadDFA(decision, lookaheadDFA);

        // create map from line:col to decision DFA (for ANTLRWorks)
        GrammarAST decisionAST =
            nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber);
        int line = decisionAST.getLine();
        int col = decisionAST.getColumn();
        lineColumnToLookaheadDFAMap.put(
            new StringBuffer().append(line + ":").append(col).toString(),
            lookaheadDFA);

        if (watchNFAConversion) {
            stopDFA = System.currentTimeMillis();
            System.out.println("cost: " + lookaheadDFA.getNumberOfStates()
                               + " states, " + (int) (stopDFA - startDFA) + " ms");
        }
        //System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA);
    }

    /** Terminate DFA creation (grammar analysis).
     */
    public void externallyAbortNFAToDFAConversion() {
        externalAnalysisAbort = true;
    }

    public boolean NFAToDFAConversionExternallyAborted() {
        return externalAnalysisAbort;
    }

    /** Return a new unique integer in the token type space */
    public int getNewTokenType() {
        maxTokenType++;
        return maxTokenType;
    }

    /** Define a token at a particular token type value. Blast an
     *  old value with a new one. This is called directly during import vocab
     *  operation to set up tokens with specific values.
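     *
     *  For example, defineToken("ID", 5) records ID in tokenIDToTypeMap and
     *  stores "ID" at typeToTokenList index Label.NUM_FAUX_LABELS + 5 - 1,
     *  since faux labels shift the table and indices cannot be negative.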
     */
    public void defineToken(String text, int tokenType) {
        if (tokenIDToTypeMap.get(text) != null) {
            // already defined? Must be a predefined one like EOF;
            // do nothing
            return;
        }
        // the index in the typeToTokenList table is actually shifted to
        // hold faux labels as you cannot have negative indices.
        if (text.charAt(0) == '\'') {
            stringLiteralToTypeMap.put(text, Utils.integer(tokenType));
        } else { // must be a label like ID
            tokenIDToTypeMap.put(text, Utils.integer(tokenType));
        }
        int index = Label.NUM_FAUX_LABELS + tokenType - 1;
        //System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index);
        this.maxTokenType = Math.max(this.maxTokenType, tokenType);
        if (index >= typeToTokenList.size()) {
            typeToTokenList.setSize(index + 1);
        }
        String prevToken = (String) typeToTokenList.get(index);
        if (prevToken == null || prevToken.charAt(0) == '\'') {
            // only record if nothing there before or if thing before was a literal
            typeToTokenList.set(index, text);
        }
    }

    /** Define a new rule. A new rule index is created by incrementing
     *  ruleIndex.
     */
    public void defineRule(antlr.Token ruleToken, String modifier,
                           Map options, GrammarAST tree,
                           GrammarAST argActionAST, int numAlts)
    {
        String ruleName = ruleToken.getText();
        /*
        System.out.println("defineRule("+ruleName+",modifier="+modifier+
                           "): index="+ruleIndex);
        */
        if (getRule(ruleName) != null) {
            ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION,
                                      this, ruleToken, ruleName);
        }

        Rule r = new Rule(this, ruleName, ruleIndex, numAlts);
        r.modifier = modifier;
        nameToRuleMap.put(ruleName, r);
        setRuleAST(ruleName, tree);
        r.setOptions(options, ruleToken);
        r.argActionAST = argActionAST;
        ruleIndexToRuleList.setSize(ruleIndex + 1);
        ruleIndexToRuleList.set(ruleIndex, ruleName);
        ruleIndex++;
        if (ruleName.startsWith(SYNPRED_RULE_PREFIX)) {
            r.isSynPred = true;
        }
    }

    /** Define a new syntactic predicate and get back its name for use in
     *  building a semantic predicate reference to the syn pred.
     */
    public String defineSyntacticPredicate(GrammarAST blockAST,
                                           String currentRuleName)
    {
        if (nameToSynpredASTMap == null) {
            nameToSynpredASTMap = new LinkedHashMap();
        }
        String predName =
            SYNPRED_RULE_PREFIX + (nameToSynpredASTMap.size() + 1);
        nameToSynpredASTMap.put(predName, blockAST);
        return predName;
    }

    public LinkedHashMap getSyntacticPredicates() {
        return nameToSynpredASTMap;
    }

    public GrammarAST getSyntacticPredicate(String name) {
        if (nameToSynpredASTMap == null) {
            return null;
        }
        return (GrammarAST) nameToSynpredASTMap.get(name);
    }

    public void synPredUsedInDFA(DFA dfa, SemanticContext semCtx) {
        decisionsWhoseDFAsUsesSynPreds.add(dfa);
        semCtx.trackUseOfSyntacticPredicates(this); // walk ctx looking for preds
        //System.out.println("after tracking use for dec "+dfa.decisionNumber+": "+synPredNamesUsedInDFA);
    }

    /** Given @scope::name {action} define it for this grammar. Later,
     *  the code generator will ask for the actions table.
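     *
     *  For example, "@parser::members { int i; }" arrives here with scope
     *  "parser" and action name "members" (a sketch of typical input,
     *  assuming standard ANTLR named-action syntax).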
     */
    public void defineNamedAction(GrammarAST ampersandAST, String scope,
                                  GrammarAST nameAST, GrammarAST actionAST)
    {
        if (scope == null) {
            scope = getDefaultActionScope(type);
        }
        //System.out.println("@"+scope+"::"+nameAST.getText()+"{"+actionAST.getText()+"}");
        String actionName = nameAST.getText();
        Map scopeActions = (Map) actions.get(scope);
        if (scopeActions == null) {
            scopeActions = new HashMap();
            actions.put(scope, scopeActions);
        }
        GrammarAST a = (GrammarAST) scopeActions.get(actionName);
        if (a != null) {
            ErrorManager.grammarError(ErrorManager.MSG_ACTION_REDEFINITION,
                                      this, nameAST.getToken(),
                                      nameAST.getText());
        } else {
            scopeActions.put(actionName, actionAST);
        }
    }

    public Map getActions() {
        return actions;
    }

    /** Given a grammar type, what should be the default action scope?
     *  If I say @members in a COMBINED grammar, for example, the
     *  default scope should be "parser".
     */
    public String getDefaultActionScope(int grammarType) {
        switch (grammarType) {
            case Grammar.LEXER:
                return "lexer";
            case Grammar.PARSER:
            case Grammar.COMBINED:
                return "parser";
            case Grammar.TREE_PARSER:
                return "treeparser";
        }
        return null;
    }

    public void defineLexerRuleFoundInParser(antlr.Token ruleToken,
                                             GrammarAST ruleAST)
    {
        //System.out.println("rule tree is:\n"+ruleAST.toStringTree());
        /*
        String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex,
                                                       ruleAST.ruleStopTokenIndex);
        */
        // first, create the text of the rule
        StringBuffer buf = new StringBuffer();
        buf.append("// $ANTLR src \"");
        buf.append(getFileName());
        buf.append("\" ");
        buf.append(ruleAST.getLine());
        buf.append("\n");
        for (int i = ruleAST.ruleStartTokenIndex;
             i <= ruleAST.ruleStopTokenIndex && i < tokenBuffer.size();
             i++)
        {
            TokenWithIndex t = (TokenWithIndex) tokenBuffer.getToken(i);
            // undo the text deletions done by the lexer (ugh)
            if (t.getType() == ANTLRParser.BLOCK) {
                buf.append("(");
            } else if (t.getType() == ANTLRParser.ACTION) {
                buf.append("{");
                buf.append(t.getText());
                buf.append("}");
            } else if (t.getType() == ANTLRParser.SEMPRED ||
                       t.getType() == ANTLRParser.SYN_SEMPRED ||
                       t.getType() == ANTLRParser.GATED_SEMPRED ||
                       t.getType() == ANTLRParser.BACKTRACK_SEMPRED)
            {
                buf.append("{");
                buf.append(t.getText());
                buf.append("}?");
            } else if (t.getType() == ANTLRParser.ARG_ACTION) {
                buf.append("[");
                buf.append(t.getText());
                buf.append("]");
            } else {
                buf.append(t.getText());
            }
        }
        String ruleText = buf.toString();
        //System.out.println("[["+ruleText+"]]");
        // now put the rule into the lexer grammar template
        lexerGrammarST.setAttribute("rules", ruleText);
        // track this lexer rule's name
        lexerRules.add(ruleToken.getText());
    }

    /** If someone does PLUS='+' in the parser, we must make sure we get
     *  "PLUS : '+' ;" in the lexer, not "T73 : '+' ;".
     */
    public void defineLexerRuleForAliasedStringLiteral(String tokenID,
                                                       String literal,
                                                       int tokenType)
    {
        //System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType);
        lexerGrammarST.setAttribute("literals.{ruleName,type,literal}",
                                    tokenID, Utils.integer(tokenType), literal);
        // track this lexer rule's name
        lexerRules.add(tokenID);
    }

    public void defineLexerRuleForStringLiteral(String literal, int tokenType) {
        //System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType);
        lexerGrammarST.setAttribute("literals.{ruleName,type,literal}",
                                    computeTokenNameFromLiteral(tokenType, literal),
                                    Utils.integer(tokenType),
                                    literal);
    }

    public Rule getRule(String ruleName) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        return r;
    }

    public int getRuleIndex(String ruleName) {
        Rule r = getRule(ruleName);
        if (r != null) {
            return r.index;
        }
        return INVALID_RULE_INDEX;
    }

    public String getRuleName(int ruleIndex) {
        return (String) ruleIndexToRuleList.get(ruleIndex);
    }

    public AttributeScope defineGlobalScope(String name, Token scopeAction) {
        AttributeScope scope = new AttributeScope(this, name, scopeAction);
        scopes.put(name, scope);
        return scope;
    }

    public AttributeScope createReturnScope(String ruleName, Token retAction) {
        AttributeScope scope = new AttributeScope(this, ruleName, retAction);
        scope.isReturnScope = true;
        return scope;
    }

    public AttributeScope createRuleScope(String ruleName, Token scopeAction) {
        AttributeScope scope = new AttributeScope(this, ruleName, scopeAction);
        scope.isDynamicRuleScope = true;
        return scope;
    }

    public AttributeScope createParameterScope(String ruleName, Token argAction) {
        AttributeScope scope = new AttributeScope(this, ruleName, argAction);
        scope.isParameterScope = true;
        return scope;
    }

    /** Get a global scope */
    public AttributeScope getGlobalScope(String name) {
        return (AttributeScope) scopes.get(name);
    }

    public Map getGlobalScopes() {
        return scopes;
    }

    /** Define a label defined in a rule r; check the validity then ask the
     *  Rule object to actually define it.
     */
    protected void defineLabel(Rule r, antlr.Token label, GrammarAST element,
                               int type)
    {
        boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type);
        if (err) {
            return;
        }
        r.defineLabel(label, element, type);
    }

    public void defineTokenRefLabel(String ruleName, antlr.Token label,
                                    GrammarAST tokenRef)
    {
        Rule r = getRule(ruleName);
        if (r != null) {
            if (type == LEXER &&
                (tokenRef.getType() == ANTLRParser.CHAR_LITERAL ||
                 tokenRef.getType() == ANTLRParser.BLOCK ||
                 tokenRef.getType() == ANTLRParser.NOT ||
                 tokenRef.getType() == ANTLRParser.CHAR_RANGE ||
                 tokenRef.getType() == ANTLRParser.WILDCARD))
            {
                defineLabel(r, label, tokenRef, CHAR_LABEL);
            } else {
                defineLabel(r, label, tokenRef, TOKEN_LABEL);
            }
        }
    }

    public void defineRuleRefLabel(String ruleName, antlr.Token label,
                                   GrammarAST ruleRef)
    {
        Rule r = getRule(ruleName);
        if (r != null) {
            defineLabel(r, label, ruleRef, RULE_LABEL);
        }
    }

    public void defineTokenListLabel(String ruleName, antlr.Token label,
                                     GrammarAST element)
    {
        Rule r = getRule(ruleName);
        if (r != null) {
            defineLabel(r, label, element, TOKEN_LIST_LABEL);
        }
    }

    public void defineRuleListLabel(String ruleName, antlr.Token label,
                                    GrammarAST element)
    {
        Rule r = getRule(ruleName);
        if (r != null) {
            if (!r.getHasMultipleReturnValues()) {
                ErrorManager.grammarError(
                    ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,
                    this, label, label.getText());
            }
            defineLabel(r, label, element, RULE_LIST_LABEL);
        }
    }

    /** Given a set of all rewrite elements on the right of ->, filter for
     *  label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ...
     *  Return a displayable token type name computed from the GrammarAST.
     */
    public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) {
        Set<String> labels = new HashSet<String>();
        for (Iterator it = rewriteElements.iterator(); it.hasNext();) {
            GrammarAST el = (GrammarAST) it.next();
            if (el.getType() == ANTLRParser.LABEL) {
                Rule r = getRule(el.enclosingRule);
                String labelName = el.getText();
                LabelElementPair pair = r.getLabel(labelName);
                // if this is a valid label, its type is what we're looking
                // for, and it's not a ref to the old value $rule, add it
                // to the list
                if (pair != null && pair.type == labelType &&
                    !labelName.equals(el.enclosingRule))
                {
                    labels.add(labelName);
                }
            }
        }
        return labels;
    }

    /** Before generating code, we examine all actions that can have
     *  $x.y and $y stuff in them because some code generation depends on
     *  Rule.referencedPredefinedRuleAttributes. I need to remove unused
     *  rule labels, for example.
     */
    protected void examineAllExecutableActions() {
        Collection rules = getRules();
        for (Iterator it = rules.iterator(); it.hasNext();) {
            Rule r = (Rule) it.next();
            // walk all actions within the rule elements, args, and exceptions
            List<GrammarAST> actions = r.getInlineActions();
            for (int i = 0; i < actions.size(); i++) {
                GrammarAST actionAST = (GrammarAST) actions.get(i);
                ActionAnalysisLexer sniffer =
                    new ActionAnalysisLexer(this, r.name, actionAST);
                sniffer.analyze();
            }
            // walk any named actions like @init, @after
            Collection<GrammarAST> namedActions = r.getActions().values();
            for (Iterator it2 = namedActions.iterator(); it2.hasNext();) {
                GrammarAST actionAST = (GrammarAST) it2.next();
                ActionAnalysisLexer sniffer =
                    new ActionAnalysisLexer(this, r.name, actionAST);
                sniffer.analyze();
            }
        }
    }

    /** Remove all labels on rule refs whose target rules have no return value.
     *  Do this for all rules in the grammar.
     */
    public void checkAllRulesForUselessLabels() {
        if (type == LEXER) {
            return;
        }
        Set rules = nameToRuleMap.keySet();
        for (Iterator it = rules.iterator(); it.hasNext();) {
            String ruleName = (String) it.next();
            Rule r = getRule(ruleName);
            removeUselessLabels(r.getRuleLabels());
            removeUselessLabels(r.getRuleListLabels());
        }
    }

    /** A label on a rule is useless if the rule has no return value, no
     *  tree or template output, and it is not referenced in an action.
     */
    protected void removeUselessLabels(Map ruleToElementLabelPairMap) {
        if (ruleToElementLabelPairMap == null) {
            return;
        }
        Collection labels = ruleToElementLabelPairMap.values();
        List kill = new ArrayList();
        for (Iterator labelit = labels.iterator(); labelit.hasNext();) {
            LabelElementPair pair = (LabelElementPair) labelit.next();
            Rule refdRule = getRule(pair.elementRef.getText());
            if (refdRule != null && !refdRule.getHasReturnValue() &&
                !pair.actionReferencesLabel)
            {
                //System.out.println(pair.label.getText()+" is useless");
                kill.add(pair.label.getText());
            }
        }
        for (int i = 0; i < kill.size(); i++) {
            String labelToKill = (String) kill.get(i);
            // System.out.println("kill "+labelToKill);
            ruleToElementLabelPairMap.remove(labelToKill);
        }
    }

    /** Track a rule reference within an outermost alt of a rule. Used
     *  at the moment to decide if $ruleref refers to a unique rule ref in
     *  the alt. Rewrite rules force tracking of all rule AST results.
     *
     *  This data is also used to verify that all rules have been defined.
     */
    public void altReferencesRule(String ruleName, GrammarAST refAST,
                                  int outerAltNum)
    {
        Rule r = getRule(ruleName);
        if (r == null) {
            return;
        }
        r.trackRuleReferenceInAlt(refAST, outerAltNum);
        antlr.Token refToken = refAST.getToken();
        if (!ruleRefs.contains(refToken)) {
            ruleRefs.add(refToken);
        }
    }

    /** Track a token reference within an outermost alt of a rule. Used
     *  to decide if $tokenref refers to a unique token ref in
     *  the alt. Does not track literals!
     *
     *  Rewrite rules force tracking of all tokens.
     */
    public void altReferencesTokenID(String ruleName, GrammarAST refAST,
                                     int outerAltNum)
    {
        Rule r = getRule(ruleName);
        if (r == null) {
            return;
        }
        r.trackTokenReferenceInAlt(refAST, outerAltNum);
        if (!tokenIDRefs.contains(refAST.getToken())) {
            tokenIDRefs.add(refAST.getToken());
        }
    }

    /** To yield smaller, more readable code, track which rules have their
     *  predefined attributes accessed. If a rule has no user-defined
     *  return values, then don't generate the return value scope classes
     *  etc... Make the rule have a void return value. Don't track for lexer
     *  rules.
     */
    public void referenceRuleLabelPredefinedAttribute(String ruleName) {
        Rule r = getRule(ruleName);
        if (r != null && type != LEXER) {
            // indicate that an action ref'd an attr unless it's in a lexer
            // so that $ID.text refs don't force lexer rules to define
            // return values...Token objects are created by the caller instead.
            r.referencedPredefinedRuleAttributes = true;
        }
    }

    public List checkAllRulesForLeftRecursion() {
        return sanity.checkAllRulesForLeftRecursion();
    }

    /** Return a list of left-recursive rules; no analysis can be done
     *  successfully on these. Useful for skipping these rules and also
     *  for ANTLRWorks to highlight them.
     */
    public Set getLeftRecursiveRules() {
        if (nfa == null) {
            createNFAs();
        }
        if (leftRecursiveRules != null) {
            return leftRecursiveRules;
        }
        sanity.checkAllRulesForLeftRecursion();
        return leftRecursiveRules;
    }

    public void checkRuleReference(GrammarAST refAST, GrammarAST argsAST,
                                   String currentRuleName)
    {
        sanity.checkRuleReference(refAST, argsAST, currentRuleName);
    }

    /** Rules like "a : ;" and "a : {...} ;" should not generate
     *  try/catch blocks for RecognitionException. To detect this,
     *  it's probably ok to just look for any reference to an atom
     *  that can match some input. Without that, the rule is unlikely
     *  to have anything else.
     */
    public boolean isEmptyRule(GrammarAST block) {
        GrammarAST aTokenRefNode = block.findFirstType(ANTLRParser.TOKEN_REF);
        GrammarAST aStringLiteralRefNode =
            block.findFirstType(ANTLRParser.STRING_LITERAL);
        GrammarAST aCharLiteralRefNode =
            block.findFirstType(ANTLRParser.CHAR_LITERAL);
        GrammarAST aWildcardRefNode = block.findFirstType(ANTLRParser.WILDCARD);
        GrammarAST aRuleRefNode = block.findFirstType(ANTLRParser.RULE_REF);
        if (aTokenRefNode == null && aStringLiteralRefNode == null &&
            aCharLiteralRefNode == null && aWildcardRefNode == null &&
            aRuleRefNode == null)
        {
            return true;
        }
        return false;
    }

    public int getTokenType(String tokenName) {
        Integer I = null;
        if (tokenName.charAt(0) == '\'') {
            I = (Integer) stringLiteralToTypeMap.get(tokenName);
        } else { // must be a label like ID
            I = (Integer) tokenIDToTypeMap.get(tokenName);
        }
        int i = (I != null) ? I.intValue() : Label.INVALID;
        //System.out.println("grammar type "+type+" "+tokenName+"->"+i);
        return i;
    }

    /** Get the list of tokens that are IDs like BLOCK and LPAREN */
    public Set getTokenIDs() {
        return tokenIDToTypeMap.keySet();
    }

    /** Return an ordered integer list of token types that have no
     *  corresponding token ID like INT or KEYWORD_BEGIN; used for stuff
     *  like 'begin'.
     */
    public Collection getTokenTypesWithoutID() {
        List types = new ArrayList();
        for (int t = Label.MIN_TOKEN_TYPE; t <= getMaxTokenType(); t++) {
            String name = getTokenDisplayName(t);
            if (name.charAt(0) == '\'') {
                types.add(Utils.integer(t));
            }
        }
        return types;
    }

    /** Get a list of all token IDs and literals that have an associated
     *  token type.
     */
    public Set getTokenDisplayNames() {
        Set names = new HashSet();
        for (int t = Label.MIN_TOKEN_TYPE; t <= getMaxTokenType(); t++) {
            names.add(getTokenDisplayName(t));
        }
        return names;
    }

    /** Given a literal like 'a' (the 3-char sequence with single quotes),
     *  return the int value of 'a'. Convert escape sequences here also.
     *  ANTLR's antlr.g parser does not convert escape sequences.
     *
     *  11/26/2005: I changed literals to always be '...' even for strings.
     *  This routine still works though.
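     *
     *  Examples: "'a'" yields 0x61; the 4-char literal "'\n'" yields 0x0A
     *  via the escape table; 8-char unicode literals are parsed as 16-bit hex.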
     */
    public static int getCharValueFromGrammarCharLiteral(String literal) {
        if (literal.length() == 3) {
            // 'x'
            return literal.charAt(1); // no escape char
        } else if (literal.length() == 4) {
            // '\x' (antlr lexer will catch invalid char)
            int escChar = literal.charAt(2);
            int charVal = ANTLRLiteralEscapedCharValue[escChar];
            if (charVal == 0) {
                // Unnecessary escapes like '\{' should just yield {
                return escChar;
            }
            return charVal;
        } else if (literal.length() == 8) {
            // '\u1234'
            String unicodeChars = literal.substring(3, literal.length() - 1);
            return Integer.parseInt(unicodeChars, 16);
        }
        ErrorManager.assertTrue(false, "invalid char literal: " + literal);
        return -1;
    }
1496:
1497: /** ANTLR does not convert escape sequences during the parse phase because
1498: * it could not know how to print String/char literals back out when
1499: * printing grammars etc... Someone in China might use the real unicode
1500: * char in a literal as it will display on their screen; when printing
1501: * back out, I could not know whether to display or use a unicode escape.
1502: *
1503: * This routine converts a string literal with possible escape sequences
1504: * into a pure string of 16-bit char values. Escapes and unicode \u0000
1505: * specs are converted to pure chars. return in a buffer; people may
1506: * want to walk/manipulate further.
1507: *
1508: * The NFA construction routine must know the actual char values.
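     *
     * For example (illustrative): the 8-char literal 'ab\ncd' (including
     * the outer quotes) unescapes to the 5-char buffer a, b, newline, c, d.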
     */
    public static StringBuffer getUnescapedStringFromGrammarStringLiteral(
            String literal) {
        //System.out.println("escape: ["+literal+"]");
        StringBuffer buf = new StringBuffer();
        int last = literal.length() - 1; // skip quotes on outside
        for (int i = 1; i < last; i++) {
            char c = literal.charAt(i);
            if (c == '\\') {
                i++;
                c = literal.charAt(i);
                if (Character.toUpperCase(c) == 'U') {
                    // \u0000
                    i++;
                    String unicodeChars = literal.substring(i, i + 4);
                    // parse the unicode 16 bit hex value
                    int val = Integer.parseInt(unicodeChars, 16);
                    i += 4 - 1; // loop will inc by 1; only jump 3 then
                    buf.append((char) val);
                } else {
                    buf.append((char) ANTLRLiteralEscapedCharValue[c]); // normal \x escape
                }
            } else {
                buf.append(c); // simple char x
            }
        }
        //System.out.println("string: ["+buf.toString()+"]");
        return buf;
    }

    /** Pull your token definitions from an existing grammar in memory.
     * You must use the Grammar() ctor, then this method, then
     * setGrammarContent() to make this work. This is useful primarily for
     * testing and interpreting grammars. Return the max token type found.
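     *
     * A minimal usage sketch of that call sequence (lexerGrammar and
     * grammarText are hypothetical):
     *
     *   Grammar g = new Grammar();
     *   g.importTokenVocabulary(lexerGrammar);
     *   g.setGrammarContent(grammarText);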
     */
    public int importTokenVocabulary(Grammar importFromGr) {
        Set importedTokenIDs = importFromGr.getTokenIDs();
        for (Iterator it = importedTokenIDs.iterator(); it.hasNext();) {
            String tokenID = (String) it.next();
            int tokenType = importFromGr.getTokenType(tokenID);
            maxTokenType = Math.max(maxTokenType, tokenType);
            if (tokenType >= Label.MIN_TOKEN_TYPE) {
                //System.out.println("import token from grammar "+tokenID+"="+tokenType);
                defineToken(tokenID, tokenType);
            }
        }
        return maxTokenType; // return max found
    }

    /** Load a vocab file &lt;vocabName&gt;.tokens and return the max token
     * type found.
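     *
     * A .tokens file is a plain list of tokenID=type assignments, one per
     * line, as parsed below, e.g. (values illustrative):
     *
     *   ID=4
     *   'begin'=5
     */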
    public int importTokenVocabulary(String vocabName) {
        File fullFile = getImportedVocabFileName(vocabName);
        try {
            FileReader fr = new FileReader(fullFile);
            BufferedReader br = new BufferedReader(fr);
            StreamTokenizer tokenizer = new StreamTokenizer(br);
            tokenizer.parseNumbers();
            tokenizer.wordChars('_', '_');
            tokenizer.eolIsSignificant(true);
            tokenizer.slashSlashComments(true);
            tokenizer.slashStarComments(true);
            tokenizer.ordinaryChar('=');
            tokenizer.quoteChar('\'');
            tokenizer.whitespaceChars(' ', ' ');
            tokenizer.whitespaceChars('\t', '\t');
            int lineNum = 1;
            int token = tokenizer.nextToken();
            while (token != StreamTokenizer.TT_EOF) {
                String tokenID;
                if (token == StreamTokenizer.TT_WORD) {
                    tokenID = tokenizer.sval;
                } else if (token == '\'') {
                    tokenID = "'" + tokenizer.sval + "'";
                } else {
                    ErrorManager.error(
                            ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
                            vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                            Utils.integer(lineNum));
                    // skip to end of line (stop at EOF too, so a malformed
                    // final line cannot loop forever)
                    int skip = tokenizer.nextToken();
                    while (skip != StreamTokenizer.TT_EOL
                            && skip != StreamTokenizer.TT_EOF) {
                        skip = tokenizer.nextToken();
                    }
                    token = tokenizer.nextToken();
                    continue;
                }
                token = tokenizer.nextToken();
                if (token != '=') {
                    ErrorManager.error(
                            ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
                            vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                            Utils.integer(lineNum));
                    // skip to end of line (or EOF)
                    int skip = tokenizer.nextToken();
                    while (skip != StreamTokenizer.TT_EOL
                            && skip != StreamTokenizer.TT_EOF) {
                        skip = tokenizer.nextToken();
                    }
                    token = tokenizer.nextToken();
                    continue;
                }
                token = tokenizer.nextToken(); // skip '='
                if (token != StreamTokenizer.TT_NUMBER) {
                    ErrorManager.error(
                            ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
                            vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                            Utils.integer(lineNum));
                    // skip to end of line (or EOF)
                    int skip = tokenizer.nextToken();
                    while (skip != StreamTokenizer.TT_EOL
                            && skip != StreamTokenizer.TT_EOF) {
                        skip = tokenizer.nextToken();
                    }
                    token = tokenizer.nextToken();
                    continue;
                }
                int tokenType = (int) tokenizer.nval;
                token = tokenizer.nextToken();
                //System.out.println("import "+tokenID+"="+tokenType);
                maxTokenType = Math.max(maxTokenType, tokenType);
                defineToken(tokenID, tokenType);
                lineNum++;
                if (token != StreamTokenizer.TT_EOL) {
                    ErrorManager.error(
                            ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
                            vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                            Utils.integer(lineNum));
                    // skip to end of line (or EOF)
                    int skip = tokenizer.nextToken();
                    while (skip != StreamTokenizer.TT_EOL
                            && skip != StreamTokenizer.TT_EOF) {
                        skip = tokenizer.nextToken();
                    }
                    token = tokenizer.nextToken();
                    continue;
                }
                token = tokenizer.nextToken(); // skip newline
            }
            br.close();
        } catch (FileNotFoundException fnfe) {
            ErrorManager.error(
                    ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE, fullFile);
        } catch (IOException ioe) {
            ErrorManager.error(
                    ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
                    fullFile, ioe);
        } catch (Exception e) {
            ErrorManager.error(
                    ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
                    fullFile, e);
        }
        return maxTokenType;
    }

    /** Given a token type, get a meaningful name for it such as the ID
     * or string literal. If this is a lexer and the ttype is in the
     * char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
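     *
     * For example (illustrative): in a lexer, ttype 97 ('a') yields the
     * display name "'a'"; in a parser, a ttype with no entry in the
     * token list falls back to the type number itself, e.g. "7".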
     */
    public String getTokenDisplayName(int ttype) {
        String tokenName = null;
        int index = 0;
        // inside any target's char range and is lexer grammar?
        if (this.type == LEXER && ttype >= Label.MIN_CHAR_VALUE
                && ttype <= Label.MAX_CHAR_VALUE) {
            return getANTLRCharLiteralForChar(ttype);
        }
        // faux label?
        else if (ttype < 0) {
            tokenName = (String) typeToTokenList.get(Label.NUM_FAUX_LABELS + ttype);
        } else {
            // compute index in typeToTokenList for ttype
            index = ttype - 1; // normalize to 0..n-1
            index += Label.NUM_FAUX_LABELS; // jump over faux tokens

            if (index < typeToTokenList.size()) {
                tokenName = (String) typeToTokenList.get(index);
            } else {
                tokenName = String.valueOf(ttype);
            }
        }
        //System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
        return tokenName;
    }

    /** Get the set of ANTLR String literals */
    public Set getStringLiterals() {
        return stringLiteralToTypeMap.keySet();
    }

    public int getGrammarMaxLookahead() {
        if (global_k >= 0) {
            return global_k;
        }
        Object k = getOption("k");
        if (k == null) {
            global_k = 0;
        } else if (k instanceof Integer) {
            Integer kI = (Integer) k;
            global_k = kI.intValue();
        } else {
            // must be String "*"
            if (k.equals("*")) { // this is the default anyway
                global_k = 0;
            }
        }
        return global_k;
    }

    /** Save the option key/value pair and process it; return the key,
     * or null if the option is invalid.
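     *
     * A minimal usage sketch (option names/values illustrative; "k" and
     * "output" are options consulted elsewhere in this class):
     *
     *   g.setOption("k", Utils.integer(2), optionsStartToken);
     *   g.setOption("output", "AST", optionsStartToken);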
     */
    public String setOption(String key, Object value,
            antlr.Token optionsStartToken) {
        if (!legalOptions.contains(key)) {
            ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION,
                    this, optionsStartToken, key);
            return null;
        }
        if (!optionIsValid(key, value)) {
            return null;
        }
        if (options == null) {
            options = new HashMap();
        }
        options.put(key, value);
        return key;
    }

    public void setOptions(Map options, antlr.Token optionsStartToken) {
        if (options == null) {
            this.options = null;
            return;
        }
        Set keys = options.keySet();
        for (Iterator it = keys.iterator(); it.hasNext();) {
            String optionName = (String) it.next();
            Object optionValue = options.get(optionName);
            String stored = setOption(optionName, optionValue,
                    optionsStartToken);
            if (stored == null) {
                it.remove();
            }
        }
    }

    public Object getOption(String key) {
        Object value = null;
        if (options != null) {
            value = options.get(key);
        }
        if (value == null) {
            value = defaultOptions.get(key);
        }
        return value;
    }

    public boolean optionIsValid(String key, Object value) {
        return true;
    }

    public boolean buildAST() {
        String outputType = (String) getOption("output");
        if (outputType != null) {
            return outputType.equals("AST");
        }
        return false;
    }

    public boolean isBuiltFromString() {
        return builtFromString;
    }

    public boolean buildTemplate() {
        String outputType = (String) getOption("output");
        if (outputType != null) {
            return outputType.equals("template");
        }
        return false;
    }

    public Collection getRules() {
        return nameToRuleMap.values();
    }

    public void setRuleAST(String ruleName, GrammarAST t) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        if (r != null) {
            r.tree = t;
            r.EORNode = t.getLastChild();
        }
    }

    public void setRuleStartState(String ruleName, NFAState startState) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        if (r != null) {
            r.startState = startState;
        }
    }

    public void setRuleStopState(String ruleName, NFAState stopState) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        if (r != null) {
            r.stopState = stopState;
        }
    }

    public NFAState getRuleStartState(String ruleName) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        if (r != null) {
            return r.startState;
        }
        return null;
    }

    public String getRuleModifier(String ruleName) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        if (r != null) {
            return r.modifier;
        }
        return null;
    }

    public NFAState getRuleStopState(String ruleName) {
        Rule r = (Rule) nameToRuleMap.get(ruleName);
        if (r != null) {
            return r.stopState;
        }
        return null;
    }

    public int assignDecisionNumber(NFAState state) {
        decisionNumber++;
        state.setDecisionNumber(decisionNumber);
        return decisionNumber;
    }

    protected Decision getDecision(int decision) {
        int index = decision - 1;
        if (index >= indexToDecision.size()) {
            return null;
        }
        Decision d = (Decision) indexToDecision.get(index);
        return d;
    }

    protected Decision createDecision(int decision) {
        int index = decision - 1;
        if (index < indexToDecision.size()) {
            return getDecision(decision); // don't recreate
        }
        Decision d = new Decision();
        d.decision = decision;
        indexToDecision.setSize(getNumberOfDecisions());
        indexToDecision.set(index, d);
        return d;
    }

    public List getDecisionNFAStartStateList() {
        List states = new ArrayList(100);
        for (int d = 0; d < indexToDecision.size(); d++) {
            Decision dec = (Decision) indexToDecision.elementAt(d);
            states.add(dec.startState);
        }
        return states;
    }

    public NFAState getDecisionNFAStartState(int decision) {
        Decision d = getDecision(decision);
        if (d == null) {
            return null;
        }
        return d.startState;
    }

    public DFA getLookaheadDFA(int decision) {
        Decision d = getDecision(decision);
        if (d == null) {
            return null;
        }
        return d.dfa;
    }

    public GrammarAST getDecisionBlockAST(int decision) {
        Decision d = getDecision(decision);
        if (d == null) {
            return null;
        }
        return d.blockAST;
    }

    /** Return a list of column numbers for all decisions
     * on a particular line so that ANTLRWorks can choose the decision
     * depending on the location of the cursor (otherwise,
     * ANTLRWorks would have to supply the *exact* location, which
     * is not easy from the user's point of view).
     *
     * This is not particularly fast, as it walks the entire
     * line:col->DFA map looking for a prefix of "line:".
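     *
     * For example (illustrative): if the map holds keys "12:3" and
     * "12:40", asking for line 12 returns the columns 3 and 40 (in
     * whatever order the map iterates).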
     */
    public List getLookaheadDFAColumnsForLineInFile(int line) {
        String prefix = line + ":";
        List columns = new ArrayList();
        for (Iterator iter = lineColumnToLookaheadDFAMap.keySet().iterator(); iter.hasNext();) {
            String key = (String) iter.next();
            if (key.startsWith(prefix)) {
                columns.add(Integer.valueOf(key.substring(prefix.length())));
            }
        }
        return columns;
    }

    /** Useful for ANTLRWorks to map position in file to the DFA for display */
    public DFA getLookaheadDFAFromPositionInFile(int line, int col) {
        return (DFA) lineColumnToLookaheadDFAMap.get(line + ":" + col);
    }

    public Map getLineColumnToLookaheadDFAMap() {
        return lineColumnToLookaheadDFAMap;
    }

    /*
    public void setDecisionOptions(int decision, Map options) {
        Decision d = createDecision(decision);
        d.options = options;
    }

    public void setDecisionOption(int decision, String name, Object value) {
        Decision d = getDecision(decision);
        if ( d!=null ) {
            if ( d.options==null ) {
                d.options = new HashMap();
            }
            d.options.put(name,value);
        }
    }

    public Map getDecisionOptions(int decision) {
        Decision d = getDecision(decision);
        if ( d==null ) {
            return null;
        }
        return d.options;
    }
    */

    public int getNumberOfDecisions() {
        return decisionNumber;
    }

    public int getNumberOfCyclicDecisions() {
        int n = 0;
        for (int i = 1; i <= getNumberOfDecisions(); i++) {
            Decision d = getDecision(i);
            if (d.dfa != null && d.dfa.isCyclic()) {
                n++;
            }
        }
        return n;
    }

    /** Set the lookahead DFA for a particular decision. This means
     * that the appropriate AST node must be updated to have the new
     * lookahead DFA. This method could be used to properly set the DFAs
     * without using the createLookaheadDFAs() method. You could do this:
     *
     *   Grammar g = new Grammar("...");
     *   g.setLookaheadDFA(1, dfa1);
     *   g.setLookaheadDFA(2, dfa2);
     *   ...
     */
    public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
        Decision d = createDecision(decision);
        d.dfa = lookaheadDFA;
        GrammarAST ast = d.startState.getAssociatedASTNode();
        ast.setLookaheadDFA(lookaheadDFA);
    }

    public void setDecisionNFA(int decision, NFAState state) {
        Decision d = createDecision(decision);
        d.startState = state;
    }

    public void setDecisionBlockAST(int decision, GrammarAST blockAST) {
        //System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token);
        Decision d = createDecision(decision);
        d.blockAST = blockAST;
    }

    public boolean allDecisionDFAHaveBeenCreated() {
        return allDecisionDFACreated;
    }

    /** How many token types have been allocated so far? */
    public int getMaxTokenType() {
        return maxTokenType;
    }

    /** What is the max char value possible for this grammar's target? Use
     * unicode max if no target defined.
     */
    public int getMaxCharValue() {
        if (generator != null) {
            return generator.target.getMaxCharValue(generator);
        } else {
            return Label.MAX_CHAR_VALUE;
        }
    }

    /** Return a set of all possible token or char types for this grammar */
    public IntSet getTokenTypes() {
        if (type == LEXER) {
            return getAllCharValues();
        }
        return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType());
    }

    /** If there is a char vocabulary, use it; else return min to max char
     * as defined by the target. If no target, use max unicode char value.
     */
    public IntSet getAllCharValues() {
        if (charVocabulary != null) {
            return charVocabulary;
        }
        IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue());
        return allChar;
    }

    /** Return a string representing the escaped char for code c. E.g., if c
     * has value 0x100, you will get "\u0100". ASCII gets the usual
     * char (non-hex) representation. Control characters are emitted as
     * unicode escapes. While this is specially set up for returning Java
     * strings, it can be used by any language target that has the same
     * syntax. :)
     *
     * 11/26/2005: I changed this to use double quotes, consistent with antlr.g
     * 12/09/2005: I changed so everything is single quotes
     */
    public static String getANTLRCharLiteralForChar(int c) {
        if (c < Label.MIN_CHAR_VALUE) {
            ErrorManager.internalError("invalid char value " + c);
            return "'<INVALID>'";
        }
        if (c < ANTLRLiteralCharValueEscape.length
                && ANTLRLiteralCharValueEscape[c] != null) {
            return '\'' + ANTLRLiteralCharValueEscape[c] + '\'';
        }
        if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN
                && !Character.isISOControl((char) c)) {
            if (c == '\\') {
                return "'\\\\'";
            }
            if (c == '\'') {
                return "'\\''";
            }
            return '\'' + Character.toString((char) c) + '\'';
        }
        // turn on the bit above the max "\uFFFF" value so that we pad with
        // zeros, then take only the last 4 digits
        String hex = Integer.toHexString(c | 0x10000).toUpperCase().substring(1, 5);
        String unicodeStr = "'\\u" + hex + "'";
        return unicodeStr;
    }

    /** For lexer grammars, return everything in unicode not in set.
     * For parser and tree grammars, return everything in token space
     * from MIN_TOKEN_TYPE to last valid token type or char value.
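     *
     * E.g., in a lexer, complement(IntervalSet.of('a')) is every char in
     * the vocabulary except 'a'.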
     */
    public IntSet complement(IntSet set) {
        //System.out.println("complement "+set.toString(this));
        //System.out.println("vocabulary "+getTokenTypes().toString(this));
        IntSet c = set.complement(getTokenTypes());
        //System.out.println("result="+c.toString(this));
        return c;
    }

    public IntSet complement(int atom) {
        return complement(IntervalSet.of(atom));
    }

    /** Given a set tree like ( SET A B ) in a lexer, check that A and B
     * are both valid sets themselves, else we must treat the tree as a
     * BLOCK.
     */
    public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) {
        boolean valid = true;
        try {
            //System.out.println("parse BLOCK as set tree: "+t.toStringTree());
            nfabuilder.testBlockAsSet(t);
        } catch (RecognitionException re) {
            // The tree did not parse as a set; mark it invalid and
            // ignore the exception
            valid = false;
        }
        //System.out.println("valid? "+valid);
        return valid;
    }

    /** Get the set equivalent (if any) of the indicated rule from this
     * grammar. Mostly used in the lexer to do ~T for some fragment rule
     * T. If the rule AST has a SET, use that. If the rule is a single
     * char, convert it to a set and return it. If the rule is not a
     * simple set (e.g., it has actions), return null.
     * Rules have AST form:
     *
     *   ^( RULE ID modifier ARG RET SCOPE block EOR )
     */
    public IntSet getSetFromRule(TreeToNFAConverter nfabuilder,
            String ruleName) throws RecognitionException {
        Rule r = getRule(ruleName);
        if (r == null) {
            return null;
        }
        IntSet elements = null;
        //System.out.println("parsed tree: "+r.tree.toStringTree());
        elements = nfabuilder.setRule(r.tree);
        //System.out.println("elements="+elements);
        return elements;
    }

    /** Decisions are linked together with transition(1). Count how
     * many there are. This is here rather than in NFAState because
     * a grammar decides how NFAs are put together to form a decision.
     */
    public int getNumberOfAltsForDecisionNFA(NFAState decisionState) {
        if (decisionState == null) {
            return 0;
        }
        int n = 1;
        NFAState p = decisionState;
        while (p.transition(1) != null) {
            n++;
            p = (NFAState) p.transition(1).target;
        }
        return n;
    }

    /** Get the ith alternative (1..n) from a decision; return null when
     * an invalid alt is requested. We must count along the chain to find
     * the right alternative number. For (A|B), you get NFA structure (roughly):
     *
     *   o->o-A->o
     *   |
     *   o->o-B->o
     *
     * This routine returns the leftmost state for each alt. So alt=1 returns
     * the upper-left-most state in this structure.
     */
    public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) {
        if (decisionState == null || alt <= 0) {
            return null;
        }
        int n = 1;
        NFAState p = decisionState;
        while (p != null) {
            if (n == alt) {
                return p;
            }
            n++;
            Transition next = p.transition(1);
            p = null;
            if (next != null) {
                p = (NFAState) next.target;
            }
        }
        return null;
    }

    /** From an NFA state, s, find the set of all labels reachable from s.
     * This computes FIRST, FOLLOW and any other lookahead computation
     * depending on where s is.
     *
     * Record, with EOR_TOKEN_TYPE, if you hit the end of a rule so we can
     * know at runtime (when these sets are used) to start walking up the
     * follow chain to compute the real, correct follow set.
     *
     * This routine will only be used on parser and tree parser grammars.
     *
     * TODO: verify it properly handles a : b A ; where b is nullable.
     * It stops at the end of rules, returning EOR; should perhaps check
     * for that and keep going.
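     *
     * A minimal usage sketch (rule name hypothetical):
     *
     *   LookaheadSet look = g.LOOK(g.getRuleStartState("a"));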
     */
    public LookaheadSet LOOK(NFAState s) {
        lookBusy.clear();
        return _LOOK(s);
    }

    protected LookaheadSet _LOOK(NFAState s) {
        if (s.isAcceptState()) {
            return new LookaheadSet(Label.EOR_TOKEN_TYPE);
        }

        if (lookBusy.contains(s)) {
            // return a copy of an empty set; we may modify set inline
            return new LookaheadSet();
        }
        lookBusy.add(s);
        Transition transition0 = s.transition(0);
        if (transition0 == null) {
            return null;
        }

        if (transition0.label.isAtom()) {
            int atom = transition0.label.getAtom();
            if (atom == Label.EOF) {
                return LookaheadSet.EOF();
            }
            return new LookaheadSet(atom);
        }
        if (transition0.label.isSet()) {
            IntSet sl = transition0.label.getSet();
            LookaheadSet laSet = new LookaheadSet(sl);
            if (laSet.member(Label.EOF)) {
                laSet.remove(Label.EOF);
                laSet.hasEOF = true;
            }
            return laSet;
        }
        LookaheadSet tset = _LOOK((NFAState) transition0.target);
        if (tset.member(Label.EOR_TOKEN_TYPE)) {
            if (transition0 instanceof RuleClosureTransition) {
                // we called a rule that found the end of the rule.
                // That means the rule is nullable and we need to
                // keep looking at what follows the rule ref. E.g.,
                // a : b A ; where b is nullable means that LOOK(a)
                // should include A.
                RuleClosureTransition ruleInvocationTrans =
                        (RuleClosureTransition) transition0;
                // remove the EOR and get what follows
                tset.remove(Label.EOR_TOKEN_TYPE);
                LookaheadSet fset =
                        _LOOK((NFAState) ruleInvocationTrans.getFollowState());
                tset.orInPlace(fset);
            }
        }

        Transition transition1 = s.transition(1);
        if (transition1 != null) {
            LookaheadSet tset1 = _LOOK((NFAState) transition1.target);
            tset.orInPlace(tset1);
        }
        return tset;
    }

    public void setCodeGenerator(CodeGenerator generator) {
        this.generator = generator;
    }

    public CodeGenerator getCodeGenerator() {
        return generator;
    }

    public GrammarAST getGrammarTree() {
        return grammarTree;
    }

    public Tool getTool() {
        return tool;
    }

    public void setTool(Tool tool) {
        this.tool = tool;
    }

    /** Given a token type and the text of the literal, come up with a
     * decent token type label. For now it's just T<type>. Actually,
     * if there is an aliased name from the tokens section like PLUS='+',
     * use it.
     */
    public String computeTokenNameFromLiteral(int tokenType, String literal) {
        return "T" + tokenType;
    }

    public String toString() {
        return grammarTreeToString(grammarTree);
    }

    public String grammarTreeToString(GrammarAST t) {
        return grammarTreeToString(t, true);
    }

    public String grammarTreeToString(GrammarAST t, boolean showActions) {
        String s = null;
        try {
            s = t.getLine() + ":" + t.getColumn() + ": ";
            s += new ANTLRTreePrinter().toString((AST) t, this, showActions);
        } catch (Exception e) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, t, e);
        }
        return s;
    }

    public void setWatchNFAConversion(boolean watchNFAConversion) {
        this.watchNFAConversion = watchNFAConversion;
    }

    public boolean getWatchNFAConversion() {
        return watchNFAConversion;
    }

    public void printGrammar(PrintStream output) {
        ANTLRTreePrinter printer = new ANTLRTreePrinter();
        printer.setASTNodeClass("org.antlr.tool.GrammarAST");
        try {
            String g = printer.toString(grammarTree, this, false);
            output.println(g);
        } catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, re);
        }
    }

}
|