0001: package antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.jGuru.com
0005: * Software rights: http://www.antlr.org/RIGHTS.html
0006: *
0007: * $Id: JavaCodeGenerator.java,v 1.1 2004/01/21 19:18:31 rgrimm Exp $
0008: */
0009:
0010: import java.util.Enumeration;
0011: import java.util.Hashtable;
0012: import java.util.HashSet;
0013:
0014: import antlr.collections.impl.BitSet;
0015: import antlr.collections.impl.Vector;
0016:
0017: import java.io.PrintWriter; //SAS: changed for proper text file io
0018: import java.io.IOException;
0019: import java.io.FileWriter;
0020:
/** Generate MyParser.java, MyLexer.java, and MyParserTokenTypes.java */
0022: public class JavaCodeGenerator extends CodeGenerator {
0023: // non-zero if inside syntactic predicate generation
0024: protected int syntacticPredLevel = 0;
0025:
0026: // Are we generating ASTs (for parsers and tree parsers) right now?
0027: protected boolean genAST = false;
0028:
0029: // Are we saving the text consumed (for lexers) right now?
0030: protected boolean saveText = false;
0031:
0032: // Grammar parameters set up to handle different grammar classes.
0033: // These are used to get instanceof tests out of code generation
0034: String labeledElementType;
0035: String labeledElementASTType;
0036: String labeledElementInit;
0037: String commonExtraArgs;
0038: String commonExtraParams;
0039: String commonLocalVars;
0040: String lt1Value;
0041: String exceptionThrown;
0042: String throwNoViable;
0043:
0044: /** Tracks the rule being generated. Used for mapTreeId */
0045: RuleBlock currentRule;
0046:
0047: /** Tracks the rule or labeled subrule being generated. Used for
0048: AST generation. */
0049: String currentASTResult;
0050:
0051: /** Mapping between the ids used in the current alt, and the
0052: * names of variables used to represent their AST values.
0053: */
0054: Hashtable treeVariableMap = new Hashtable();
0055:
0056: /** Used to keep track of which AST variables have been defined in a rule
* (except for the #rule_name and #rule_name_in variables).
0058: */
0059: HashSet declaredASTVariables = new HashSet();
0060:
0061: /* Count of unnamed generated variables */
0062: int astVarNumber = 1;
0063:
0064: /** Special value used to mark duplicate in treeVariableMap */
0065: protected static final String NONUNIQUE = new String();
0066:
0067: public static final int caseSizeThreshold = 127; // ascii is max
0068:
0069: private Vector semPreds;
0070:
0071: /** Create a Java code-generator using the given Grammar.
0072: * The caller must still call setTool, setBehavior, and setAnalyzer
0073: * before generating code.
0074: */
0075: public JavaCodeGenerator() {
super();
0077: charFormatter = new JavaCharFormatter();
0078: }
0079:
/** Adds a semantic predicate string to the sem pred vector.
These strings will be used to build an array of sem pred names
when building a debugging parser. This method should only be
called when the debug option is specified.
0084: */
0085: protected int addSemPred(String predicate) {
0086: semPreds.appendElement(predicate);
0087: return semPreds.size() - 1;
0088: }
0089:
0090: public void exitIfError() {
0091: if (antlrTool.hasError()) {
0092: antlrTool.fatalError("Exiting due to errors.");
0093: }
0094: }
0095:
/** Generate the parser, lexer, tree parser, and token types in Java */
0097: public void gen() {
0098: // Do the code generation
0099: try {
0100: // Loop over all grammars
0101: Enumeration grammarIter = behavior.grammars.elements();
0102: while (grammarIter.hasMoreElements()) {
0103: Grammar g = (Grammar) grammarIter.nextElement();
0104: // Connect all the components to each other
0105: g.setGrammarAnalyzer(analyzer);
g.setCodeGenerator(this);
0107: analyzer.setGrammar(g);
// To get correct overloading behavior across heterogeneous grammars
0109: setupGrammarParameters(g);
0110: g.generate();
0111: // print out the grammar with lookahead sets (and FOLLOWs)
0112: // System.out.print(g.toString());
0113: exitIfError();
0114: }
0115:
0116: // Loop over all token managers (some of which are lexers)
0117: Enumeration tmIter = behavior.tokenManagers.elements();
0118: while (tmIter.hasMoreElements()) {
0119: TokenManager tm = (TokenManager) tmIter.nextElement();
0120: if (!tm.isReadOnly()) {
0121: // Write the token manager tokens as Java
0122: // this must appear before genTokenInterchange so that
0123: // labels are set on string literals
0124: genTokenTypes(tm);
0125: // Write the token manager tokens as plain text
0126: genTokenInterchange(tm);
0127: }
0128: exitIfError();
0129: }
0130: } catch (IOException e) {
0131: antlrTool.reportException(e, null);
0132: }
0133: }
0134:
0135: /** Generate code for the given grammar element.
* @param action The {...} action to generate
0137: */
0138: public void gen(ActionElement action) {
0139: if (DEBUG_CODE_GENERATOR)
0140: System.out.println("genAction(" + action + ")");
0141: if (action.isSemPred) {
0142: genSemPred(action.actionText, action.line);
0143: } else {
0144: if (grammar.hasSyntacticPredicate) {
0145: println("if ( inputState.guessing==0 ) {");
0146: tabs++;
0147: }
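// A sketch of the emitted guard (assuming the grammar has syntactic
// predicates):
//   if ( inputState.guessing==0 ) { ...translated user action... }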
0148:
0149: // get the name of the followSet for the current rule so that we
0150: // can replace $FOLLOW in the .g file.
0151: ActionTransInfo tInfo = new ActionTransInfo();
0152: String actionStr = processActionForSpecialSymbols(
0153: action.actionText, action.getLine(), currentRule,
0154: tInfo);
0155:
0156: if (tInfo.refRuleRoot != null) {
0157: // Somebody referenced "#rule", make sure translated var is valid
0158: // assignment to #rule is left as a ref also, meaning that assignments
// with no other refs like "#rule = foo();" still force this code to be
0160: // generated (unnecessarily).
0161: println(tInfo.refRuleRoot + " = ("
0162: + labeledElementASTType + ")currentAST.root;");
0163: }
0164:
0165: // dump the translated action
0166: printAction(actionStr);
0167:
0168: if (tInfo.assignToRoot) {
0169: // Somebody did a "#rule=", reset internal currentAST.root
0170: println("currentAST.root = " + tInfo.refRuleRoot + ";");
0171: // reset the child pointer too to be last sibling in sibling list
0172: println("currentAST.child = " + tInfo.refRuleRoot
0173: + "!=null &&" + tInfo.refRuleRoot
0174: + ".getFirstChild()!=null ?");
0175: tabs++;
0176: println(tInfo.refRuleRoot + ".getFirstChild() : "
0177: + tInfo.refRuleRoot + ";");
0178: tabs--;
0179: println("currentAST.advanceChildToEnd();");
0180: }
0181:
0182: if (grammar.hasSyntacticPredicate) {
0183: tabs--;
0184: println("}");
0185: }
0186: }
0187: }
0188:
0189: /** Generate code for the given grammar element.
0190: * @param blk The "x|y|z|..." block to generate
0191: */
0192: public void gen(AlternativeBlock blk) {
0193: if (DEBUG_CODE_GENERATOR)
0194: System.out.println("gen(" + blk + ")");
0195: println("{");
0196: genBlockPreamble(blk);
0197: genBlockInitAction(blk);
0198:
0199: // Tell AST generation to build subrule result
0200: String saveCurrentASTResult = currentASTResult;
0201: if (blk.getLabel() != null) {
0202: currentASTResult = blk.getLabel();
0203: }
0204:
0205: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0206:
0207: JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
0208: genBlockFinish(howToFinish, throwNoViable);
0209:
0210: println("}");
0211:
0212: // Restore previous AST generation
0213: currentASTResult = saveCurrentASTResult;
0214: }
0215:
0216: /** Generate code for the given grammar element.
* @param end The block-end element to generate. Block-end
0218: * elements are synthesized by the grammar parser to represent
0219: * the end of a block.
0220: */
0221: public void gen(BlockEndElement end) {
0222: if (DEBUG_CODE_GENERATOR)
0223: System.out.println("genRuleEnd(" + end + ")");
0224: }
0225:
0226: /** Generate code for the given grammar element.
* @param atom The character literal reference to generate
0228: */
0229: public void gen(CharLiteralElement atom) {
0230: if (DEBUG_CODE_GENERATOR)
0231: System.out.println("genChar(" + atom + ")");
0232:
0233: if (atom.getLabel() != null) {
0234: println(atom.getLabel() + " = " + lt1Value + ";");
0235: }
0236:
0237: boolean oldsaveText = saveText;
0238: saveText = saveText
0239: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0240: genMatch(atom);
0241: saveText = oldsaveText;
0242: }
0243:
0244: /** Generate code for the given grammar element.
* @param r The character-range reference to generate
0246: */
0247: public void gen(CharRangeElement r) {
0248: if (r.getLabel() != null && syntacticPredLevel == 0) {
0249: println(r.getLabel() + " = " + lt1Value + ";");
0250: }
0251: boolean flag = (grammar instanceof LexerGrammar && (!saveText || r
0252: .getAutoGenType() == GrammarElement.AUTO_GEN_BANG));
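// Sketch of the emitted lexer code when the matched text is discarded
// (! suffix or text-saving off); the range bounds are illustrative:
//   _saveIndex=text.length(); matchRange('a','z'); text.setLength(_saveIndex);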
0253: if (flag) {
0254: println("_saveIndex=text.length();");
0255: }
0256:
0257: println("matchRange(" + r.beginText + "," + r.endText + ");");
0258:
0259: if (flag) {
0260: println("text.setLength(_saveIndex);");
0261: }
0262: }
0263:
0264: /** Generate the lexer Java file */
0265: public void gen(LexerGrammar g) throws IOException {
0266: // If debugging, create a new sempred vector for this grammar
0267: if (g.debuggingOutput)
0268: semPreds = new Vector();
0269:
0270: setGrammar(g);
0271: if (!(grammar instanceof LexerGrammar)) {
0272: antlrTool.panic("Internal error generating lexer");
0273: }
0274:
0275: // SAS: moved output creation to method so a subclass can change
0276: // how the output is generated (for VAJ interface)
0277: setupOutput(grammar.getClassName());
0278:
0279: genAST = false; // no way to gen trees.
0280: saveText = true; // save consumed characters.
0281:
0282: tabs = 0;
0283:
0284: // Generate header common to all Java output files
0285: genHeader();
0286: // Do not use printAction because we assume tabs==0
0287: println(behavior.getHeaderAction(""));
0288:
0289: // Generate header specific to lexer Java file
0290: // println("import java.io.FileInputStream;");
0291: println("import java.io.InputStream;");
0292: println("import antlr.TokenStreamException;");
0293: println("import antlr.TokenStreamIOException;");
0294: println("import antlr.TokenStreamRecognitionException;");
0295: println("import antlr.CharStreamException;");
0296: println("import antlr.CharStreamIOException;");
0297: println("import antlr.ANTLRException;");
0298: println("import java.io.Reader;");
0299: println("import java.util.Hashtable;");
0300: println("import antlr." + grammar.getSuperClass() + ";");
0301: println("import antlr.InputBuffer;");
0302: println("import antlr.ByteBuffer;");
0303: println("import antlr.CharBuffer;");
0304: println("import antlr.Token;");
0305: println("import antlr.CommonToken;");
0306: println("import antlr.RecognitionException;");
0307: println("import antlr.NoViableAltForCharException;");
0308: println("import antlr.MismatchedCharException;");
0309: println("import antlr.TokenStream;");
0310: println("import antlr.ANTLRHashString;");
0311: println("import antlr.LexerSharedInputState;");
0312: println("import antlr.collections.impl.BitSet;");
0313: println("import antlr.SemanticException;");
0314:
0315: // Generate user-defined lexer file preamble
0316: println(grammar.preambleAction.getText());
0317:
0318: // Generate lexer class definition
0319: String sup = null;
if (grammar.superClass != null) {
sup = grammar.superClass;
0322: } else {
0323: sup = "antlr." + grammar.getSuperClass();
0324: }
0325:
0326: // print javadoc comment if any
0327: if (grammar.comment != null) {
0328: _println(grammar.comment);
0329: }
0330:
0331: // get prefix (replaces "public" and lets user specify)
0332: String prefix = "public";
0333: Token tprefix = (Token) grammar.options
0334: .get("classHeaderPrefix");
0335: if (tprefix != null) {
0336: String p = StringUtils.stripFrontBack(tprefix.getText(),
0337: "\"", "\"");
0338: if (p != null) {
0339: prefix = p;
0340: }
0341: }
0342:
0343: print(prefix + " ");
0344: print("class " + grammar.getClassName() + " extends " + sup);
0345: println(" implements " + grammar.tokenManager.getName()
0346: + TokenTypesFileSuffix + ", TokenStream");
0347: Token tsuffix = (Token) grammar.options
0348: .get("classHeaderSuffix");
0349: if (tsuffix != null) {
0350: String suffix = StringUtils.stripFrontBack(tsuffix
0351: .getText(), "\"", "\"");
0352: if (suffix != null) {
0353: print(", " + suffix); // must be an interface name for Java
0354: }
0355: }
0356: println(" {");
0357:
0358: // Generate user-defined lexer class members
0359: print(processActionForSpecialSymbols(grammar.classMemberAction
0360: .getText(), grammar.classMemberAction.getLine(),
0361: currentRule, null));
0362:
0363: //
0364: // Generate the constructor from InputStream, which in turn
0365: // calls the ByteBuffer constructor
0366: //
0367: println("public " + grammar.getClassName()
0368: + "(InputStream in) {");
0369: tabs++;
0370: println("this(new ByteBuffer(in));");
0371: tabs--;
0372: println("}");
0373:
0374: //
0375: // Generate the constructor from Reader, which in turn
0376: // calls the CharBuffer constructor
0377: //
0378: println("public " + grammar.getClassName() + "(Reader in) {");
0379: tabs++;
0380: println("this(new CharBuffer(in));");
0381: tabs--;
0382: println("}");
0383:
0384: println("public " + grammar.getClassName()
0385: + "(InputBuffer ib) {");
0386: tabs++;
0387: // if debugging, wrap the input buffer in a debugger
0388: if (grammar.debuggingOutput)
0389: println("this(new LexerSharedInputState(new antlr.debug.DebuggingInputBuffer(ib)));");
0390: else
0391: println("this(new LexerSharedInputState(ib));");
0392: tabs--;
0393: println("}");
0394:
0395: //
// Generate the constructor from LexerSharedInputState
0397: //
0398: println("public " + grammar.getClassName()
0399: + "(LexerSharedInputState state) {");
0400: tabs++;
0401:
0402: println("super(state);");
0403: // if debugging, set up array variables and call user-overridable
0404: // debugging setup method
0405: if (grammar.debuggingOutput) {
0406: println(" ruleNames = _ruleNames;");
0407: println(" semPredNames = _semPredNames;");
0408: println(" setupDebugging();");
0409: }
0410:
0411: // Generate the setting of various generated options.
0412: // These need to be before the literals since ANTLRHashString depends on
// the case-sensitivity settings.
0414: println("caseSensitiveLiterals = " + g.caseSensitiveLiterals
0415: + ";");
0416: println("setCaseSensitive(" + g.caseSensitive + ");");
0417:
0418: // Generate the initialization of a hashtable
0419: // containing the string literals used in the lexer
0420: // The literals variable itself is in CharScanner
0421: println("literals = new Hashtable();");
0422: Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
0423: while (keys.hasMoreElements()) {
0424: String key = (String) keys.nextElement();
0425: if (key.charAt(0) != '"') {
0426: continue;
0427: }
0428: TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
0429: if (sym instanceof StringLiteralSymbol) {
0430: StringLiteralSymbol s = (StringLiteralSymbol) sym;
0431: println("literals.put(new ANTLRHashString(" + s.getId()
0432: + ", this), new Integer(" + s.getTokenType()
0433: + "));");
0434: }
0435: }
0436: tabs--;
0437:
0438: Enumeration ids;
0439: println("}");
0440:
0441: // generate the rule name array for debugging
0442: if (grammar.debuggingOutput) {
0443: println("private static final String _ruleNames[] = {");
0444:
0445: ids = grammar.rules.elements();
0446: int ruleNum = 0;
0447: while (ids.hasMoreElements()) {
0448: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
0449: if (sym instanceof RuleSymbol)
0450: println(" \"" + ((RuleSymbol) sym).getId() + "\",");
0451: }
0452: println("};");
0453: }
0454:
0455: // Generate nextToken() rule.
0456: // nextToken() is a synthetic lexer rule that is the implicit OR of all
0457: // user-defined lexer rules.
0458: genNextToken();
0459:
0460: // Generate code for each rule in the lexer
0461: ids = grammar.rules.elements();
0462: int ruleNum = 0;
0463: while (ids.hasMoreElements()) {
0464: RuleSymbol sym = (RuleSymbol) ids.nextElement();
0465: // Don't generate the synthetic rules
0466: if (!sym.getId().equals("mnextToken")) {
0467: genRule(sym, false, ruleNum++);
0468: }
0469: exitIfError();
0470: }
0471:
0472: // Generate the semantic predicate map for debugging
0473: if (grammar.debuggingOutput)
0474: genSemPredMap();
0475:
0476: // Generate the bitsets used throughout the lexer
0477: genBitsets(bitsetsUsed, ((LexerGrammar) grammar).charVocabulary
0478: .size());
0479:
0480: println("");
0481: println("}");
0482:
0483: // Close the lexer output stream
0484: currentOutput.close();
0485: currentOutput = null;
0486: }
0487:
0488: /** Generate code for the given grammar element.
0489: * @param blk The (...)+ block to generate
0490: */
0491: public void gen(OneOrMoreBlock blk) {
0492: if (DEBUG_CODE_GENERATOR)
0493: System.out.println("gen+(" + blk + ")");
0494: String label;
0495: String cnt;
0496: println("{");
0497: genBlockPreamble(blk);
0498: if (blk.getLabel() != null) {
0499: cnt = "_cnt_" + blk.getLabel();
0500: } else {
0501: cnt = "_cnt" + blk.ID;
0502: }
0503: println("int " + cnt + "=0;");
0504: if (blk.getLabel() != null) {
0505: label = blk.getLabel();
0506: } else {
0507: label = "_loop" + blk.ID;
0508: }
0509: println(label + ":");
0510: println("do {");
0511: tabs++;
0512: // generate the init action for ()+ ()* inside the loop
// this allows us to do useful EOF checking...
0514: genBlockInitAction(blk);
0515:
0516: // Tell AST generation to build subrule result
0517: String saveCurrentASTResult = currentASTResult;
0518: if (blk.getLabel() != null) {
0519: currentASTResult = blk.getLabel();
0520: }
0521:
0522: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0523:
0524: // generate exit test if greedy set to false
0525: // and an alt is ambiguous with exit branch
0526: // or when lookahead derived purely from end-of-file
0527: // Lookahead analysis stops when end-of-file is hit,
0528: // returning set {epsilon}. Since {epsilon} is not
0529: // ambig with any real tokens, no error is reported
0530: // by deterministic() routines and we have to check
0531: // for the case where the lookahead depth didn't get
0532: // set to NONDETERMINISTIC (this only happens when the
0533: // FOLLOW contains real atoms + epsilon).
0534: boolean generateNonGreedyExitPath = false;
0535: int nonGreedyExitDepth = grammar.maxk;
0536:
0537: if (!blk.greedy
0538: && blk.exitLookaheadDepth <= grammar.maxk
0539: && blk.exitCache[blk.exitLookaheadDepth]
0540: .containsEpsilon()) {
0541: generateNonGreedyExitPath = true;
0542: nonGreedyExitDepth = blk.exitLookaheadDepth;
0543: } else if (!blk.greedy
0544: && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
0545: generateNonGreedyExitPath = true;
0546: }
0547:
0548: // generate exit test if greedy set to false
0549: // and an alt is ambiguous with exit branch
0550: if (generateNonGreedyExitPath) {
0551: if (DEBUG_CODE_GENERATOR) {
0552: System.out
0553: .println("nongreedy (...)+ loop; exit depth is "
0554: + blk.exitLookaheadDepth);
0555: }
0556: String predictExit = getLookaheadTestExpression(
0557: blk.exitCache, nonGreedyExitDepth);
0558: println("// nongreedy exit test");
0559: println("if ( " + cnt + ">=1 && " + predictExit
0560: + ") break " + label + ";");
0561: }
0562:
0563: JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
0564: genBlockFinish(howToFinish, "if ( " + cnt + ">=1 ) { break "
0565: + label + "; } else {" + throwNoViable + "}");
0566:
0567: println(cnt + "++;");
0568: tabs--;
0569: println("} while (true);");
0570: println("}");
0571:
0572: // Restore previous AST generation
0573: currentASTResult = saveCurrentASTResult;
0574: }
0575:
0576: /** Generate the parser Java file */
0577: public void gen(ParserGrammar g) throws IOException {
0578:
0579: // if debugging, set up a new vector to keep track of sempred
0580: // strings for this grammar
0581: if (g.debuggingOutput)
0582: semPreds = new Vector();
0583:
0584: setGrammar(g);
0585: if (!(grammar instanceof ParserGrammar)) {
0586: antlrTool.panic("Internal error generating parser");
0587: }
0588:
0589: // Open the output stream for the parser and set the currentOutput
0590: // SAS: moved file setup so subclass could do it (for VAJ interface)
0591: setupOutput(grammar.getClassName());
0592:
0593: genAST = grammar.buildAST;
0594:
0595: tabs = 0;
0596:
0597: // Generate the header common to all output files.
0598: genHeader();
0599: // Do not use printAction because we assume tabs==0
0600: println(behavior.getHeaderAction(""));
0601:
0602: // Generate header for the parser
0603: println("import antlr.TokenBuffer;");
0604: println("import antlr.TokenStreamException;");
0605: println("import antlr.TokenStreamIOException;");
0606: println("import antlr.ANTLRException;");
0607: println("import antlr." + grammar.getSuperClass() + ";");
0608: println("import antlr.Token;");
0609: println("import antlr.TokenStream;");
0610: println("import antlr.RecognitionException;");
0611: println("import antlr.NoViableAltException;");
0612: println("import antlr.MismatchedTokenException;");
0613: println("import antlr.SemanticException;");
0614: println("import antlr.ParserSharedInputState;");
0615: println("import antlr.collections.impl.BitSet;");
0616: if (genAST) {
0617: println("import antlr.collections.AST;");
0618: println("import java.util.Hashtable;");
0619: println("import antlr.ASTFactory;");
0620: println("import antlr.ASTPair;");
0621: println("import antlr.collections.impl.ASTArray;");
0622: }
0623:
0624: // Output the user-defined parser preamble
0625: println(grammar.preambleAction.getText());
0626:
0627: // Generate parser class definition
0628: String sup = null;
if (grammar.superClass != null)
sup = grammar.superClass;
0631: else
0632: sup = "antlr." + grammar.getSuperClass();
0633:
0634: // print javadoc comment if any
0635: if (grammar.comment != null) {
0636: _println(grammar.comment);
0637: }
0638:
0639: // get prefix (replaces "public" and lets user specify)
0640: String prefix = "public";
0641: Token tprefix = (Token) grammar.options
0642: .get("classHeaderPrefix");
0643: if (tprefix != null) {
0644: String p = StringUtils.stripFrontBack(tprefix.getText(),
0645: "\"", "\"");
0646: if (p != null) {
0647: prefix = p;
0648: }
0649: }
0650:
0651: print(prefix + " ");
0652: print("class " + grammar.getClassName() + " extends " + sup);
0653: println(" implements " + grammar.tokenManager.getName()
0654: + TokenTypesFileSuffix);
0655:
0656: Token tsuffix = (Token) grammar.options
0657: .get("classHeaderSuffix");
0658: if (tsuffix != null) {
0659: String suffix = StringUtils.stripFrontBack(tsuffix
0660: .getText(), "\"", "\"");
0661: if (suffix != null)
0662: print(", " + suffix); // must be an interface name for Java
0663: }
0664: println(" {");
0665:
0666: // set up an array of all the rule names so the debugger can
0667: // keep track of them only by number -- less to store in tree...
0668: if (grammar.debuggingOutput) {
0669: println("private static final String _ruleNames[] = {");
0670:
0671: Enumeration ids = grammar.rules.elements();
0672: int ruleNum = 0;
0673: while (ids.hasMoreElements()) {
0674: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
0675: if (sym instanceof RuleSymbol)
0676: println(" \"" + ((RuleSymbol) sym).getId() + "\",");
0677: }
0678: println("};");
0679: }
0680:
0681: // Generate user-defined parser class members
0682: print(processActionForSpecialSymbols(grammar.classMemberAction
0683: .getText(), grammar.classMemberAction.getLine(),
0684: currentRule, null));
0685:
0686: // Generate parser class constructor from TokenBuffer
0687: println("");
0688: println("protected " + grammar.getClassName()
0689: + "(TokenBuffer tokenBuf, int k) {");
0690: println(" super(tokenBuf,k);");
0691: println(" tokenNames = _tokenNames;");
0692: // if debugging, set up arrays and call the user-overridable
0693: // debugging setup method
0694: if (grammar.debuggingOutput) {
0695: println(" ruleNames = _ruleNames;");
0696: println(" semPredNames = _semPredNames;");
0697: println(" setupDebugging(tokenBuf);");
0698: }
0699: if (grammar.buildAST) {
0700: println(" buildTokenTypeASTClassMap();");
0701: println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
0702: }
0703: println("}");
0704: println("");
0705:
0706: println("public " + grammar.getClassName()
0707: + "(TokenBuffer tokenBuf) {");
0708: println(" this(tokenBuf," + grammar.maxk + ");");
0709: println("}");
0710: println("");
0711:
0712: // Generate parser class constructor from TokenStream
0713: println("protected " + grammar.getClassName()
0714: + "(TokenStream lexer, int k) {");
0715: println(" super(lexer,k);");
0716: println(" tokenNames = _tokenNames;");
0717:
0718: // if debugging, set up arrays and call the user-overridable
0719: // debugging setup method
0720: if (grammar.debuggingOutput) {
0721: println(" ruleNames = _ruleNames;");
0722: println(" semPredNames = _semPredNames;");
0723: println(" setupDebugging(lexer);");
0724: }
0725: if (grammar.buildAST) {
0726: println(" buildTokenTypeASTClassMap();");
0727: println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
0728: }
0729: println("}");
0730: println("");
0731:
0732: println("public " + grammar.getClassName()
0733: + "(TokenStream lexer) {");
0734: println(" this(lexer," + grammar.maxk + ");");
0735: println("}");
0736: println("");
0737:
0738: println("public " + grammar.getClassName()
0739: + "(ParserSharedInputState state) {");
0740: println(" super(state," + grammar.maxk + ");");
0741: println(" tokenNames = _tokenNames;");
0742: if (grammar.buildAST) {
0743: println(" buildTokenTypeASTClassMap();");
0744: println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
0745: }
0746: println("}");
0747: println("");
0748:
0749: // Generate code for each rule in the grammar
0750: Enumeration ids = grammar.rules.elements();
0751: int ruleNum = 0;
0752: while (ids.hasMoreElements()) {
0753: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
0754: if (sym instanceof RuleSymbol) {
0755: RuleSymbol rs = (RuleSymbol) sym;
0756: genRule(rs, rs.references.size() == 0, ruleNum++);
0757: }
0758: exitIfError();
0759: }
0760:
0761: // Generate the token names
0762: genTokenStrings();
0763:
0764: if (grammar.buildAST) {
0765: genTokenASTNodeMap();
0766: }
0767:
0768: // Generate the bitsets used throughout the grammar
0769: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
0770:
0771: // Generate the semantic predicate map for debugging
0772: if (grammar.debuggingOutput)
0773: genSemPredMap();
0774:
0775: // Close class definition
0776: println("");
0777: println("}");
0778:
0779: // Close the parser output stream
0780: currentOutput.close();
0781: currentOutput = null;
0782: }
0783:
0784: /** Generate code for the given grammar element.
* @param rr The rule-reference to generate
0786: */
0787: public void gen(RuleRefElement rr) {
0788: if (DEBUG_CODE_GENERATOR)
0789: System.out.println("genRR(" + rr + ")");
0790: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
0791: if (rs == null || !rs.isDefined()) {
0792: // Is this redundant???
0793: antlrTool.error("Rule '" + rr.targetRule
0794: + "' is not defined", grammar.getFilename(), rr
0795: .getLine(), rr.getColumn());
0796: return;
0797: }
0798: if (!(rs instanceof RuleSymbol)) {
0799: // Is this redundant???
0800: antlrTool.error("'" + rr.targetRule
0801: + "' does not name a grammar rule", grammar
0802: .getFilename(), rr.getLine(), rr.getColumn());
0803: return;
0804: }
0805:
0806: genErrorTryForElement(rr);
0807:
0808: // AST value for labeled rule refs in tree walker.
0809: // This is not AST construction; it is just the input tree node value.
0810: if (grammar instanceof TreeWalkerGrammar
0811: && rr.getLabel() != null && syntacticPredLevel == 0) {
0812: println(rr.getLabel() + " = _t==ASTNULL ? null : "
0813: + lt1Value + ";");
0814: }
0815:
0816: // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
0817: if (grammar instanceof LexerGrammar
0818: && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0819: println("_saveIndex=text.length();");
0820: }
0821:
0822: // Process return value assignment if any
0823: printTabs();
0824: if (rr.idAssign != null) {
0825: // Warn if the rule has no return type
0826: if (rs.block.returnAction == null) {
0827: antlrTool.warning("Rule '" + rr.targetRule
0828: + "' has no return type",
0829: grammar.getFilename(), rr.getLine(), rr
0830: .getColumn());
0831: }
0832: _print(rr.idAssign + "=");
0833: } else {
0834: // Warn about return value if any, but not inside syntactic predicate
0835: if (!(grammar instanceof LexerGrammar)
0836: && syntacticPredLevel == 0
0837: && rs.block.returnAction != null) {
0838: antlrTool.warning("Rule '" + rr.targetRule
0839: + "' returns a value", grammar.getFilename(),
0840: rr.getLine(), rr.getColumn());
0841: }
0842: }
0843:
0844: // Call the rule
0845: GenRuleInvocation(rr);
0846:
0847: // if in lexer and ! on element or alt or rule, save buffer index to kill later
0848: if (grammar instanceof LexerGrammar
0849: && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0850: println("text.setLength(_saveIndex);");
0851: }
0852:
0853: // if not in a syntactic predicate
0854: if (syntacticPredLevel == 0) {
0855: boolean doNoGuessTest = (grammar.hasSyntacticPredicate && (grammar.buildAST
0856: && rr.getLabel() != null || (genAST && rr
0857: .getAutoGenType() == GrammarElement.AUTO_GEN_NONE)));
0858: if (doNoGuessTest) {
0859: // println("if (inputState.guessing==0) {");
0860: // tabs++;
0861: }
0862:
0863: if (grammar.buildAST && rr.getLabel() != null) {
0864: // always gen variable for rule return on labeled rules
0865: println(rr.getLabel() + "_AST = ("
0866: + labeledElementASTType + ")returnAST;");
0867: }
0868: if (genAST) {
0869: switch (rr.getAutoGenType()) {
0870: case GrammarElement.AUTO_GEN_NONE:
0871: // println("theASTFactory.addASTChild(currentAST, returnAST);");
0872: println("astFactory.addASTChild(currentAST, returnAST);");
0873: break;
0874: case GrammarElement.AUTO_GEN_CARET:
0875: antlrTool
0876: .error("Internal: encountered ^ after rule reference");
0877: break;
0878: default:
0879: break;
0880: }
0881: }
0882:
0883: // if a lexer and labeled, Token label defined at rule level, just set it here
0884: if (grammar instanceof LexerGrammar
0885: && rr.getLabel() != null) {
0886: println(rr.getLabel() + "=_returnToken;");
0887: }
0888:
0889: if (doNoGuessTest) {
0890: // tabs--;
0891: // println("}");
0892: }
0893: }
0894: genErrorCatchForElement(rr);
0895: }
0896:
0897: /** Generate code for the given grammar element.
* @param atom The string-literal reference to generate
0899: */
0900: public void gen(StringLiteralElement atom) {
0901: if (DEBUG_CODE_GENERATOR)
0902: System.out.println("genString(" + atom + ")");
0903:
0904: // Variable declarations for labeled elements
0905: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0906: println(atom.getLabel() + " = " + lt1Value + ";");
0907: }
0908:
0909: // AST
0910: genElementAST(atom);
0911:
0912: // is there a bang on the literal?
0913: boolean oldsaveText = saveText;
0914: saveText = saveText
0915: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0916:
0917: // matching
0918: genMatch(atom);
0919:
0920: saveText = oldsaveText;
0921:
0922: // tack on tree cursor motion if doing a tree walker
0923: if (grammar instanceof TreeWalkerGrammar) {
0924: println("_t = _t.getNextSibling();");
0925: }
0926: }
0927:
0928: /** Generate code for the given grammar element.
* @param r The token-range reference to generate
0930: */
0931: public void gen(TokenRangeElement r) {
0932: genErrorTryForElement(r);
0933: if (r.getLabel() != null && syntacticPredLevel == 0) {
0934: println(r.getLabel() + " = " + lt1Value + ";");
0935: }
0936:
0937: // AST
0938: genElementAST(r);
0939:
0940: // match
0941: println("matchRange(" + r.beginText + "," + r.endText + ");");
0942: genErrorCatchForElement(r);
0943: }
0944:
0945: /** Generate code for the given grammar element.
* @param atom The token-reference to generate
0947: */
0948: public void gen(TokenRefElement atom) {
0949: if (DEBUG_CODE_GENERATOR)
0950: System.out.println("genTokenRef(" + atom + ")");
0951: if (grammar instanceof LexerGrammar) {
0952: antlrTool.panic("Token reference found in lexer");
0953: }
0954: genErrorTryForElement(atom);
0955: // Assign Token value to token label variable
0956: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0957: println(atom.getLabel() + " = " + lt1Value + ";");
0958: }
0959:
0960: // AST
0961: genElementAST(atom);
0962: // matching
0963: genMatch(atom);
0964: genErrorCatchForElement(atom);
0965:
0966: // tack on tree cursor motion if doing a tree walker
0967: if (grammar instanceof TreeWalkerGrammar) {
0968: println("_t = _t.getNextSibling();");
0969: }
0970: }
0971:
0972: public void gen(TreeElement t) {
0973: // save AST cursor
0974: println("AST __t" + t.ID + " = _t;");
0975:
0976: // If there is a label on the root, then assign that to the variable
0977: if (t.root.getLabel() != null) {
0978: println(t.root.getLabel() + " = _t==ASTNULL ? null :("
0979: + labeledElementASTType + ")_t;");
0980: }
0981:
0982: // check for invalid modifiers ! and ^ on tree element roots
0983: if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG) {
0984: antlrTool
0985: .error(
0986: "Suffixing a root node with '!' is not implemented",
0987: grammar.getFilename(), t.getLine(), t
0988: .getColumn());
0989: t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
0990: }
0991: if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET) {
0992: antlrTool
0993: .warning(
0994: "Suffixing a root node with '^' is redundant; already a root",
0995: grammar.getFilename(), t.getLine(), t
0996: .getColumn());
0997: t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
0998: }
0999:
1000: // Generate AST variables
1001: genElementAST(t.root);
1002: if (grammar.buildAST) {
1003: // Save the AST construction state
1004: println("ASTPair __currentAST" + t.ID
1005: + " = currentAST.copy();");
1006: // Make the next item added a child of the TreeElement root
1007: println("currentAST.root = currentAST.child;");
1008: println("currentAST.child = null;");
1009: }
1010:
1011: // match root
1012: if (t.root instanceof WildcardElement) {
1013: println("if ( _t==null ) throw new MismatchedTokenException();");
1014: } else {
1015: genMatch(t.root);
1016: }
1017: // move to list of children
1018: println("_t = _t.getFirstChild();");
1019:
1020: // walk list of children, generating code for each
1021: for (int i = 0; i < t.getAlternatives().size(); i++) {
1022: Alternative a = t.getAlternativeAt(i);
1023: AlternativeElement e = a.head;
1024: while (e != null) {
1025: e.generate();
1026: e = e.next;
1027: }
1028: }
1029:
1030: if (grammar.buildAST) {
1031: // restore the AST construction state to that just after the
1032: // tree root was added
1033: println("currentAST = __currentAST" + t.ID + ";");
1034: }
1035: // restore AST cursor
1036: println("_t = __t" + t.ID + ";");
1037: // move cursor to sibling of tree just parsed
1038: println("_t = _t.getNextSibling();");
1039: }
1040:
1041: /** Generate the tree-parser Java file */
1042: public void gen(TreeWalkerGrammar g) throws IOException {
1043: // SAS: debugging stuff removed for now...
1044: setGrammar(g);
1045: if (!(grammar instanceof TreeWalkerGrammar)) {
1046: antlrTool.panic("Internal error generating tree-walker");
1047: }
1048: // Open the output stream for the parser and set the currentOutput
1049: // SAS: move file open to method so subclass can override it
1050: // (mainly for VAJ interface)
1051: setupOutput(grammar.getClassName());
1052:
1053: genAST = grammar.buildAST;
1054: tabs = 0;
1055:
1056: // Generate the header common to all output files.
1057: genHeader();
1058: // Do not use printAction because we assume tabs==0
1059: println(behavior.getHeaderAction(""));
1060:
1061: // Generate header for the parser
1062: println("import antlr." + grammar.getSuperClass() + ";");
1063: println("import antlr.Token;");
1064: println("import antlr.collections.AST;");
1065: println("import antlr.RecognitionException;");
1066: println("import antlr.ANTLRException;");
1067: println("import antlr.NoViableAltException;");
1068: println("import antlr.MismatchedTokenException;");
1069: println("import antlr.SemanticException;");
1070: println("import antlr.collections.impl.BitSet;");
1071: println("import antlr.ASTPair;");
1072: println("import antlr.collections.impl.ASTArray;");
1073:
// Output the user-defined parser preamble
1075: println(grammar.preambleAction.getText());
1076:
1077: // Generate parser class definition
1078: String sup = null;
if (grammar.superClass != null) {
sup = grammar.superClass;
1081: } else {
1082: sup = "antlr." + grammar.getSuperClass();
1083: }
1084: println("");
1085:
1086: // print javadoc comment if any
1087: if (grammar.comment != null) {
1088: _println(grammar.comment);
1089: }
1090:
1091: // get prefix (replaces "public" and lets user specify)
1092: String prefix = "public";
1093: Token tprefix = (Token) grammar.options
1094: .get("classHeaderPrefix");
1095: if (tprefix != null) {
1096: String p = StringUtils.stripFrontBack(tprefix.getText(),
1097: "\"", "\"");
1098: if (p != null) {
1099: prefix = p;
1100: }
1101: }
1102:
1103: print(prefix + " ");
1104: print("class " + grammar.getClassName() + " extends " + sup);
1105: println(" implements " + grammar.tokenManager.getName()
1106: + TokenTypesFileSuffix);
1107: Token tsuffix = (Token) grammar.options
1108: .get("classHeaderSuffix");
1109: if (tsuffix != null) {
1110: String suffix = StringUtils.stripFrontBack(tsuffix
1111: .getText(), "\"", "\"");
1112: if (suffix != null) {
1113: print(", " + suffix); // must be an interface name for Java
1114: }
1115: }
1116: println(" {");
1117:
1118: // Generate user-defined parser class members
1119: print(processActionForSpecialSymbols(grammar.classMemberAction
1120: .getText(), grammar.classMemberAction.getLine(),
1121: currentRule, null));
1122:
1123: // Generate default parser class constructor
1124: println("public " + grammar.getClassName() + "() {");
1125: tabs++;
1126: println("tokenNames = _tokenNames;");
1127: tabs--;
1128: println("}");
1129: println("");
1130:
1131: // Generate code for each rule in the grammar
1132: Enumeration ids = grammar.rules.elements();
1133: int ruleNum = 0;
1134: String ruleNameInits = "";
1135: while (ids.hasMoreElements()) {
1136: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1137: if (sym instanceof RuleSymbol) {
1138: RuleSymbol rs = (RuleSymbol) sym;
1139: genRule(rs, rs.references.size() == 0, ruleNum++);
1140: }
1141: exitIfError();
1142: }
1143:
1144: // Generate the token names
1145: genTokenStrings();
1146:
1147: // Generate the bitsets used throughout the grammar
1148: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
1149:
1150: // Close class definition
1151: println("}");
1152: println("");
1153:
1154: // Close the parser output stream
1155: currentOutput.close();
1156: currentOutput = null;
1157: }
1158:
1159: /** Generate code for the given grammar element.
1160: * @param wc The wildcard element to generate
1161: */
1162: public void gen(WildcardElement wc) {
1163: // Variable assignment for labeled elements
1164: if (wc.getLabel() != null && syntacticPredLevel == 0) {
1165: println(wc.getLabel() + " = " + lt1Value + ";");
1166: }
1167:
1168: // AST
1169: genElementAST(wc);
1170: // Match anything but EOF
1171: if (grammar instanceof TreeWalkerGrammar) {
1172: println("if ( _t==null ) throw new MismatchedTokenException();");
1173: } else if (grammar instanceof LexerGrammar) {
1174: if (grammar instanceof LexerGrammar
1175: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
1176: println("_saveIndex=text.length();");
1177: }
1178: println("matchNot(EOF_CHAR);");
1179: if (grammar instanceof LexerGrammar
1180: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
1181: println("text.setLength(_saveIndex);"); // kill text atom put in buffer
1182: }
1183: } else {
1184: println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
1185: }
1186:
1187: // tack on tree cursor motion if doing a tree walker
1188: if (grammar instanceof TreeWalkerGrammar) {
1189: println("_t = _t.getNextSibling();");
1190: }
1191: }
1192:
1193: /** Generate code for the given grammar element.
1194: * @param blk The (...)* block to generate
1195: */
1196: public void gen(ZeroOrMoreBlock blk) {
1197: if (DEBUG_CODE_GENERATOR)
1198: System.out.println("gen*(" + blk + ")");
1199: println("{");
1200: genBlockPreamble(blk);
1201: String label;
1202: if (blk.getLabel() != null) {
1203: label = blk.getLabel();
1204: } else {
1205: label = "_loop" + blk.ID;
1206: }
1207: println(label + ":");
1208: println("do {");
1209: tabs++;
1210: // generate the init action for ()* inside the loop
// this allows us to do useful EOF checking...
1212: genBlockInitAction(blk);
1213:
1214: // Tell AST generation to build subrule result
1215: String saveCurrentASTResult = currentASTResult;
1216: if (blk.getLabel() != null) {
1217: currentASTResult = blk.getLabel();
1218: }
1219:
1220: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
1221:
1222: // generate exit test if greedy set to false
1223: // and an alt is ambiguous with exit branch
1224: // or when lookahead derived purely from end-of-file
1225: // Lookahead analysis stops when end-of-file is hit,
1226: // returning set {epsilon}. Since {epsilon} is not
1227: // ambig with any real tokens, no error is reported
1228: // by deterministic() routines and we have to check
1229: // for the case where the lookahead depth didn't get
1230: // set to NONDETERMINISTIC (this only happens when the
1231: // FOLLOW contains real atoms + epsilon).
1232: boolean generateNonGreedyExitPath = false;
1233: int nonGreedyExitDepth = grammar.maxk;
1234:
1235: if (!blk.greedy
1236: && blk.exitLookaheadDepth <= grammar.maxk
1237: && blk.exitCache[blk.exitLookaheadDepth]
1238: .containsEpsilon()) {
1239: generateNonGreedyExitPath = true;
1240: nonGreedyExitDepth = blk.exitLookaheadDepth;
1241: } else if (!blk.greedy
1242: && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
1243: generateNonGreedyExitPath = true;
1244: }
1245: if (generateNonGreedyExitPath) {
1246: if (DEBUG_CODE_GENERATOR) {
1247: System.out
1248: .println("nongreedy (...)* loop; exit depth is "
1249: + blk.exitLookaheadDepth);
1250: }
1251: String predictExit = getLookaheadTestExpression(
1252: blk.exitCache, nonGreedyExitDepth);
1253: println("// nongreedy exit test");
1254: println("if (" + predictExit + ") break " + label + ";");
1255: }
1256:
1257: JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
1258: genBlockFinish(howToFinish, "break " + label + ";");
1259:
1260: tabs--;
1261: println("} while (true);");
1262: println("}");
1263:
1264: // Restore previous AST generation
1265: currentASTResult = saveCurrentASTResult;
1266: }
1267:
1268: /** Generate an alternative.
1269: * @param alt The alternative to generate
1270: * @param blk The block to which the alternative belongs
1271: */
1272: protected void genAlt(Alternative alt, AlternativeBlock blk) {
1273: // Save the AST generation state, and set it to that of the alt
1274: boolean savegenAST = genAST;
1275: genAST = genAST && alt.getAutoGen();
1276:
1277: boolean oldsaveTest = saveText;
1278: saveText = saveText && alt.getAutoGen();
1279:
1280: // Reset the variable name map for the alternative
1281: Hashtable saveMap = treeVariableMap;
1282: treeVariableMap = new Hashtable();
1283:
1284: // Generate try block around the alt for error handling
1285: if (alt.exceptionSpec != null) {
1286: println("try { // for error handling");
1287: tabs++;
1288: }
1289:
1290: AlternativeElement elem = alt.head;
1291: while (!(elem instanceof BlockEndElement)) {
1292: elem.generate(); // alt can begin with anything. Ask target to gen.
1293: elem = elem.next;
1294: }
1295:
1296: if (genAST) {
1297: if (blk instanceof RuleBlock) {
1298: // Set the AST return value for the rule
1299: RuleBlock rblk = (RuleBlock) blk;
1300: if (grammar.hasSyntacticPredicate) {
1301: // println("if ( inputState.guessing==0 ) {");
1302: // tabs++;
1303: }
1304: println(rblk.getRuleName() + "_AST = ("
1305: + labeledElementASTType + ")currentAST.root;");
1306: if (grammar.hasSyntacticPredicate) {
1307: // --tabs;
1308: // println("}");
1309: }
1310: } else if (blk.getLabel() != null) {
1311: // ### future: also set AST value for labeled subrules.
1312: // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
1313: antlrTool.warning("Labeled subrules not yet supported",
1314: grammar.getFilename(), blk.getLine(), blk
1315: .getColumn());
1316: }
1317: }
1318:
1319: if (alt.exceptionSpec != null) {
1320: // close try block
1321: tabs--;
1322: println("}");
1323: genErrorHandler(alt.exceptionSpec);
1324: }
1325:
1326: genAST = savegenAST;
1327: saveText = oldsaveTest;
1328:
1329: treeVariableMap = saveMap;
1330: }
1331:
1332: /** Generate all the bitsets to be used in the parser or lexer
1333: * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
1334: * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
1335: * Note that most languages do not support object initialization inside a
1336: * class definition, so other code-generators may have to separate the
1337: * bitset declarations from the initializations (e.g., put the initializations
1338: * in the generated constructor instead).
1339: * @param bitsetList The list of bitsets to generate.
1340: * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
1341: */
1342: protected void genBitsets(Vector bitsetList, int maxVocabulary) {
1343: println("");
1344: for (int i = 0; i < bitsetList.size(); i++) {
1345: BitSet p = (BitSet) bitsetList.elementAt(i);
1346: // Ensure that generated BitSet is large enough for vocabulary
1347: p.growToInclude(maxVocabulary);
1348: genBitSet(p, i);
1349: }
1350: }
1351:
1352: /** Do something simple like:
1353: * private static final long[] mk_tokenSet_0() {
1354: * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
1355: * return data;
1356: * }
1357: * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
1358: *
1359: * Or, for large bitsets, optimize init so ranges are collapsed into loops.
1360: * This is most useful for lexers using unicode.
1361: */
1362: private void genBitSet(BitSet p, int id) {
1363: // initialization data
1364: println("private static final long[] mk" + getBitsetName(id)
1365: + "() {");
1366: int n = p.lengthInLongWords();
1367: if (n < BITSET_OPTIMIZE_INIT_THRESHOLD) {
1368: println("\tlong[] data = { " + p.toStringOfWords() + "};");
1369: } else {
1370: // will init manually, allocate space then set values
1371: println("\tlong[] data = new long[" + n + "];");
1372: long[] elems = p.toPackedArray();
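// Runs of identical words collapse into a loop; the indices and value
// below are illustrative:
//   for (int i = 2; i<=5; i++) { data[i]=-1L; }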
1373: for (int i = 0; i < elems.length;) {
1374: if (elems[i] == 0) {
1375: // done automatically by Java, don't waste time/code
1376: i++;
1377: continue;
1378: }
1379: if ((i + 1) == elems.length || elems[i] != elems[i + 1]) {
1380: // last number or no run of numbers, just dump assignment
1381: println("\tdata[" + i + "]=" + elems[i] + "L;");
1382: i++;
1383: } else {
1384: // scan to find end of run
1385: int j;
1386: for (j = i + 1; j < elems.length
1387: && elems[j] == elems[i]; j++) {
1388: }
1389: // j-1 is last member of run
1390: println("\tfor (int i = " + i + "; i<=" + (j - 1)
1391: + "; i++) { data[i]=" + elems[i] + "L; }");
1392: i = j;
1393: }
1394: }
1395: }
1396:
1397: println("\treturn data;");
1398: println("}");
1399: // BitSet object
1400: println("public static final BitSet " + getBitsetName(id)
1401: + " = new BitSet(" + "mk" + getBitsetName(id) + "()"
1402: + ");");
1403: }
1404:
1405: /** Generate the finish of a block, using a combination of the info
1406: * returned from genCommonBlock() and the action to perform when
1407: * no alts were taken
1408: * @param howToFinish The return of genCommonBlock()
1409: * @param noViableAction What to generate when no alt is taken
1410: */
1411: private void genBlockFinish(JavaBlockFinishingInfo howToFinish,
1412: String noViableAction) {
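// For a rule block, noViableAction is typically the throwNoViable
// string, so the emitted tail looks roughly like:
//   else { throw new NoViableAltException(LT(1), getFilename()); }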
1413: if (howToFinish.needAnErrorClause
1414: && (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
1415: if (howToFinish.generatedAnIf) {
1416: println("else {");
1417: } else {
1418: println("{");
1419: }
1420: tabs++;
1421: println(noViableAction);
1422: tabs--;
1423: println("}");
1424: }
1425:
1426: if (howToFinish.postscript != null) {
1427: println(howToFinish.postscript);
1428: }
1429: }
1430:
1431: /** Generate the init action for a block, which may be a RuleBlock or a
* plain AlternativeBlock.
* @param blk The block for which the init action is to be generated.
1434: */
1435: protected void genBlockInitAction(AlternativeBlock blk) {
1436: // dump out init action
1437: if (blk.initAction != null) {
1438: printAction(processActionForSpecialSymbols(blk.initAction,
1439: blk.getLine(), currentRule, null));
1440: }
1441: }
1442:
1443: /** Generate the header for a block, which may be a RuleBlock or a
* plain AlternativeBlock. This generates any variable declarations
* and syntactic-predicate-testing variables.
* @param blk The block for which the preamble is to be generated.
1447: */
1448: protected void genBlockPreamble(AlternativeBlock blk) {
1449: // define labels for rule blocks.
1450: if (blk instanceof RuleBlock) {
1451: RuleBlock rblk = (RuleBlock) blk;
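// Sketch of the declarations emitted for a labeled token ref such as
// "id:ID" in an AST-building parser (the exact types come from
// labeledElementType/labeledElementASTType):
//   Token  id = null;
//   AST id_AST = null;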
1452: if (rblk.labeledElements != null) {
1453: for (int i = 0; i < rblk.labeledElements.size(); i++) {
1454: AlternativeElement a = (AlternativeElement) rblk.labeledElements
1455: .elementAt(i);
1456: // System.out.println("looking at labeled element: "+a);
1457: // Variables for labeled rule refs and
1458: // subrules are different than variables for
1459: // grammar atoms. This test is a little tricky
1460: // because we want to get all rule refs and ebnf,
1461: // but not rule blocks or syntactic predicates
1462: if (a instanceof RuleRefElement
1463: || a instanceof AlternativeBlock
1464: && !(a instanceof RuleBlock)
1465: && !(a instanceof SynPredBlock)) {
1466:
1467: if (!(a instanceof RuleRefElement)
1468: && ((AlternativeBlock) a).not
1469: && analyzer
1470: .subruleCanBeInverted(
1471: ((AlternativeBlock) a),
1472: grammar instanceof LexerGrammar)) {
1473: // Special case for inverted subrules that
1474: // will be inlined. Treat these like
1475: // token or char literal references
1476: println(labeledElementType + " "
1477: + a.getLabel() + " = "
1478: + labeledElementInit + ";");
1479: if (grammar.buildAST) {
1480: genASTDeclaration(a);
1481: }
1482: } else {
1483: if (grammar.buildAST) {
1484: // Always gen AST variables for
1485: // labeled elements, even if the
1486: // element itself is marked with !
1487: genASTDeclaration(a);
1488: }
1489: if (grammar instanceof LexerGrammar) {
1490: println("Token " + a.getLabel()
1491: + "=null;");
1492: }
1493: if (grammar instanceof TreeWalkerGrammar) {
1494: // always generate rule-ref variables
1495: // for tree walker
1496: println(labeledElementType + " "
1497: + a.getLabel() + " = "
1498: + labeledElementInit + ";");
1499: }
1500: }
1501: } else {
1502: // It is a token or literal reference. Generate the
1503: // correct variable type for this grammar
1504: println(labeledElementType + " " + a.getLabel()
1505: + " = " + labeledElementInit + ";");
1506:
1507: // In addition, generate *_AST variables if
1508: // building ASTs
1509: if (grammar.buildAST) {
1510: if (a instanceof GrammarAtom
1511: && ((GrammarAtom) a)
1512: .getASTNodeType() != null) {
1513: GrammarAtom ga = (GrammarAtom) a;
1514: genASTDeclaration(a, ga
1515: .getASTNodeType());
1516: } else {
1517: genASTDeclaration(a);
1518: }
1519: }
1520: }
1521: }
1522: }
1523: }
1524: }
1525:
1526: /** Generate a series of case statements that implement a BitSet test.
1527: * @param p The Bitset for which cases are to be generated
1528: */
1529: protected void genCases(BitSet p) {
1530: if (DEBUG_CODE_GENERATOR)
1531: System.out.println("genCases(" + p + ")");
1532: int[] elems;
1533:
1534: elems = p.toArray();
1535: // Wrap cases four-per-line for lexer, one-per-line for parser
1536: int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
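// e.g., in a lexer the emitted cases wrap four per line:
//   case 'a': case 'b': case 'c': case 'd':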
1537: int j = 1;
1538: boolean startOfLine = true;
1539: for (int i = 0; i < elems.length; i++) {
1540: if (j == 1) {
1541: print("");
1542: } else {
1543: _print(" ");
1544: }
1545: _print("case " + getValueString(elems[i]) + ":");
1546:
1547: if (j == wrap) {
1548: _println("");
1549: startOfLine = true;
1550: j = 1;
1551: } else {
1552: j++;
1553: startOfLine = false;
1554: }
1555: }
1556: if (!startOfLine) {
1557: _println("");
1558: }
1559: }
1560:
1561: /**Generate common code for a block of alternatives; return a
1562: * postscript that needs to be generated at the end of the
1563: * block. Other routines may append else-clauses and such for
1564: * error checking before the postfix is generated. If the
1565: * grammar is a lexer, then generate alternatives in an order
1566: * where alternatives requiring deeper lookahead are generated
1567: * first, and EOF in the lookahead set reduces the depth of
* the lookahead.
* @param blk The block to generate
* @param noTestForSingle If true, then it does not generate a test
* for a single alternative.
1571: */
1572: public JavaBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
1573: boolean noTestForSingle) {
1574: int nIF = 0;
1575: boolean createdLL1Switch = false;
1576: int closingBracesOfIFSequence = 0;
1577: JavaBlockFinishingInfo finishingInfo = new JavaBlockFinishingInfo();
1578: if (DEBUG_CODE_GENERATOR)
1579: System.out.println("genCommonBlock(" + blk + ")");
1580:
1581: // Save the AST generation state, and set it to that of the block
1582: boolean savegenAST = genAST;
1583: genAST = genAST && blk.getAutoGen();
1584:
1585: boolean oldsaveTest = saveText;
1586: saveText = saveText && blk.getAutoGen();
1587:
1588: // Is this block inverted? If so, generate special-case code
1589: if (blk.not
1590: && analyzer.subruleCanBeInverted(blk,
1591: grammar instanceof LexerGrammar)) {
1592: if (DEBUG_CODE_GENERATOR)
1593: System.out.println("special case: ~(subrule)");
1594: Lookahead p = analyzer.look(1, blk);
1595: // Variable assignment for labeled elements
1596: if (blk.getLabel() != null && syntacticPredLevel == 0) {
1597: println(blk.getLabel() + " = " + lt1Value + ";");
1598: }
1599:
1600: // AST
1601: genElementAST(blk);
1602:
1603: String astArgs = "";
1604: if (grammar instanceof TreeWalkerGrammar) {
1605: astArgs = "_t,";
1606: }
1607:
1608: // match the bitset for the alternative
1609: println("match(" + astArgs
1610: + getBitsetName(markBitsetForGen(p.fset)) + ");");
1611:
1612: // tack on tree cursor motion if doing a tree walker
1613: if (grammar instanceof TreeWalkerGrammar) {
1614: println("_t = _t.getNextSibling();");
1615: }
1616: return finishingInfo;
1617: }
1618:
1619: // Special handling for single alt
1620: if (blk.getAlternatives().size() == 1) {
1621: Alternative alt = blk.getAlternativeAt(0);
1622: // Generate a warning if there is a synPred for single alt.
1623: if (alt.synPred != null) {
1624: antlrTool
1625: .warning(
1626: "Syntactic predicate superfluous for single alternative",
1627: grammar.getFilename(), blk
1628: .getAlternativeAt(0).synPred
1629: .getLine(), blk
1630: .getAlternativeAt(0).synPred
1631: .getColumn());
1632: }
1633: if (noTestForSingle) {
1634: if (alt.semPred != null) {
1635: // Generate validating predicate
1636: genSemPred(alt.semPred, blk.line);
1637: }
1638: genAlt(alt, blk);
1639: return finishingInfo;
1640: }
1641: }
1642:
1643: // count number of simple LL(1) cases; only do switch for
1644: // many LL(1) cases (no preds, no end of token refs)
1645: // We don't care about exit paths for (...)*, (...)+
1646: // because we don't explicitly have a test for them
1647: // as an alt in the loop.
1648: //
1649: // Also, we now count how many unicode lookahead sets
1650: // there are--they must be moved to DEFAULT or ELSE
1651: // clause.
1652: int nLL1 = 0;
1653: for (int i = 0; i < blk.getAlternatives().size(); i++) {
1654: Alternative a = blk.getAlternativeAt(i);
1655: if (suitableForCaseExpression(a)) {
1656: nLL1++;
1657: }
1658: }
1659:
1660: // do LL(1) cases
1661: if (nLL1 >= makeSwitchThreshold) {
1662: // Determine the name of the item to be compared
1663: String testExpr = lookaheadString(1);
1664: createdLL1Switch = true;
1665: // when parsing trees, convert null to valid tree node with NULL lookahead
1666: if (grammar instanceof TreeWalkerGrammar) {
1667: println("if (_t==null) _t=ASTNULL;");
1668: }
1669: println("switch ( " + testExpr + ") {");
1670: for (int i = 0; i < blk.alternatives.size(); i++) {
1671: Alternative alt = blk.getAlternativeAt(i);
1672: // ignore any non-LL(1) alts, predicated alts,
1673: // or end-of-token alts for case expressions
1674: if (!suitableForCaseExpression(alt)) {
1675: continue;
1676: }
1677: Lookahead p = alt.cache[1];
1678: if (p.fset.degree() == 0 && !p.containsEpsilon()) {
1679: antlrTool
1680: .warning(
1681: "Alternate omitted due to empty prediction set",
1682: grammar.getFilename(), alt.head
1683: .getLine(), alt.head
1684: .getColumn());
1685: } else {
1686: genCases(p.fset);
1687: println("{");
1688: tabs++;
1689: genAlt(alt, blk);
1690: println("break;");
1691: tabs--;
1692: println("}");
1693: }
1694: }
1695: println("default:");
1696: tabs++;
1697: }
1698:
// do non-LL(1) and nondeterministic cases. This is tricky in
// the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
// : "*="; Since nextToken is generated without a loop, the
// STAR will have end-of-token as its lookahead set for
// LA(2). So, we must generate the alternatives containing
// trailing end-of-token in their lookahead sets *after* the
// alternatives without end-of-token. This implements the
// usual lexer convention that longer matches come before
// shorter ones, e.g. "*=" matches ASSIGN_STAR not STAR.
//
// For non-lexer grammars, this does not sort the alternates
// by depth. Note that alts whose lookahead is purely
// end-of-token at k=1 end up as default or else clauses.
1712: int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk
1713: : 0;
1714: for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
1715: if (DEBUG_CODE_GENERATOR)
1716: System.out.println("checking depth " + altDepth);
1717: for (int i = 0; i < blk.alternatives.size(); i++) {
1718: Alternative alt = blk.getAlternativeAt(i);
1719: if (DEBUG_CODE_GENERATOR)
1720: System.out.println("genAlt: " + i);
1721: // if we made a switch above, ignore what we already took care
1722: // of. Specifically, LL(1) alts with no preds
1723: // that do not have end-of-token in their prediction set
1724: // and that are not giant unicode sets.
1725: if (createdLL1Switch && suitableForCaseExpression(alt)) {
1726: if (DEBUG_CODE_GENERATOR)
1727: System.out
1728: .println("ignoring alt because it was in the switch");
1729: continue;
1730: }
1731: String e;
1732:
1733: boolean unpredicted = false;
1734:
1735: if (grammar instanceof LexerGrammar) {
1736: // Calculate the "effective depth" of the alt,
1737: // which is the max depth at which
1738: // cache[depth]!=end-of-token
1739: int effectiveDepth = alt.lookaheadDepth;
1740: if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
1741: // use maximum lookahead
1742: effectiveDepth = grammar.maxk;
1743: }
1744: while (effectiveDepth >= 1
1745: && alt.cache[effectiveDepth]
1746: .containsEpsilon()) {
1747: effectiveDepth--;
1748: }
1749: // Ignore alts whose effective depth is other than
1750: // the ones we are generating for this iteration.
1751: if (effectiveDepth != altDepth) {
1752: if (DEBUG_CODE_GENERATOR)
1753: System.out
1754: .println("ignoring alt because effectiveDepth!=altDepth;"
1755: + effectiveDepth
1756: + "!="
1757: + altDepth);
1758: continue;
1759: }
1760: unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
1761: e = getLookaheadTestExpression(alt, effectiveDepth);
1762: } else {
1763: unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
1764: e = getLookaheadTestExpression(alt, grammar.maxk);
1765: }
1766:
1767: // Was it a big unicode range that forced unsuitability
1768: // for a case expression?
1769: if (alt.cache[1].fset.degree() > caseSizeThreshold
1770: && suitableForCaseExpression(alt)) {
1771: if (nIF == 0) {
1772: println("if " + e + " {");
1773: } else {
1774: println("else if " + e + " {");
1775: }
1776: } else if (unpredicted && alt.semPred == null
1777: && alt.synPred == null) {
// The alt has an empty prediction set and no
// predicate to help out. If we have not
// generated a previous if, just put {...} around
// the end-of-token clause
1782: if (nIF == 0) {
1783: println("{");
1784: } else {
1785: println("else {");
1786: }
1787: finishingInfo.needAnErrorClause = false;
1788: } else { // check for sem and syn preds
1789:
1790: // Add any semantic predicate expression to the
1791: // lookahead test
1792: if (alt.semPred != null) {
// if debugging, wrap the evaluation of the
// predicate in a method; translate $ and #
// references
1796: ActionTransInfo tInfo = new ActionTransInfo();
1797: String actionStr = processActionForSpecialSymbols(
1798: alt.semPred, blk.line, currentRule,
1799: tInfo);
// ignore translation info; we don't need to
// do anything with it. The call will inform
// SemanticPredicateListeners of the result
1803: if (((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))
1804: && grammar.debuggingOutput) {
1805: e = "("
1806: + e
1807: + "&& fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING,"
1808: + addSemPred(charFormatter
1809: .escapeString(actionStr))
1810: + "," + actionStr + "))";
1811: } else {
1812: e = "(" + e + "&&(" + actionStr + "))";
1813: }
1814: }
1815:
1816: // Generate any syntactic predicates
1817: if (nIF > 0) {
1818: if (alt.synPred != null) {
1819: println("else {");
1820: tabs++;
1821: genSynPred(alt.synPred, e);
1822: closingBracesOfIFSequence++;
1823: } else {
1824: println("else if " + e + " {");
1825: }
1826: } else {
1827: if (alt.synPred != null) {
1828: genSynPred(alt.synPred, e);
1829: } else {
1830: // when parsing trees, convert null to
1831: // valid tree node with NULL lookahead.
1832: if (grammar instanceof TreeWalkerGrammar) {
1833: println("if (_t==null) _t=ASTNULL;");
1834: }
1835: println("if " + e + " {");
1836: }
1837: }
1838:
1839: }
1840:
1841: nIF++;
1842: tabs++;
1843: genAlt(alt, blk);
1844: tabs--;
1845: println("}");
1846: }
1847: }
1848: String ps = "";
1849: for (int i = 1; i <= closingBracesOfIFSequence; i++) {
1850: ps += "}";
1851: }
1852:
1853: // Restore the AST generation state
1854: genAST = savegenAST;
1855:
1856: // restore save text state
saveText = oldsaveText;
1858:
1859: // Return the finishing info.
1860: if (createdLL1Switch) {
1861: tabs--;
1862: finishingInfo.postscript = ps + "}";
1863: finishingInfo.generatedSwitch = true;
1864: finishingInfo.generatedAnIf = nIF > 0;
1865: //return new JavaBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement
1866:
1867: } else {
1868: finishingInfo.postscript = ps;
1869: finishingInfo.generatedSwitch = false;
1870: finishingInfo.generatedAnIf = nIF > 0;
1871: // return new JavaBlockFinishingInfo(ps, false,nIF>0);
1872: }
1873: return finishingInfo;
1874: }
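// Illustrative sketch (not emitted verbatim): for a decision with enough
// simple LL(1) alternatives, genCommonBlock() produces a switch over the
// k=1 lookahead with the remaining alts in the default clause, roughly:
//
//     switch ( LA(1) ) {
//     case ID:
//     case INT:
//     {
//         ...
//         break;
//     }
//     default:
//         if (((LA(1)==LPAREN))&&( pred )) {
//             ...
//         }
//         else { /* error clause appended via the returned postscript */ }
//     }
//
// ID, INT, LPAREN, and pred are hypothetical token/predicate names.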
1875:
1876: private static boolean suitableForCaseExpression(Alternative a) {
1877: return a.lookaheadDepth == 1 && a.semPred == null
1878: && !a.cache[1].containsEpsilon()
1879: && a.cache[1].fset.degree() <= caseSizeThreshold;
1880: }
1881:
1882: /** Generate code to link an element reference into the AST */
1883: private void genElementAST(AlternativeElement el) {
1884: // handle case where you're not building trees, but are in tree walker.
1885: // Just need to get labels set up.
1886: if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
1887: String elementRef;
1888: String astName;
1889:
1890: // Generate names and declarations of the AST variable(s)
1891: if (el.getLabel() == null) {
1892: elementRef = lt1Value;
1893: // Generate AST variables for unlabeled stuff
1894: astName = "tmp" + astVarNumber + "_AST";
1895: astVarNumber++;
1896: // Map the generated AST variable in the alternate
1897: mapTreeVariable(el, astName);
1898: // Generate an "input" AST variable also
1899: println(labeledElementASTType + " " + astName
1900: + "_in = " + elementRef + ";");
1901: }
1902: return;
1903: }
1904:
1905: if (grammar.buildAST && syntacticPredLevel == 0) {
1906: boolean needASTDecl = (genAST && (el.getLabel() != null || el
1907: .getAutoGenType() != GrammarElement.AUTO_GEN_BANG));
1908:
1909: // RK: if we have a grammar element always generate the decl
1910: // since some guy can access it from an action and we can't
1911: // peek ahead (well not without making a mess).
1912: // I'd prefer taking this out.
1913: if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
1914: && (el instanceof TokenRefElement)) {
1915: needASTDecl = true;
1916: }
1917:
1918: boolean doNoGuessTest = (grammar.hasSyntacticPredicate && needASTDecl);
1919:
1920: String elementRef;
1921: String astNameBase;
1922:
1923: // Generate names and declarations of the AST variable(s)
1924: if (el.getLabel() != null) {
1925: elementRef = el.getLabel();
1926: astNameBase = el.getLabel();
1927: } else {
1928: elementRef = lt1Value;
1929: // Generate AST variables for unlabeled stuff
astNameBase = "tmp" + astVarNumber;
astVarNumber++;
1933: }
1934:
1935: // Generate the declaration if required.
1936: if (needASTDecl) {
1937: // Generate the declaration
1938: if (el instanceof GrammarAtom) {
1939: GrammarAtom ga = (GrammarAtom) el;
1940: if (ga.getASTNodeType() != null) {
1941: genASTDeclaration(el, astNameBase, ga
1942: .getASTNodeType());
1943: // println(ga.getASTNodeType()+" " + astName+" = null;");
1944: } else {
1945: genASTDeclaration(el, astNameBase,
1946: labeledElementASTType);
1947: // println(labeledElementASTType+" " + astName + " = null;");
1948: }
1949: } else {
1950: genASTDeclaration(el, astNameBase,
1951: labeledElementASTType);
1952: // println(labeledElementASTType+" " + astName + " = null;");
1953: }
1954: }
1955:
1956: // for convenience..
1957: String astName = astNameBase + "_AST";
1958:
1959: // Map the generated AST variable in the alternate
1960: mapTreeVariable(el, astName);
1961: if (grammar instanceof TreeWalkerGrammar) {
1962: // Generate an "input" AST variable also
1963: println(labeledElementASTType + " " + astName
1964: + "_in = null;");
1965: }
1966:
// Enclose actions with !guessing (generation currently disabled)
if (doNoGuessTest) {
// println("if (inputState.guessing==0) {");
// tabs++;
}
1972:
1973: // if something has a label assume it will be used
1974: // so we must initialize the RefAST
1975: if (el.getLabel() != null) {
1976: if (el instanceof GrammarAtom) {
1977: println(astName
1978: + " = "
1979: + getASTCreateString((GrammarAtom) el,
1980: elementRef) + ";");
1981: } else {
1982: println(astName + " = "
1983: + getASTCreateString(elementRef) + ";");
1984: }
1985: }
1986:
1987: // if it has no label but a declaration exists initialize it.
1988: if (el.getLabel() == null && needASTDecl) {
1989: elementRef = lt1Value;
1990: if (el instanceof GrammarAtom) {
1991: println(astName
1992: + " = "
1993: + getASTCreateString((GrammarAtom) el,
1994: elementRef) + ";");
1995: } else {
1996: println(astName + " = "
1997: + getASTCreateString(elementRef) + ";");
1998: }
1999: // Map the generated AST variable in the alternate
2000: if (grammar instanceof TreeWalkerGrammar) {
2001: // set "input" AST variable also
2002: println(astName + "_in = " + elementRef + ";");
2003: }
2004: }
2005:
2006: if (genAST) {
2007: switch (el.getAutoGenType()) {
2008: case GrammarElement.AUTO_GEN_NONE:
2009: println("astFactory.addASTChild(currentAST, "
2010: + astName + ");");
2011: break;
2012: case GrammarElement.AUTO_GEN_CARET:
2013: println("astFactory.makeASTRoot(currentAST, "
2014: + astName + ");");
2015: break;
2016: default:
2017: break;
2018: }
2019: }
2020: if (doNoGuessTest) {
2021: // tabs--;
2022: // println("}");
2023: }
2024: }
2025: }
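// Illustrative sketch (not emitted verbatim): for an unlabeled token
// reference in a parser with tree construction on, genElementAST()
// typically emits something like the following, assuming lt1Value is
// "LT(1)" and the default "AST" node type (names hypothetical):
//
//     AST tmp1_AST = null;
//     tmp1_AST = astFactory.create(LT(1));
//     astFactory.addASTChild(currentAST, tmp1_AST);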
2026:
2027: /** Close the try block and generate catch phrases
2028: * if the element has a labeled handler in the rule
2029: */
2030: private void genErrorCatchForElement(AlternativeElement el) {
2031: if (el.getLabel() == null)
2032: return;
2033: String r = el.enclosingRuleName;
2034: if (grammar instanceof LexerGrammar) {
2035: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2036: }
2037: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2038: if (rs == null) {
2039: antlrTool.panic("Enclosing rule not found!");
2040: }
2041: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2042: if (ex != null) {
2043: tabs--;
2044: println("}");
2045: genErrorHandler(ex);
2046: }
2047: }
2048:
2049: /** Generate the catch phrases for a user-specified error handler */
2050: private void genErrorHandler(ExceptionSpec ex) {
2051: // Each ExceptionHandler in the ExceptionSpec is a separate catch
2052: for (int i = 0; i < ex.handlers.size(); i++) {
2053: ExceptionHandler handler = (ExceptionHandler) ex.handlers
2054: .elementAt(i);
2055: // Generate catch phrase
2056: println("catch (" + handler.exceptionTypeAndName.getText()
2057: + ") {");
2058: tabs++;
2059: if (grammar.hasSyntacticPredicate) {
2060: println("if (inputState.guessing==0) {");
2061: tabs++;
2062: }
2063:
2064: // When not guessing, execute user handler action
2065: ActionTransInfo tInfo = new ActionTransInfo();
2066: printAction(processActionForSpecialSymbols(handler.action
2067: .getText(), handler.action.getLine(), currentRule,
2068: tInfo));
2069:
2070: if (grammar.hasSyntacticPredicate) {
2071: tabs--;
2072: println("} else {");
2073: tabs++;
2074: // When guessing, rethrow exception
2075: println("throw "
2076: + extractIdOfAction(handler.exceptionTypeAndName)
2077: + ";");
2078: tabs--;
2079: println("}");
2080: }
2081: // Close catch phrase
2082: tabs--;
2083: println("}");
2084: }
2085: }
2086:
2087: /** Generate a try { opening if the element has a labeled handler in the rule */
2088: private void genErrorTryForElement(AlternativeElement el) {
2089: if (el.getLabel() == null)
2090: return;
2091: String r = el.enclosingRuleName;
2092: if (grammar instanceof LexerGrammar) {
2093: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2094: }
2095: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2096: if (rs == null) {
2097: antlrTool.panic("Enclosing rule not found!");
2098: }
2099: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2100: if (ex != null) {
2101: println("try { // for error handling");
2102: tabs++;
2103: }
2104: }
2105:
2106: protected void genASTDeclaration(AlternativeElement el) {
2107: genASTDeclaration(el, labeledElementASTType);
2108: }
2109:
2110: protected void genASTDeclaration(AlternativeElement el,
2111: String node_type) {
2112: genASTDeclaration(el, el.getLabel(), node_type);
2113: }
2114:
2115: protected void genASTDeclaration(AlternativeElement el,
2116: String var_name, String node_type) {
2117: // already declared?
2118: if (declaredASTVariables.contains(el))
2119: return;
2120:
2121: // emit code
2122: println(node_type + " " + var_name + "_AST = null;");
2123:
2124: // mark as declared
2125: declaredASTVariables.add(el);
2126: }
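// For example, genASTDeclaration(el, "expr", "AST") emits exactly one line
// (skipped if el was already declared); "expr" and "AST" are hypothetical
// argument values:
//
//     AST expr_AST = null;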
2127:
2128: /** Generate a header that is common to all Java files */
2129: protected void genHeader() {
2130: println("// $ANTLR " + Tool.version + ": " + "\""
2131: + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\""
2132: + " -> " + "\"" + grammar.getClassName() + ".java\"$");
2133: }
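// The emitted header looks roughly like this (version and file names vary):
//
//     // $ANTLR 2.x.x: "MyGrammar.g" -> "MyParser.java"$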
2134:
2135: private void genLiteralsTest() {
2136: println("_ttype = testLiteralsTable(_ttype);");
2137: }
2138:
2139: private void genLiteralsTestForPartialToken() {
2140: println("_ttype = testLiteralsTable(new String(text.getBuffer(),_begin,text.length()-_begin),_ttype);");
2141: }
2142:
2143: protected void genMatch(BitSet b) {
2144: }
2145:
2146: protected void genMatch(GrammarAtom atom) {
2147: if (atom instanceof StringLiteralElement) {
2148: if (grammar instanceof LexerGrammar) {
2149: genMatchUsingAtomText(atom);
2150: } else {
2151: genMatchUsingAtomTokenType(atom);
2152: }
2153: } else if (atom instanceof CharLiteralElement) {
2154: if (grammar instanceof LexerGrammar) {
2155: genMatchUsingAtomText(atom);
2156: } else {
2157: antlrTool
2158: .error("cannot ref character literals in grammar: "
2159: + atom);
2160: }
2161: } else if (atom instanceof TokenRefElement) {
2162: genMatchUsingAtomText(atom);
2163: } else if (atom instanceof WildcardElement) {
2164: gen((WildcardElement) atom);
2165: }
2166: }
2167:
2168: protected void genMatchUsingAtomText(GrammarAtom atom) {
2169: // match() for trees needs the _t cursor
2170: String astArgs = "";
2171: if (grammar instanceof TreeWalkerGrammar) {
2172: astArgs = "_t,";
2173: }
2174:
2175: // if in lexer and ! on element, save buffer index to kill later
2176: if (grammar instanceof LexerGrammar
2177: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2178: println("_saveIndex=text.length();");
2179: }
2180:
2181: print(atom.not ? "matchNot(" : "match(");
2182: _print(astArgs);
2183:
2184: // print out what to match
2185: if (atom.atomText.equals("EOF")) {
2186: // horrible hack to handle EOF case
2187: _print("Token.EOF_TYPE");
2188: } else {
2189: _print(atom.atomText);
2190: }
2191: _println(");");
2192:
2193: if (grammar instanceof LexerGrammar
2194: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2195: println("text.setLength(_saveIndex);"); // kill text atom put in buffer
2196: }
2197: }
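// Illustrative output for a lexer element whose text is discarded
// (saveText off or '!' on the element), assuming the char literal 'a':
//
//     _saveIndex=text.length();
//     match('a');
//     text.setLength(_saveIndex);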
2198:
2199: protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
2200: // match() for trees needs the _t cursor
2201: String astArgs = "";
2202: if (grammar instanceof TreeWalkerGrammar) {
2203: astArgs = "_t,";
2204: }
2205:
2206: // If the literal can be mangled, generate the symbolic constant instead
2208: String s = astArgs + getValueString(atom.getType());
2209:
2210: // matching
2211: println((atom.not ? "matchNot(" : "match(") + s + ");");
2212: }
2213:
2214: /** Generate the nextToken() rule. nextToken() is a synthetic
2215: * lexer rule that is the implicit OR of all user-defined
2216: * lexer rules.
2217: */
2218: public void genNextToken() {
2219: // Are there any public rules? If not, then just generate a
2220: // fake nextToken().
2221: boolean hasPublicRules = false;
2222: for (int i = 0; i < grammar.rules.size(); i++) {
2223: RuleSymbol rs = (RuleSymbol) grammar.rules.elementAt(i);
2224: if (rs.isDefined() && rs.access.equals("public")) {
2225: hasPublicRules = true;
2226: break;
2227: }
2228: }
2229: if (!hasPublicRules) {
2230: println("");
2231: println("public Token nextToken() throws TokenStreamException {");
2232: println("\ttry {uponEOF();}");
2233: println("\tcatch(CharStreamIOException csioe) {");
2234: println("\t\tthrow new TokenStreamIOException(csioe.io);");
2235: println("\t}");
2236: println("\tcatch(CharStreamException cse) {");
2237: println("\t\tthrow new TokenStreamException(cse.getMessage());");
2238: println("\t}");
2239: println("\treturn new CommonToken(Token.EOF_TYPE, \"\");");
2240: println("}");
2241: println("");
2242: return;
2243: }
2244:
2245: // Create the synthesized nextToken() rule
2246: RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(
2247: grammar, grammar.rules, "nextToken");
2248: // Define the nextToken rule symbol
2249: RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
2250: nextTokenRs.setDefined();
2251: nextTokenRs.setBlock(nextTokenBlk);
2252: nextTokenRs.access = "private";
2253: grammar.define(nextTokenRs);
2254: // Analyze the nextToken rule
2255: boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);
2256:
2257: // Generate the next token rule
2258: String filterRule = null;
2259: if (((LexerGrammar) grammar).filterMode) {
2260: filterRule = ((LexerGrammar) grammar).filterRule;
2261: }
2262:
2263: println("");
2264: println("public Token nextToken() throws TokenStreamException {");
2265: tabs++;
2266: println("Token theRetToken=null;");
2267: _println("tryAgain:");
2268: println("for (;;) {");
2269: tabs++;
2270: println("Token _token = null;");
2271: println("int _ttype = Token.INVALID_TYPE;");
2272: if (((LexerGrammar) grammar).filterMode) {
2273: println("setCommitToPath(false);");
2274: if (filterRule != null) {
2275: // Here's a good place to ensure that the filter rule actually exists
2276: if (!grammar.isDefined(CodeGenerator
2277: .encodeLexerRuleName(filterRule))) {
2278: grammar.antlrTool.error("Filter rule " + filterRule
2279: + " does not exist in this lexer");
2280: } else {
2281: RuleSymbol rs = (RuleSymbol) grammar
2282: .getSymbol(CodeGenerator
2283: .encodeLexerRuleName(filterRule));
2284: if (!rs.isDefined()) {
2285: grammar.antlrTool.error("Filter rule "
2286: + filterRule
2287: + " does not exist in this lexer");
2288: } else if (rs.access.equals("public")) {
2289: grammar.antlrTool.error("Filter rule "
2290: + filterRule + " must be protected");
2291: }
2292: }
2293: println("int _m;");
2294: println("_m = mark();");
2295: }
2296: }
2297: println("resetText();");
2298:
2299: println("try { // for char stream error handling");
2300: tabs++;
2301:
2302: // Generate try around whole thing to trap scanner errors
2303: println("try { // for lexical error handling");
2304: tabs++;
2305:
2306: // Test for public lexical rules with empty paths
2307: for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
2308: Alternative a = nextTokenBlk.getAlternativeAt(i);
2309: if (a.cache[1].containsEpsilon()) {
2310: //String r = a.head.toString();
2311: RuleRefElement rr = (RuleRefElement) a.head;
2312: String r = CodeGenerator
2313: .decodeLexerRuleName(rr.targetRule);
2314: antlrTool.warning("public lexical rule " + r
2315: + " is optional (can match \"nothing\")");
2316: }
2317: }
2318:
2319: // Generate the block
2320: String newline = System.getProperty("line.separator");
2321: JavaBlockFinishingInfo howToFinish = genCommonBlock(
2322: nextTokenBlk, false);
2323: String errFinish = "if (LA(1)==EOF_CHAR) {uponEOF(); _returnToken = makeToken(Token.EOF_TYPE);}";
2324: errFinish += newline + "\t\t\t\t";
2325: if (((LexerGrammar) grammar).filterMode) {
2326: if (filterRule == null) {
2327: errFinish += "else {consume(); continue tryAgain;}";
2328: } else {
2329: errFinish += "else {" + newline + "\t\t\t\t\tcommit();"
2330: + newline + "\t\t\t\t\ttry {m" + filterRule
2331: + "(false);}" + newline
2332: + "\t\t\t\t\tcatch(RecognitionException e) {"
2333: + newline
2334: + "\t\t\t\t\t // catastrophic failure"
2335: + newline + "\t\t\t\t\t reportError(e);"
2336: + newline + "\t\t\t\t\t consume();" + newline
2337: + "\t\t\t\t\t}" + newline
2338: + "\t\t\t\t\tcontinue tryAgain;" + newline
2339: + "\t\t\t\t}";
2340: }
2341: } else {
2342: errFinish += "else {" + throwNoViable + "}";
2343: }
2344: genBlockFinish(howToFinish, errFinish);
2345:
2346: // at this point a valid token has been matched, undo "mark" that was done
2347: if (((LexerGrammar) grammar).filterMode && filterRule != null) {
2348: println("commit();");
2349: }
2350:
2351: // Generate literals test if desired
2352: // make sure _ttype is set first; note _returnToken must be
2353: // non-null as the rule was required to create it.
2354: println("if ( _returnToken==null ) continue tryAgain; // found SKIP token");
2355: println("_ttype = _returnToken.getType();");
2356: if (((LexerGrammar) grammar).getTestLiterals()) {
2357: genLiteralsTest();
2358: }
2359:
2360: // return token created by rule reference in switch
2361: println("_returnToken.setType(_ttype);");
2362: println("return _returnToken;");
2363:
2364: // Close try block
2365: tabs--;
2366: println("}");
2367: println("catch (RecognitionException e) {");
2368: tabs++;
2369: if (((LexerGrammar) grammar).filterMode) {
2370: if (filterRule == null) {
2371: println("if ( !getCommitToPath() ) {consume(); continue tryAgain;}");
2372: } else {
2373: println("if ( !getCommitToPath() ) {");
2374: tabs++;
2375: println("rewind(_m);");
2376: println("resetText();");
2377: println("try {m" + filterRule + "(false);}");
2378: println("catch(RecognitionException ee) {");
2379: println(" // horrendous failure: error in filter rule");
2380: println(" reportError(ee);");
2381: println(" consume();");
2382: println("}");
2383: println("continue tryAgain;");
2384: tabs--;
2385: println("}");
2386: }
2387: }
2388: if (nextTokenBlk.getDefaultErrorHandler()) {
2389: println("reportError(e);");
2390: println("consume();");
2391: } else {
2392: // pass on to invoking routine
2393: println("throw new TokenStreamRecognitionException(e);");
2394: }
2395: tabs--;
2396: println("}");
2397:
2398: // close CharStreamException try
2399: tabs--;
2400: println("}");
2401: println("catch (CharStreamException cse) {");
2402: println(" if ( cse instanceof CharStreamIOException ) {");
2403: println(" throw new TokenStreamIOException(((CharStreamIOException)cse).io);");
2404: println(" }");
2405: println(" else {");
2406: println(" throw new TokenStreamException(cse.getMessage());");
2407: println(" }");
2408: println("}");
2409:
2410: // close for-loop
2411: tabs--;
2412: println("}");
2413:
2414: // close method nextToken
2415: tabs--;
2416: println("}");
2417: println("");
2418: }
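// Rough shape of the generated nextToken() in non-filter mode, pieced
// together from the printlns above (the elided part is the prediction
// block produced by genCommonBlock(); the catch body varies with the
// default error handler setting):
//
//     public Token nextToken() throws TokenStreamException {
//         Token theRetToken=null;
//     tryAgain:
//         for (;;) {
//             Token _token = null;
//             int _ttype = Token.INVALID_TYPE;
//             resetText();
//             try { // for char stream error handling
//                 try { // for lexical error handling
//                     ...
//                     if ( _returnToken==null ) continue tryAgain; // found SKIP token
//                     _ttype = _returnToken.getType();
//                     _returnToken.setType(_ttype);
//                     return _returnToken;
//                 }
//                 catch (RecognitionException e) {
//                     throw new TokenStreamRecognitionException(e);
//                 }
//             }
//             catch (CharStreamException cse) {
//                 ...
//             }
//         }
//     }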
2419:
/** Gen a named rule block.
* ASTs are generated for each element of an alternative unless
* the rule or the alternative has a '!' modifier.
*
* If an alternative defeats the default tree construction, it
* must set <rule>_AST to the root of the returned AST.
*
* Each alternative that does automatic tree construction builds
* up root and child list pointers in an ASTPair structure.
*
* A rule finishes by setting the returnAST variable from the
* ASTPair.
*
* @param s The symbol describing the rule to generate
* @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
* @param ruleNum The rule's index, used when generating debug events
*/
2436: public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum) {
2437: tabs = 1;
2438:
2439: if (DEBUG_CODE_GENERATOR)
2440: System.out.println("genRule(" + s.getId() + ")");
2441: if (!s.isDefined()) {
2442: antlrTool.error("undefined rule: " + s.getId());
2443: return;
2444: }
2445:
2446: // Generate rule return type, name, arguments
2447: RuleBlock rblk = s.getBlock();
2448:
2449: currentRule = rblk;
2450: currentASTResult = s.getId();
2451:
2452: // clear list of declared ast variables..
2453: declaredASTVariables.clear();
2454:
2455: // Save the AST generation state, and set it to that of the rule
2456: boolean savegenAST = genAST;
2457: genAST = genAST && rblk.getAutoGen();
2458:
// boolean oldsaveText = saveText;
2460: saveText = rblk.getAutoGen();
2461:
2462: // print javadoc comment if any
2463: if (s.comment != null) {
2464: _println(s.comment);
2465: }
2466:
2467: // Gen method access and final qualifier
2468: print(s.access + " final ");
2469:
2470: // Gen method return type (note lexer return action set at rule creation)
2471: if (rblk.returnAction != null) {
2472: // Has specified return value
2473: _print(extractTypeOfAction(rblk.returnAction, rblk
2474: .getLine(), rblk.getColumn())
2475: + " ");
2476: } else {
2477: // No specified return value
2478: _print("void ");
2479: }
2480:
2481: // Gen method name
2482: _print(s.getId() + "(");
2483:
2484: // Additional rule parameters common to all rules for this grammar
2485: _print(commonExtraParams);
2486: if (commonExtraParams.length() != 0 && rblk.argAction != null) {
2487: _print(",");
2488: }
2489:
2490: // Gen arguments
2491: if (rblk.argAction != null) {
2492: // Has specified arguments
2493: _println("");
2494: tabs++;
2495: println(rblk.argAction);
2496: tabs--;
2497: print(")");
2498: } else {
2499: // No specified arguments
2500: _print(")");
2501: }
2502:
2503: // Gen throws clause and open curly
2504: _print(" throws " + exceptionThrown);
2505: if (grammar instanceof ParserGrammar) {
2506: _print(", TokenStreamException");
2507: } else if (grammar instanceof LexerGrammar) {
2508: _print(", CharStreamException, TokenStreamException");
2509: }
2510: // Add user-defined exceptions unless lexer (for now)
2511: if (rblk.throwsSpec != null) {
2512: if (grammar instanceof LexerGrammar) {
2513: antlrTool
2514: .error("user-defined throws spec not allowed (yet) for lexer rule "
2515: + rblk.ruleName);
2516: } else {
2517: _print(", " + rblk.throwsSpec);
2518: }
2519: }
2520:
2521: _println(" {");
2522: tabs++;
2523:
2524: // Convert return action to variable declaration
2525: if (rblk.returnAction != null)
2526: println(rblk.returnAction + ";");
2527:
2528: // print out definitions needed by rules for various grammar types
2529: println(commonLocalVars);
2530:
2531: if (grammar.traceRules) {
2532: if (grammar instanceof TreeWalkerGrammar) {
2533: println("traceIn(\"" + s.getId() + "\",_t);");
2534: } else {
2535: println("traceIn(\"" + s.getId() + "\");");
2536: }
2537: }
2538:
2539: if (grammar instanceof LexerGrammar) {
2540: // lexer rule default return value is the rule's token name
2541: // This is a horrible hack to support the built-in EOF lexer rule.
2542: if (s.getId().equals("mEOF"))
2543: println("_ttype = Token.EOF_TYPE;");
2544: else
2545: println("_ttype = " + s.getId().substring(1) + ";");
2546: println("int _saveIndex;"); // used for element! (so we can kill text matched for element)
2547: /*
2548: println("boolean old_saveConsumedInput=saveConsumedInput;");
2549: if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
2550: println("saveConsumedInput=false;");
2551: }
2552: */
2553: }
2554:
2555: // if debugging, write code to mark entry to the rule
2556: if (grammar.debuggingOutput)
2557: if (grammar instanceof ParserGrammar)
2558: println("fireEnterRule(" + ruleNum + ",0);");
2559: else if (grammar instanceof LexerGrammar)
2560: println("fireEnterRule(" + ruleNum + ",_ttype);");
2561:
2562: // Generate trace code if desired
2563: if (grammar.debuggingOutput || grammar.traceRules) {
2564: println("try { // debugging");
2565: tabs++;
2566: }
2567:
2568: // Initialize AST variables
2569: if (grammar instanceof TreeWalkerGrammar) {
2570: // "Input" value for rule
2571: println(labeledElementASTType + " " + s.getId()
2572: + "_AST_in = (" + labeledElementASTType + ")_t;");
2573: }
2574: if (grammar.buildAST) {
2575: // Parser member used to pass AST returns from rule invocations
2576: println("returnAST = null;");
2577: // Tracks AST construction
2578: // println("ASTPair currentAST = (inputState.guessing==0) ? new ASTPair() : null;");
2579: println("ASTPair currentAST = new ASTPair();");
2580: // User-settable return value for rule.
2581: println(labeledElementASTType + " " + s.getId()
2582: + "_AST = null;");
2583: }
2584:
2585: genBlockPreamble(rblk);
2586: genBlockInitAction(rblk);
2587: println("");
2588:
2589: // Search for an unlabeled exception specification attached to the rule
2590: ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");
2591:
2592: // Generate try block around the entire rule for error handling
2593: if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
2594: println("try { // for error handling");
2595: tabs++;
2596: }
2597:
2598: // Generate the alternatives
2599: if (rblk.alternatives.size() == 1) {
2600: // One alternative -- use simple form
2601: Alternative alt = rblk.getAlternativeAt(0);
2602: String pred = alt.semPred;
2603: if (pred != null)
2604: genSemPred(pred, currentRule.line);
2605: if (alt.synPred != null) {
2606: antlrTool
2607: .warning(
2608: "Syntactic predicate ignored for single alternative",
2609: grammar.getFilename(), alt.synPred
2610: .getLine(), alt.synPred
2611: .getColumn());
2612: }
2613: genAlt(alt, rblk);
2614: } else {
2615: // Multiple alternatives -- generate complex form
2616: boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);
2617:
2618: JavaBlockFinishingInfo howToFinish = genCommonBlock(rblk,
2619: false);
2620: genBlockFinish(howToFinish, throwNoViable);
2621: }
2622:
2623: // Generate catch phrase for error handling
2624: if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
2625: // Close the try block
2626: tabs--;
2627: println("}");
2628: }
2629:
2630: // Generate user-defined or default catch phrases
2631: if (unlabeledUserSpec != null) {
2632: genErrorHandler(unlabeledUserSpec);
2633: } else if (rblk.getDefaultErrorHandler()) {
2634: // Generate default catch phrase
2635: println("catch (" + exceptionThrown + " ex) {");
2636: tabs++;
2637: // Generate code to handle error if not guessing
2638: if (grammar.hasSyntacticPredicate) {
2639: println("if (inputState.guessing==0) {");
2640: tabs++;
2641: }
2642: println("reportError(ex);");
2643: if (!(grammar instanceof TreeWalkerGrammar)) {
2644: // Generate code to consume until token in k==1 follow set
2645: Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1,
2646: rblk.endNode);
2647: String followSetName = getBitsetName(markBitsetForGen(follow.fset));
2648: println("consume();");
2649: println("consumeUntil(" + followSetName + ");");
2650: } else {
2651: // Just consume one token
2652: println("if (_t!=null) {_t = _t.getNextSibling();}");
2653: }
2654: if (grammar.hasSyntacticPredicate) {
2655: tabs--;
2656: // When guessing, rethrow exception
2657: println("} else {");
2658: println(" throw ex;");
2659: println("}");
2660: }
2661: // Close catch phrase
2662: tabs--;
2663: println("}");
2664: }
2665:
2666: // Squirrel away the AST "return" value
2667: if (grammar.buildAST) {
2668: println("returnAST = " + s.getId() + "_AST;");
2669: }
2670:
2671: // Set return tree value for tree walkers
2672: if (grammar instanceof TreeWalkerGrammar) {
2673: println("_retTree = _t;");
2674: }
2675:
2676: // Generate literals test for lexer rules so marked
2677: if (rblk.getTestLiterals()) {
2678: if (s.access.equals("protected")) {
2679: genLiteralsTestForPartialToken();
2680: } else {
2681: genLiteralsTest();
2682: }
2683: }
2684:
2685: // if doing a lexer rule, dump code to create token if necessary
2686: if (grammar instanceof LexerGrammar) {
2687: println("if ( _createToken && _token==null && _ttype!=Token.SKIP ) {");
2688: println(" _token = makeToken(_ttype);");
2689: println(" _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));");
2690: println("}");
2691: println("_returnToken = _token;");
2692: }
2693:
2694: // Gen the return statement if there is one (lexer has hard-wired return action)
2695: if (rblk.returnAction != null) {
2696: println("return "
2697: + extractIdOfAction(rblk.returnAction, rblk
2698: .getLine(), rblk.getColumn()) + ";");
2699: }
2700:
2701: if (grammar.debuggingOutput || grammar.traceRules) {
2702: tabs--;
2703: println("} finally { // debugging");
2704: tabs++;
2705:
2706: // If debugging, generate calls to mark exit of rule
2707: if (grammar.debuggingOutput)
2708: if (grammar instanceof ParserGrammar)
2709: println("fireExitRule(" + ruleNum + ",0);");
2710: else if (grammar instanceof LexerGrammar)
2711: println("fireExitRule(" + ruleNum + ",_ttype);");
2712:
2713: if (grammar.traceRules) {
2714: if (grammar instanceof TreeWalkerGrammar) {
2715: println("traceOut(\"" + s.getId() + "\",_t);");
2716: } else {
2717: println("traceOut(\"" + s.getId() + "\");");
2718: }
2719: }
2720:
2721: tabs--;
2722: println("}");
2723: }
2724:
2725: tabs--;
2726: println("}");
2727: println("");
2728:
2729: // Restore the AST generation state
2730: genAST = savegenAST;
2731:
2732: // restore char save state
// saveText = oldsaveText;
2734: }
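// Illustrative signature and frame for a generated parser rule
// (hypothetical rule "expr", no arguments or return value, ASTs on,
// assuming exceptionThrown is "RecognitionException" for parsers):
//
//     public final void expr() throws RecognitionException, TokenStreamException {
//         returnAST = null;
//         ASTPair currentAST = new ASTPair();
//         AST expr_AST = null;
//         ... // alternatives, error handling
//         returnAST = expr_AST;
//     }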
2735:
2736: private void GenRuleInvocation(RuleRefElement rr) {
2737: // dump rule name
2738: _print(rr.targetRule + "(");
2739:
2740: // lexers must tell rule if it should set _returnToken
2741: if (grammar instanceof LexerGrammar) {
2742: // if labeled, could access Token, so tell rule to create
2743: if (rr.getLabel() != null) {
2744: _print("true");
2745: } else {
2746: _print("false");
2747: }
2748: if (commonExtraArgs.length() != 0 || rr.args != null) {
2749: _print(",");
2750: }
2751: }
2752:
2753: // Extra arguments common to all rules for this grammar
2754: _print(commonExtraArgs);
2755: if (commonExtraArgs.length() != 0 && rr.args != null) {
2756: _print(",");
2757: }
2758:
2759: // Process arguments to method, if any
2760: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
2761: if (rr.args != null) {
2762: // When not guessing, execute user arg action
2763: ActionTransInfo tInfo = new ActionTransInfo();
2764: String args = processActionForSpecialSymbols(rr.args, 0,
2765: currentRule, tInfo);
2766: if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
2767: antlrTool.error("Arguments of rule reference '"
2768: + rr.targetRule + "' cannot set or ref #"
2769: + currentRule.getRuleName(), grammar
2770: .getFilename(), rr.getLine(), rr.getColumn());
2771: }
2772: _print(args);
2773:
2774: // Warn if the rule accepts no arguments
2775: if (rs.block.argAction == null) {
2776: antlrTool.warning("Rule '" + rr.targetRule
2777: + "' accepts no arguments", grammar
2778: .getFilename(), rr.getLine(), rr.getColumn());
2779: }
2780: } else {
// Warn if the rule declares parameters but none were supplied.
// (The C++ code generator omits this warning because C++ parameters
// may all have default values.)
2783: if (rs.block.argAction != null) {
2784: antlrTool.warning(
2785: "Missing parameters on reference to rule "
2786: + rr.targetRule, grammar.getFilename(),
2787: rr.getLine(), rr.getColumn());
2788: }
2789: }
2790: _println(");");
2791:
2792: // move down to the first child while parsing
2793: if (grammar instanceof TreeWalkerGrammar) {
2794: println("_t = _retTree;");
2795: }
2796: }
2797:
2798: protected void genSemPred(String pred, int line) {
2799: // translate $ and # references
2800: ActionTransInfo tInfo = new ActionTransInfo();
2801: pred = processActionForSpecialSymbols(pred, line, currentRule,
2802: tInfo);
2803: // ignore translation info...we don't need to do anything with it.
2804: String escapedPred = charFormatter.escapeString(pred);
2805:
2806: // if debugging, wrap the semantic predicate evaluation in a method
2807: // that can tell SemanticPredicateListeners the result
2808: if (grammar.debuggingOutput
2809: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
2810: pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING,"
2811: + addSemPred(escapedPred) + "," + pred + ")";
2812: println("if (!(" + pred + "))");
2813: println(" throw new SemanticException(\"" + escapedPred
2814: + "\");");
2815: }
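// For a hypothetical predicate {n<10}? this emits, in essence:
//
//     if (!(n<10))
//       throw new SemanticException("n<10");
//
// ($/# references in the predicate are translated before emission)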
2816:
2817: /** Write an array of Strings which are the semantic predicate
2818: * expressions. The debugger will reference them by number only
2819: */
2820: protected void genSemPredMap() {
2821: Enumeration e = semPreds.elements();
2822: println("private String _semPredNames[] = {");
2823: while (e.hasMoreElements())
2824: println("\"" + e.nextElement() + "\",");
2825: println("};");
2826: }
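// Example of the emitted map (predicate strings are hypothetical; note
// every entry gets a trailing comma, which is legal in a Java array
// initializer):
//
//     private String _semPredNames[] = {
//     "n<10",
//     "isTypeName(LT(1).getText())",
//     };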
2827:
2828: protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
2829: if (DEBUG_CODE_GENERATOR)
2830: System.out.println("gen=>(" + blk + ")");
2831:
2832: // Dump synpred result variable
2833: println("boolean synPredMatched" + blk.ID + " = false;");
2834: // Gen normal lookahead test
2835: println("if (" + lookaheadExpr + ") {");
2836: tabs++;
2837:
2838: // Save input state
2839: if (grammar instanceof TreeWalkerGrammar) {
2840: println("AST __t" + blk.ID + " = _t;");
2841: } else {
2842: println("int _m" + blk.ID + " = mark();");
2843: }
2844:
2845: // Once inside the try, assume synpred works unless exception caught
2846: println("synPredMatched" + blk.ID + " = true;");
2847: println("inputState.guessing++;");
2848:
2849: // if debugging, tell listeners that a synpred has started
2850: if (grammar.debuggingOutput
2851: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
2852: println("fireSyntacticPredicateStarted();");
2853: }
2854:
2855: syntacticPredLevel++;
2856: println("try {");
2857: tabs++;
2858: gen((AlternativeBlock) blk); // gen code to test predicate
2859: tabs--;
2860: //println("System.out.println(\"pred "+blk+" succeeded\");");
2861: println("}");
2862: println("catch (" + exceptionThrown + " pe) {");
2863: tabs++;
2864: println("synPredMatched" + blk.ID + " = false;");
2865: //println("System.out.println(\"pred "+blk+" failed\");");
2866: tabs--;
2867: println("}");
2868:
2869: // Restore input state
2870: if (grammar instanceof TreeWalkerGrammar) {
2871: println("_t = __t" + blk.ID + ";");
2872: } else {
2873: println("rewind(_m" + blk.ID + ");");
2874: }
2875:
2876: println("inputState.guessing--;");
2877:
2878: // if debugging, tell listeners how the synpred turned out
2879: if (grammar.debuggingOutput
2880: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
2881: println("if (synPredMatched" + blk.ID + ")");
2882: println(" fireSyntacticPredicateSucceeded();");
2883: println("else");
2884: println(" fireSyntacticPredicateFailed();");
2885: }
2886:
2887: syntacticPredLevel--;
2888: tabs--;
2889:
2890: // Close lookahead test
2891: println("}");
2892:
// Test synpred result
2894: println("if ( synPredMatched" + blk.ID + " ) {");
2895: }
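// Rough shape of the generated parser-side synpred test, assuming the
// block's unique ID is 42, a hypothetical lookahead test, and that
// exceptionThrown is "RecognitionException" for parsers:
//
//     boolean synPredMatched42 = false;
//     if (((LA(1)==ID))) {
//         int _m42 = mark();
//         synPredMatched42 = true;
//         inputState.guessing++;
//         try {
//             ... // match the predicate's alternatives
//         }
//         catch (RecognitionException pe) {
//             synPredMatched42 = false;
//         }
//         rewind(_m42);
//         inputState.guessing--;
//     }
//     if ( synPredMatched42 ) {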
2896:
2897: /** Generate a static array containing the names of the tokens,
2898: * indexed by the token type values. This static array is used
* to format error messages so that the token identifiers or literal
2900: * strings are displayed instead of the token numbers.
2901: *
2902: * If a lexical rule has a paraphrase, use it rather than the
2903: * token label.
2904: */
2905: public void genTokenStrings() {
2906: // Generate a string for each token. This creates a static
2907: // array of Strings indexed by token type.
2908: println("");
2909: println("public static final String[] _tokenNames = {");
2910: tabs++;
2911:
2912: // Walk the token vocabulary and generate a Vector of strings
2913: // from the tokens.
2914: Vector v = grammar.tokenManager.getVocabulary();
2915: for (int i = 0; i < v.size(); i++) {
2916: String s = (String) v.elementAt(i);
2917: if (s == null) {
2918: s = "<" + String.valueOf(i) + ">";
2919: }
2920: if (!s.startsWith("\"") && !s.startsWith("<")) {
2921: TokenSymbol ts = (TokenSymbol) grammar.tokenManager
2922: .getTokenSymbol(s);
2923: if (ts != null && ts.getParaphrase() != null) {
2924: s = StringUtils.stripFrontBack(ts.getParaphrase(),
2925: "\"", "\"");
2926: }
2927: }
2928: print(charFormatter.literalString(s));
2929: if (i != v.size() - 1) {
2930: _print(",");
2931: }
2932: _println("");
2933: }
2934:
// Close the string array initializer
2936: tabs--;
2937: println("};");
2938: }
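// Example of the emitted array (token names are grammar-specific;
// unassigned slots fall back to "<i>" and paraphrases replace labels):
//
//     public static final String[] _tokenNames = {
//         "<0>",
//         "EOF",
//         "ID",
//         "\"while\""
//     };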
2939:
2940: /** Create and set Integer token type objects that map
2941: * to Java Class objects (which AST node to create).
2942: */
2943: protected void genTokenASTNodeMap() {
2944: println("");
2945: println("protected void buildTokenTypeASTClassMap() {");
2946: // Generate a map.put("T","TNode") for each token
2947: // if heterogeneous node known for that token T.
2948: tabs++;
2949: boolean generatedNewHashtable = false;
2950: int n = 0;
2951: // Walk the token vocabulary and generate puts.
2952: Vector v = grammar.tokenManager.getVocabulary();
2953: for (int i = 0; i < v.size(); i++) {
2954: String s = (String) v.elementAt(i);
2955: if (s != null) {
2956: TokenSymbol ts = grammar.tokenManager.getTokenSymbol(s);
2957: if (ts != null && ts.getASTNodeType() != null) {
2958: n++;
2959: if (!generatedNewHashtable) {
2960: // only generate if we are going to add a mapping
2961: println("tokenTypeToASTClassMap = new Hashtable();");
2962: generatedNewHashtable = true;
2963: }
2964: println("tokenTypeToASTClassMap.put(new Integer("
2965: + s + "), " + ts.getASTNodeType()
2966: + ".class);");
2967: }
2968: }
2969: }
2970:
2971: if (n == 0) {
2972: println("tokenTypeToASTClassMap=null;");
2973: }
2974: tabs--;
2975: println("};");
2976: }
2977:
2978: /** Generate the token types Java file */
2979: protected void genTokenTypes(TokenManager tm) throws IOException {
2980: // Open the token output Java file and set the currentOutput stream
2981: // SAS: file open was moved to a method so a subclass can override
2982: // This was mainly for the VAJ interface
2983: setupOutput(tm.getName() + TokenTypesFileSuffix);
2984:
2985: tabs = 0;
2986:
2987: // Generate the header common to all Java files
2988: genHeader();
2989: // Do not use printAction because we assume tabs==0
2990: println(behavior.getHeaderAction(""));
2991:
2992: // Encapsulate the definitions in an interface. This can be done
2993: // because they are all constants.
2994: println("public interface " + tm.getName()
2995: + TokenTypesFileSuffix + " {");
2996: tabs++;
2997:
2998: // Generate a definition for each token type
2999: Vector v = tm.getVocabulary();
3000:
3001: // Do special tokens manually
3002: println("int EOF = " + Token.EOF_TYPE + ";");
3003: println("int NULL_TREE_LOOKAHEAD = "
3004: + Token.NULL_TREE_LOOKAHEAD + ";");
3005:
3006: for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
3007: String s = (String) v.elementAt(i);
3008: if (s != null) {
3009: if (s.startsWith("\"")) {
3010: // a string literal
3011: StringLiteralSymbol sl = (StringLiteralSymbol) tm
3012: .getTokenSymbol(s);
3013: if (sl == null) {
3014: antlrTool.panic("String literal " + s
3015: + " not in symbol table");
3016: } else if (sl.label != null) {
3017: println("int " + sl.label + " = " + i + ";");
3018: } else {
3019: String mangledName = mangleLiteral(s);
3020: if (mangledName != null) {
3021: // We were able to create a meaningful mangled token name
3022: println("int " + mangledName + " = " + i
3023: + ";");
3024: // if no label specified, make the label equal to the mangled name
3025: sl.label = mangledName;
3026: } else {
3027: println("// " + s + " = " + i);
3028: }
3029: }
3030: } else if (!s.startsWith("<")) {
3031: println("int " + s + " = " + i + ";");
3032: }
3033: }
3034: }
3035:
3036: // Close the interface
3037: tabs--;
3038: println("}");
3039:
3040: // Close the tokens output file
3041: currentOutput.close();
3042: currentOutput = null;
3043: exitIfError();
3044: }
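// The emitted token types file is a plain interface of int constants,
// for example (names and values are grammar-specific; the EOF and
// NULL_TREE_LOOKAHEAD values assume ANTLR's usual Token constants):
//
//     public interface MyParserTokenTypes {
//         int EOF = 1;
//         int NULL_TREE_LOOKAHEAD = 3;
//         int ID = 4;
//         int LITERAL_while = 5;
//     }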
3045:
3046: /** Get a string for an expression to generate creation of an AST subtree.
3047: * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
3048: */
3049: public String getASTCreateString(Vector v) {
3050: if (v.size() == 0) {
3051: return "";
3052: }
3053: StringBuffer buf = new StringBuffer();
3054: buf.append("(" + labeledElementASTType
3055: + ")astFactory.make( (new ASTArray(" + v.size() + "))");
3056: for (int i = 0; i < v.size(); i++) {
3057: buf.append(".add(" + v.elementAt(i) + ")");
3058: }
3059: buf.append(")");
3060: return buf.toString();
3061: }
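// For a Vector of the expressions "a_AST", "b_AST", "c_AST" with
// labeledElementASTType "AST", this returns:
//
//     (AST)astFactory.make( (new ASTArray(3)).add(a_AST).add(b_AST).add(c_AST))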
3062:
/** Get a string for an expression to generate creation of an AST node
* @param atom The grammar node for which you are creating the node
* @param astCtorArgs The arguments to the AST constructor
*/
3067: public String getASTCreateString(GrammarAtom atom,
3068: String astCtorArgs) {
3069: //System.out.println("getASTCreateString("+atom+","+astCtorArgs+")");
3070: if (atom != null && atom.getASTNodeType() != null) {
3071: // they specified a type either on the reference or in tokens{} section
3072: return "(" + atom.getASTNodeType() + ")"
3073: + "astFactory.create(" + astCtorArgs + ",\""
3074: + atom.getASTNodeType() + "\")";
3075: } else {
3076: // must be an action or something since not referencing an atom
3077: return getASTCreateString(astCtorArgs);
3078: }
3079: }
3080:
/** Get a string for an expression to generate creation of an AST node.
* Parse the first (possibly only) argument looking for the token type.
* If the token type is a valid token symbol, ask for its AST node type
* and add it to the end if there are only 2 arguments. The forms are #[T], #[T,"t"],
* and as of 2.7.2 #[T,"t",ASTclassname].
*
* @param astCtorArgs The arguments to the AST constructor
*/
3089: public String getASTCreateString(String astCtorArgs) {
3090: //System.out.println("AST CTOR: "+astCtorArgs);
3091: if (astCtorArgs == null) {
3092: astCtorArgs = "";
3093: }
3094: int nCommas = 0;
3095: for (int i = 0; i < astCtorArgs.length(); i++) {
3096: if (astCtorArgs.charAt(i) == ',') {
3097: nCommas++;
3098: }
3099: }
3100: //System.out.println("num commas="+nCommas);
3101: if (nCommas < 2) { // if 1 or 2 args
3102: int firstComma = astCtorArgs.indexOf(',');
3103: int lastComma = astCtorArgs.lastIndexOf(',');
3104: String tokenName = astCtorArgs;
3105: if (nCommas > 0) {
3106: tokenName = astCtorArgs.substring(0, firstComma);
3107: }
3108: //System.out.println("Checking for ast node type of "+tokenName);
3109: TokenSymbol ts = grammar.tokenManager
3110: .getTokenSymbol(tokenName);
3111: if (ts != null) {
3112: String astNodeType = ts.getASTNodeType();
3113: //System.out.println("node type of "+tokenName+" is "+astNodeType);
3114: String emptyText = "";
3115: if (nCommas == 0) {
3116: // need to add 2nd arg of blank text for token text
3117: emptyText = ",\"\"";
3118: }
3119: if (astNodeType != null) {
3120: return "(" + astNodeType + ")"
3121: + "astFactory.create(" + astCtorArgs
3122: + emptyText + ",\"" + astNodeType + "\")";
3123: }
3124: // fall through and just do a regular create with cast on front
3125: // if necessary (it differs from default "AST").
3126: }
3127: if (labeledElementASTType.equals("AST")) {
3128: return "astFactory.create(" + astCtorArgs + ")";
3129: }
3130: return "(" + labeledElementASTType + ")"
3131: + "astFactory.create(" + astCtorArgs + ")";
3132: }
3133: // create default type or (since 2.7.2) 3rd arg is classname
3134: return "(" + labeledElementASTType + ")astFactory.create("
3135: + astCtorArgs + ")";
3136: }
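// Examples (token names hypothetical; MyNode assumed to be the AST node
// type registered for token T, with labeledElementASTType "AST"):
//
//     #[T]      -> (MyNode)astFactory.create(T,"","MyNode")
//     #[T,"t"]  -> (MyNode)astFactory.create(T,"t","MyNode")
//     #[X]      -> astFactory.create(X)    // X has no registered node type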
3137:
3138: protected String getLookaheadTestExpression(Lookahead[] look, int k) {
3139: StringBuffer e = new StringBuffer(100);
3140: boolean first = true;
3141:
3142: e.append("(");
3143: for (int i = 1; i <= k; i++) {
3144: BitSet p = look[i].fset;
3145: if (!first) {
3146: e.append(") && (");
3147: }
3148: first = false;
3149:
3150: // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
3151: // There is no way to predict what that token would be. Just
3152: // allow anything instead.
3153: if (look[i].containsEpsilon()) {
3154: e.append("true");
3155: } else {
3156: e.append(getLookaheadTestTerm(i, p));
3157: }
3158: }
3159: e.append(")");
3160:
3161: return e.toString();
3162: }
3163:
3164: /**Generate a lookahead test expression for an alternate. This
3165: * will be a series of tests joined by '&&' and enclosed by '()',
3166: * the number of such tests being determined by the depth of the lookahead.
3167: */
3168: protected String getLookaheadTestExpression(Alternative alt,
3169: int maxDepth) {
3170: int depth = alt.lookaheadDepth;
3171: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
3172: // if the decision is nondeterministic, do the best we can: LL(k)
3173: // any predicates that are around will be generated later.
3174: depth = grammar.maxk;
3175: }
3176:
3177: if (maxDepth == 0) {
3178: // empty lookahead can result from alt with sem pred
3179: // that can see end of token. E.g., A : {pred}? ('a')? ;
3180: return "( true )";
3181: }
3182:
3183: return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
3184: }
3185:
3186: /**Generate a depth==1 lookahead test expression given the BitSet.
3187: * This may be one of:
3188: * 1) a series of 'x==X||' tests
3189: * 2) a range test using >= && <= where possible,
3190: * 3) a bitset membership test for complex comparisons
3191: * @param k The lookahead level
3192: * @param p The lookahead set for level k
3193: */
3194: protected String getLookaheadTestTerm(int k, BitSet p) {
3195: // Determine the name of the item to be compared
3196: String ts = lookaheadString(k);
3197:
3198: // Generate a range expression if possible
3199: int[] elems = p.toArray();
3200: if (elementsAreRange(elems)) {
3201: return getRangeExpression(k, elems);
3202: }
3203:
3204: // Generate a bitset membership test if possible
3205: StringBuffer e;
3206: int degree = p.degree();
3207: if (degree == 0) {
3208: return "true";
3209: }
3210:
3211: if (degree >= bitsetTestThreshold) {
3212: int bitsetIdx = markBitsetForGen(p);
3213: return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
3214: }
3215:
3216: // Otherwise, generate the long-winded series of "x==X||" tests
3217: e = new StringBuffer();
3218: for (int i = 0; i < elems.length; i++) {
3219: // Get the compared-to item (token or character value)
3220: String cs = getValueString(elems[i]);
3221:
3222: // Generate the element comparison
3223: if (i > 0)
3224: e.append("||");
3225: e.append(ts);
3226: e.append("==");
3227: e.append(cs);
3228: }
3229: return e.toString();
3230: }
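// Depending on the set this returns, e.g., a range test
// "(LA(1) >= '0' && LA(1) <= '9')", a bitset membership test such as
// "_tokenSet_0.member(LA(1))" (bitset name hypothetical), or an
// equality chain "LA(1)==ID||LA(1)==INT".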
3231:
/** Return an expression for testing a contiguous range of elements
3233: * @param k The lookahead level
3234: * @param elems The elements representing the set, usually from BitSet.toArray().
3235: * @return String containing test expression.
3236: */
3237: public String getRangeExpression(int k, int[] elems) {
3238: if (!elementsAreRange(elems)) {
3239: antlrTool.panic("getRangeExpression called with non-range");
3240: }
3241: int begin = elems[0];
3242: int end = elems[elems.length - 1];
3243: return "(" + lookaheadString(k) + " >= "
3244: + getValueString(begin) + " && " + lookaheadString(k)
3245: + " <= " + getValueString(end) + ")";
3246: }
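// E.g., for a lexer set spanning '0'..'9' at k==1 this yields:
//
//     (LA(1) >= '0' && LA(1) <= '9')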
3247:
3248: /** getValueString: get a string representation of a token or char value
3249: * @param value The token or char value
3250: */
3251: private String getValueString(int value) {
3252: String cs;
3253: if (grammar instanceof LexerGrammar) {
3254: cs = charFormatter.literalChar(value);
3255: } else {
3256: TokenSymbol ts = grammar.tokenManager
3257: .getTokenSymbolAt(value);
3258: if (ts == null) {
3259: return "" + value; // return token type as string
3260: // tool.panic("vocabulary for token type " + value + " is null");
3261: }
3262: String tId = ts.getId();
3263: if (ts instanceof StringLiteralSymbol) {
3264: // if string literal, use predefined label if any
3265: // if no predefined, try to mangle into LITERAL_xxx.
3266: // if can't mangle, use int value as last resort
3267: StringLiteralSymbol sl = (StringLiteralSymbol) ts;
3268: String label = sl.getLabel();
3269: if (label != null) {
3270: cs = label;
3271: } else {
3272: cs = mangleLiteral(tId);
3273: if (cs == null) {
3274: cs = String.valueOf(value);
3275: }
3276: }
3277: } else {
3278: cs = tId;
3279: }
3280: }
3281: return cs;
3282: }
3283:
3284: /**Is the lookahead for this alt empty? */
3285: protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
3286: int depth = alt.lookaheadDepth;
3287: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
3288: depth = grammar.maxk;
3289: }
3290: for (int i = 1; i <= depth && i <= maxDepth; i++) {
3291: BitSet p = alt.cache[i].fset;
3292: if (p.degree() != 0) {
3293: return false;
3294: }
3295: }
3296: return true;
3297: }
3298:
3299: private String lookaheadString(int k) {
3300: if (grammar instanceof TreeWalkerGrammar) {
3301: return "_t.getType()";
3302: }
3303: return "LA(" + k + ")";
3304: }
3305:
3306: /** Mangle a string literal into a meaningful token name. This is
3307: * only possible for literals that are all characters. The resulting
3308: * mangled literal name is literalsPrefix with the text of the literal
3309: * appended.
3310: * @return A string representing the mangled literal, or null if not possible.
3311: */
3312: private String mangleLiteral(String s) {
3313: String mangled = antlrTool.literalsPrefix;
3314: for (int i = 1; i < s.length() - 1; i++) {
3315: if (!Character.isLetter(s.charAt(i)) && s.charAt(i) != '_') {
3316: return null;
3317: }
3318: mangled += s.charAt(i);
3319: }
3320: if (antlrTool.upperCaseMangledLiterals) {
3321: mangled = mangled.toUpperCase();
3322: }
3323: return mangled;
3324: }
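// Example: with a literalsPrefix of "LITERAL_", mangleLiteral("\"while\"")
// returns "LITERAL_while"; a literal containing characters other than
// letters or '_', such as "\"+=\"", returns null.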
3325:
/** Map an identifier to its corresponding tree-node variable.
3327: * This is context-sensitive, depending on the rule and alternative
3328: * being generated
3329: * @param idParam The identifier name to map
3330: * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
3331: */
3332: public String mapTreeId(String idParam, ActionTransInfo transInfo) {
3333: // if not in an action of a rule, nothing to map.
3334: if (currentRule == null)
3335: return idParam;
3336:
3337: boolean in_var = false;
3338: String id = idParam;
3339: if (grammar instanceof TreeWalkerGrammar) {
3340: if (!grammar.buildAST) {
3341: in_var = true;
3342: }
3343: // If the id ends with "_in", then map it to the input variable
3344: else if (id.length() > 3
3345: && id.lastIndexOf("_in") == id.length() - 3) {
3346: // Strip off the "_in"
3347: id = id.substring(0, id.length() - 3);
3348: in_var = true;
3349: }
3350: }
3351:
3352: // Check the rule labels. If id is a label, then the output
3353: // variable is label_AST, and the input variable is plain label.
3354: for (int i = 0; i < currentRule.labeledElements.size(); i++) {
3355: AlternativeElement elt = (AlternativeElement) currentRule.labeledElements
3356: .elementAt(i);
3357: if (elt.getLabel().equals(id)) {
3358: return in_var ? id : id + "_AST";
3359: }
3360: }
3361:
3362: // Failing that, check the id-to-variable map for the alternative.
3363: // If the id is in the map, then output variable is the name in the
3364: // map, and input variable is name_in
3365: String s = (String) treeVariableMap.get(id);
3366: if (s != null) {
3367: if (s == NONUNIQUE) {
3368: // There is more than one element with this id
3369: antlrTool.error("Ambiguous reference to AST element "
3370: + id + " in rule " + currentRule.getRuleName());
3371:
3372: return null;
3373: } else if (s.equals(currentRule.getRuleName())) {
3374: // a recursive call to the enclosing rule is
3375: // ambiguous with the rule itself.
3376: antlrTool.error("Ambiguous reference to AST element "
3377: + id + " in rule " + currentRule.getRuleName());
3378: return null;
3379: } else {
3380: return in_var ? s + "_in" : s;
3381: }
3382: }
3383:
3384: // Failing that, check the rule name itself. Output variable
3385: // is rule_AST; input variable is rule_AST_in (treeparsers).
3386: if (id.equals(currentRule.getRuleName())) {
3387: String r = in_var ? id + "_AST_in" : id + "_AST";
3388: if (transInfo != null) {
3389: if (!in_var) {
3390: transInfo.refRuleRoot = r;
3391: }
3392: }
3393: return r;
3394: } else {
3395: // id does not map to anything -- return itself.
3396: return id;
3397: }
3398: }
3399:
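    // Illustrative walk-through with a hypothetical rule "decl : d:ID expr ;"
    // and buildAST on: "#d" matches the label check and maps to "d_AST";
    // "#expr" resolves through treeVariableMap to the generated variable for
    // that rule reference; "#decl" maps to "decl_AST" (recording it as the
    // rule root in transInfo); and in a tree walker "#decl_in" maps to the
    // input variable "decl_AST_in".
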
    /** Given an element and the name of an associated AST variable,
     * create a mapping between the element "name" and the variable name.
     */
    private void mapTreeVariable(AlternativeElement e, String name) {
        // For tree elements, defer to the root
        if (e instanceof TreeElement) {
            mapTreeVariable(((TreeElement) e).root, name);
            return;
        }

        // Determine the name of the element, if any, for mapping purposes
        String elName = null;

        // Don't map labeled items
        if (e.getLabel() == null) {
            if (e instanceof TokenRefElement) {
                // use the token id
                elName = ((TokenRefElement) e).atomText;
            } else if (e instanceof RuleRefElement) {
                // use the rule name
                elName = ((RuleRefElement) e).targetRule;
            }
        }
        // Add the element to the tree variable map if it has a name
        if (elName != null) {
            if (treeVariableMap.get(elName) != null) {
                // Name is already in the map -- mark it as duplicate
                treeVariableMap.remove(elName);
                treeVariableMap.put(elName, NONUNIQUE);
            } else {
                treeVariableMap.put(elName, name);
            }
        }
    }

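    // Hypothetical example: for an unlabeled alternative "ID expr", the
    // token reference maps the key "ID" and the rule reference maps the key
    // "expr" to their generated AST variable names; a second unlabeled ID in
    // the same alternative replaces the entry with NONUNIQUE, so a later
    // "#ID" reference is reported as ambiguous by mapTreeId.
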
    /** Lexically process $var and tree-specifiers in the action.
     * This will replace #id and #(...) with the appropriate
     * function calls and/or variables etc...
     */
    protected String processActionForSpecialSymbols(String actionStr,
            int line, RuleBlock currentRule, ActionTransInfo tInfo) {
        if (actionStr == null || actionStr.length() == 0)
            return null;

        // The action trans info tells us (at the moment) whether an
        // assignment was done to the rule's tree root.
        if (grammar == null)
            return actionStr;

        // see if we have anything to do...
        if ((grammar.buildAST && actionStr.indexOf('#') != -1)
                || grammar instanceof TreeWalkerGrammar
                || ((grammar instanceof LexerGrammar
                        || grammar instanceof ParserGrammar)
                        && actionStr.indexOf('$') != -1)) {
            // Create a lexer to read an action and return the translated version
            antlr.actions.java.ActionLexer lexer = new antlr.actions.java.ActionLexer(
                    actionStr, currentRule, this, tInfo);

            lexer.setLineOffset(line);
            lexer.setFilename(grammar.getFilename());
            lexer.setTool(antlrTool);

            try {
                lexer.mACTION(true);
                actionStr = lexer.getTokenObject().getText();
                // System.out.println("action translated: "+actionStr);
                // System.out.println("trans info is "+tInfo);
            } catch (RecognitionException ex) {
                lexer.reportError(ex);
                return actionStr;
            } catch (TokenStreamException tex) {
                antlrTool.panic("Error reading action: " + actionStr);
                return actionStr;
            } catch (CharStreamException io) {
                antlrTool.panic("Error reading action: " + actionStr);
                return actionStr;
            }
        }
        return actionStr;
    }

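    // Illustrative translation (sketch, not verbatim generator output): with
    // buildAST on, an action such as
    //
    //     { #decl = #([DECL, "decl"], #decl); }
    //
    // is rewritten by the ActionLexer into code that builds the new root
    // through astFactory and assigns it to the generated variable decl_AST,
    // with tInfo noting that the rule root was reassigned.
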
    /** Set up the strings used to parameterize code generation for the
     * given grammar class (parser, lexer, or tree walker).
     */
    private void setupGrammarParameters(Grammar g) {
        if (g instanceof ParserGrammar) {
            labeledElementASTType = "AST";
            if (g.hasOption("ASTLabelType")) {
                Token tsuffix = g.getOption("ASTLabelType");
                if (tsuffix != null) {
                    String suffix = StringUtils.stripFrontBack(
                            tsuffix.getText(), "\"", "\"");
                    if (suffix != null) {
                        labeledElementASTType = suffix;
                    }
                }
            }
            labeledElementType = "Token ";
            labeledElementInit = "null";
            commonExtraArgs = "";
            commonExtraParams = "";
            commonLocalVars = "";
            lt1Value = "LT(1)";
            exceptionThrown = "RecognitionException";
            throwNoViable = "throw new NoViableAltException(LT(1), getFilename());";
        } else if (g instanceof LexerGrammar) {
            labeledElementType = "char ";
            labeledElementInit = "'\\0'";
            commonExtraArgs = "";
            commonExtraParams = "boolean _createToken";
            commonLocalVars = "int _ttype; Token _token=null; int _begin=text.length();";
            lt1Value = "LA(1)";
            exceptionThrown = "RecognitionException";
            throwNoViable = "throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());";
        } else if (g instanceof TreeWalkerGrammar) {
            labeledElementASTType = "AST";
            labeledElementType = "AST";
            if (g.hasOption("ASTLabelType")) {
                Token tsuffix = g.getOption("ASTLabelType");
                if (tsuffix != null) {
                    String suffix = StringUtils.stripFrontBack(
                            tsuffix.getText(), "\"", "\"");
                    if (suffix != null) {
                        labeledElementASTType = suffix;
                        labeledElementType = suffix;
                    }
                }
            }
            if (!g.hasOption("ASTLabelType")) {
                g.setOption("ASTLabelType", new Token(
                        ANTLRTokenTypes.STRING_LITERAL, "AST"));
            }
            labeledElementInit = "null";
            commonExtraArgs = "_t";
            commonExtraParams = "AST _t";
            commonLocalVars = "";
            lt1Value = "(" + labeledElementASTType + ")_t";
            exceptionThrown = "RecognitionException";
            throwNoViable = "throw new NoViableAltException(_t);";
        } else {
            antlrTool.panic("Unknown grammar type");
        }
    }

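    // Summary of the settings above (descriptive, added for reference): a
    // parser labels elements as Token and reads lookahead via LT(1); a lexer
    // labels elements as char, reads LA(1), and threads the
    // _createToken/_ttype machinery through every rule; a tree walker labels
    // elements with the ASTLabelType option (defaulting to "AST"), receives
    // the current node _t instead of consuming input, and throws
    // NoViableAltException(_t) on a mismatch.
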
    /** This method exists so a subclass, namely VAJCodeGenerator,
     * can open the file in its own evil way. JavaCodeGenerator
     * simply opens a text file...
     */
    public void setupOutput(String className) throws IOException {
        currentOutput = antlrTool.openOutputFile(className + ".java");
    }
}