0001: package antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.jGuru.com
0005: * Software rights: http://www.antlr.org/RIGHTS.html
0006: *
0007: * $Id: CppCodeGenerator.java,v 1.1 2004/01/21 19:18:30 rgrimm Exp $
0008: */
0009:
0010: // C++ code generator by Pete Wells: pete@yamuna.demon.co.uk
0011: // #line generation contributed by: Ric Klaren <klaren@cs.utwente.nl>
0012: import java.util.Enumeration;
0013: import java.util.Hashtable;
0014: import java.util.HashSet;
0015: import antlr.collections.impl.BitSet;
0016: import antlr.collections.impl.Vector;
0017: import java.io.PrintWriter; //SAS: changed for proper text file io
0018: import java.io.IOException;
0019: import java.io.FileWriter;
0020:
0021: /**Generate MyParser.cpp, MyParser.hpp, MyLexer.cpp, MyLexer.hpp and MyParserTokenTypes.hpp */
0022: public class CppCodeGenerator extends CodeGenerator {
// non-zero if inside syntactic predicate generation
protected int syntacticPredLevel = 0;

// Are we generating ASTs (for parsers and tree parsers) right now?
protected boolean genAST = false;

// Are we saving the text consumed (for lexers) right now?
protected boolean saveText = false;

// Generate #line directives (consulted by genLineNo and genLineNo2)
protected boolean genHashLines = true;
// Generate constructors or not
protected boolean noConstructors = false;

// Used to keep track of the line number in the output: the print methods
// add the newlines they write to outputLine, and genLineNo2() emits
// "#line (outputLine + 1)" against outputFile.
protected int outputLine;
protected String outputFile;

// Grammar parameters set up to handle different grammar classes.
// These are used to get instanceof tests out of code generation
boolean usingCustomAST = false;
String labeledElementType;
String labeledElementASTType; // mostly the same as labeledElementType except in parsers
String labeledElementASTInit;
String labeledElementInit;
String commonExtraArgs;
String commonExtraParams;
String commonLocalVars;
String lt1Value;
String exceptionThrown;
String throwNoViable;

// Tracks the rule being generated. Used for mapTreeId
RuleBlock currentRule;
// Tracks the rule or labeled subrule being generated. Used for AST generation.
String currentASTResult;
// Mapping between the ids used in the current alt, and the
// names of variables used to represent their AST values.
Hashtable treeVariableMap = new Hashtable();

/** Used to keep track of which AST variables have been defined in a rule
 * (except for the #rule_name and #rule_name_in variables).
 */
HashSet declaredASTVariables = new HashSet();

// Count of unnamed generated variables
int astVarNumber = 1;
// Special value used to mark duplicate in treeVariableMap.
// NOTE(review): presumably compared by identity, hence a fresh String
// object rather than a literal -- confirm against the users of this field.
protected static final String NONUNIQUE = new String();

public static final int caseSizeThreshold = 127; // ascii is max

// Semantic predicate strings collected by addSemPred(); only allocated
// by the gen(...Grammar) methods when debugging output is requested.
private Vector semPreds;

// Used to keep track of which heterogeneous AST types are used and
// therefore need to be set in the ASTFactory of the generated parser
private Vector astTypes;

// Prefixes prepended to names emitted into the generated C++ code
private static String namespaceStd = "ANTLR_USE_NAMESPACE(std)";
private static String namespaceAntlr = "ANTLR_USE_NAMESPACE(antlr)";
private static NameSpace nameSpace = null;

// Names of header action sections
private static final String preIncludeCpp = "pre_include_cpp";
private static final String preIncludeHpp = "pre_include_hpp";
private static final String postIncludeCpp = "post_include_cpp";
private static final String postIncludeHpp = "post_include_hpp";
0089:
/** Create a C++ code-generator and install the C++ character formatter.
 * The caller must still call setTool, setBehavior, and setAnalyzer
 * before generating code.
 */
public CppCodeGenerator() {
    // The superclass constructor is invoked implicitly.
    this.charFormatter = new CppCharFormatter();
}
0098:
0099: /** Adds a semantic predicate string to the sem pred vector
0100: These strings will be used to build an array of sem pred names
0101: when building a debugging parser. This method should only be
0102: called when the debug option is specified
0103: */
0104: protected int addSemPred(String predicate) {
0105: semPreds.appendElement(predicate);
0106: return semPreds.size() - 1;
0107: }
0108:
0109: public void exitIfError() {
0110: if (antlrTool.hasError()) {
0111: antlrTool.fatalError("Exiting due to errors.");
0112: }
0113: }
0114:
0115: protected int countLines(String s) {
0116: int lines = 0;
0117: for (int i = 0; i < s.length(); i++) {
0118: if (s.charAt(i) == '\n')
0119: lines++;
0120: }
0121: return lines;
0122: }
0123:
0124: /** Output a String to the currentOutput stream.
0125: * Ignored if string is null.
0126: * @param s The string to output
0127: */
0128: protected void _print(String s) {
0129: if (s != null) {
0130: outputLine += countLines(s);
0131: currentOutput.print(s);
0132: }
0133: }
0134:
0135: /** Print an action without leading tabs, attempting to
0136: * preserve the current indentation level for multi-line actions
0137: * Ignored if string is null.
0138: * @param s The action string to output
0139: */
0140: protected void _printAction(String s) {
0141: if (s != null) {
0142: outputLine += countLines(s) + 1;
0143: super ._printAction(s);
0144: }
0145: }
0146:
0147: /** Print an action stored in a token surrounded by #line stuff */
0148: public void printAction(Token t) {
0149: if (t != null) {
0150: genLineNo(t.getLine());
0151: printTabs();
0152: _printAction(processActionForSpecialSymbols(t.getText(), t
0153: .getLine(), null, null));
0154: genLineNo2();
0155: }
0156: }
0157:
0158: /** Print a header action by #line stuff also process any tree construction
0159: * @param name The name of the header part
0160: */
0161: public void printHeaderAction(String name) {
0162: Token a = (antlr.Token) behavior.headerActions.get(name);
0163: if (a != null) {
0164: genLineNo(a.getLine());
0165: println(processActionForSpecialSymbols(a.getText(), a
0166: .getLine(), null, null));
0167: genLineNo2();
0168: }
0169: }
0170:
0171: /** Output a String followed by newline, to the currentOutput stream.
0172: * Ignored if string is null.
0173: * @param s The string to output
0174: */
0175: protected void _println(String s) {
0176: if (s != null) {
0177: outputLine += countLines(s) + 1;
0178: currentOutput.println(s);
0179: }
0180: }
0181:
0182: /** Output tab indent followed by a String followed by newline,
0183: * to the currentOutput stream. Ignored if string is null.
0184: * @param s The string to output
0185: */
0186: protected void println(String s) {
0187: if (s != null) {
0188: printTabs();
0189: outputLine += countLines(s) + 1;
0190: currentOutput.println(s);
0191: }
0192: }
0193:
0194: /** Generate a #line or // line depending on options */
0195: public void genLineNo(int line) {
0196: if (line == 0) {
0197: line++;
0198: }
0199: if (genHashLines)
0200: _println("#line " + line + " \""
0201: + antlrTool.fileMinusPath(antlrTool.grammarFile)
0202: + "\"");
0203: }
0204:
0205: /** Generate a #line or // line depending on options */
0206: public void genLineNo(GrammarElement el) {
0207: if (el != null)
0208: genLineNo(el.getLine());
0209: }
0210:
0211: /** Generate a #line or // line depending on options */
0212: public void genLineNo(Token t) {
0213: if (t != null)
0214: genLineNo(t.getLine());
0215: }
0216:
0217: /** Generate a #line or // line depending on options */
0218: public void genLineNo2() {
0219: if (genHashLines) {
0220: _println("#line " + (outputLine + 1) + " \"" + outputFile
0221: + "\"");
0222: }
0223: }
0224:
0225: /**Generate the parser, lexer, treeparser, and token types in C++ */
0226: public void gen() {
0227: // Do the code generation
0228: try {
0229: // Loop over all grammars
0230: Enumeration grammarIter = behavior.grammars.elements();
0231: while (grammarIter.hasMoreElements()) {
0232: Grammar g = (Grammar) grammarIter.nextElement();
0233: // Connect all the components to each other
0234: g.setGrammarAnalyzer(analyzer);
0235: g.setCodeGenerator(this );
0236: analyzer.setGrammar(g);
0237: // To get right overloading behavior across hetrogeneous grammars
0238: setupGrammarParameters(g);
0239: g.generate();
0240: exitIfError();
0241: }
0242:
0243: // Loop over all token managers (some of which are lexers)
0244: Enumeration tmIter = behavior.tokenManagers.elements();
0245: while (tmIter.hasMoreElements()) {
0246: TokenManager tm = (TokenManager) tmIter.nextElement();
0247: if (!tm.isReadOnly()) {
0248: // Write the token manager tokens as C++
0249: // this must appear before genTokenInterchange so that
0250: // labels are set on string literals
0251: genTokenTypes(tm);
0252: // Write the token manager tokens as plain text
0253: genTokenInterchange(tm);
0254: }
0255: exitIfError();
0256: }
0257: } catch (IOException e) {
0258: antlrTool.reportException(e, null);
0259: }
0260: }
0261:
0262: /** Generate code for the given grammar element.
0263: * @param blk The {...} action to generate
0264: */
0265: public void gen(ActionElement action) {
0266: if (DEBUG_CODE_GENERATOR)
0267: System.out.println("genAction(" + action + ")");
0268: if (action.isSemPred) {
0269: genSemPred(action.actionText, action.line);
0270: } else {
0271: if (grammar.hasSyntacticPredicate) {
0272: println("if ( inputState->guessing==0 ) {");
0273: tabs++;
0274: }
0275:
0276: ActionTransInfo tInfo = new ActionTransInfo();
0277: String actionStr = processActionForSpecialSymbols(
0278: action.actionText, action.getLine(), currentRule,
0279: tInfo);
0280:
0281: if (tInfo.refRuleRoot != null) {
0282: // Somebody referenced "#rule", make sure translated var is valid
0283: // assignment to #rule is left as a ref also, meaning that assignments
0284: // with no other refs like "#rule = foo();" still forces this code to be
0285: // generated (unnecessarily).
0286: println(tInfo.refRuleRoot + " = "
0287: + labeledElementASTType + "(currentAST.root);");
0288: }
0289:
0290: // dump the translated action
0291: genLineNo(action);
0292: printAction(actionStr);
0293: genLineNo2();
0294:
0295: if (tInfo.assignToRoot) {
0296: // Somebody did a "#rule=", reset internal currentAST.root
0297: println("currentAST.root = " + tInfo.refRuleRoot + ";");
0298: // reset the child pointer too to be last sibling in sibling list
0299: // now use if else in stead of x ? y : z to shut CC 4.2 up.
0300: println("if ( " + tInfo.refRuleRoot + "!="
0301: + labeledElementASTInit + " &&");
0302: tabs++;
0303: println(tInfo.refRuleRoot + "->getFirstChild() != "
0304: + labeledElementASTInit + " )");
0305: println(" currentAST.child = " + tInfo.refRuleRoot
0306: + "->getFirstChild();");
0307: tabs--;
0308: println("else");
0309: tabs++;
0310: println("currentAST.child = " + tInfo.refRuleRoot + ";");
0311: tabs--;
0312: println("currentAST.advanceChildToEnd();");
0313: }
0314:
0315: if (grammar.hasSyntacticPredicate) {
0316: tabs--;
0317: println("}");
0318: }
0319: }
0320: }
0321:
0322: /** Generate code for the given grammar element.
0323: * @param blk The "x|y|z|..." block to generate
0324: */
0325: public void gen(AlternativeBlock blk) {
0326: if (DEBUG_CODE_GENERATOR)
0327: System.out.println("gen(" + blk + ")");
0328: println("{");
0329: genBlockPreamble(blk);
0330: genBlockInitAction(blk);
0331:
0332: // Tell AST generation to build subrule result
0333: String saveCurrentASTResult = currentASTResult;
0334: if (blk.getLabel() != null) {
0335: currentASTResult = blk.getLabel();
0336: }
0337:
0338: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0339:
0340: CppBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
0341: genBlockFinish(howToFinish, throwNoViable);
0342:
0343: println("}");
0344:
0345: // Restore previous AST generation
0346: currentASTResult = saveCurrentASTResult;
0347: }
0348:
0349: /** Generate code for the given grammar element.
0350: * @param blk The block-end element to generate. Block-end
0351: * elements are synthesized by the grammar parser to represent
0352: * the end of a block.
0353: */
0354: public void gen(BlockEndElement end) {
0355: if (DEBUG_CODE_GENERATOR)
0356: System.out.println("genRuleEnd(" + end + ")");
0357: }
0358:
0359: /** Generate code for the given grammar element.
0360: * @param blk The character literal reference to generate
0361: */
0362: public void gen(CharLiteralElement atom) {
0363: if (DEBUG_CODE_GENERATOR)
0364: System.out.println("genChar(" + atom + ")");
0365:
0366: if (atom.getLabel() != null) {
0367: println(atom.getLabel() + " = " + lt1Value + ";");
0368: }
0369:
0370: boolean oldsaveText = saveText;
0371: saveText = saveText
0372: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0373: genMatch(atom);
0374: saveText = oldsaveText;
0375: }
0376:
0377: /** Generate code for the given grammar element.
0378: * @param blk The character-range reference to generate
0379: */
0380: public void gen(CharRangeElement r) {
0381: if (r.getLabel() != null && syntacticPredLevel == 0) {
0382: println(r.getLabel() + " = " + lt1Value + ";");
0383: }
0384: // Correctly take care of saveIndex stuff...
0385: boolean save = (grammar instanceof LexerGrammar && (!saveText || r
0386: .getAutoGenType() == GrammarElement.AUTO_GEN_BANG));
0387: if (save)
0388: println("_saveIndex=text.length();");
0389:
0390: println("matchRange(" + textOrChar(r.beginText) + ","
0391: + textOrChar(r.endText) + ");");
0392:
0393: if (save)
0394: println("text.setLength(_saveIndex);");
0395: }
0396:
0397: /** Generate the lexer C++ files */
0398: public void gen(LexerGrammar g) throws IOException {
0399: // If debugging, create a new sempred vector for this grammar
0400: if (g.debuggingOutput)
0401: semPreds = new Vector();
0402:
0403: setGrammar(g);
0404: if (!(grammar instanceof LexerGrammar)) {
0405: antlrTool.panic("Internal error generating lexer");
0406: }
0407:
0408: genBody(g);
0409: genInclude(g);
0410: }
0411:
0412: /** Generate code for the given grammar element.
0413: * @param blk The (...)+ block to generate
0414: */
0415: public void gen(OneOrMoreBlock blk) {
0416: if (DEBUG_CODE_GENERATOR)
0417: System.out.println("gen+(" + blk + ")");
0418: String label;
0419: String cnt;
0420: println("{ // ( ... )+");
0421: genBlockPreamble(blk);
0422: if (blk.getLabel() != null) {
0423: cnt = "_cnt_" + blk.getLabel();
0424: } else {
0425: cnt = "_cnt" + blk.ID;
0426: }
0427: println("int " + cnt + "=0;");
0428: if (blk.getLabel() != null) {
0429: label = blk.getLabel();
0430: } else {
0431: label = "_loop" + blk.ID;
0432: }
0433:
0434: println("for (;;) {");
0435: tabs++;
0436: // generate the init action for ()+ ()* inside the loop
0437: // this allows us to do usefull EOF checking...
0438: genBlockInitAction(blk);
0439:
0440: // Tell AST generation to build subrule result
0441: String saveCurrentASTResult = currentASTResult;
0442: if (blk.getLabel() != null) {
0443: currentASTResult = blk.getLabel();
0444: }
0445:
0446: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0447:
0448: // generate exit test if greedy set to false
0449: // and an alt is ambiguous with exit branch
0450: // or when lookahead derived purely from end-of-file
0451: // Lookahead analysis stops when end-of-file is hit,
0452: // returning set {epsilon}. Since {epsilon} is not
0453: // ambig with any real tokens, no error is reported
0454: // by deterministic() routines and we have to check
0455: // for the case where the lookahead depth didn't get
0456: // set to NONDETERMINISTIC (this only happens when the
0457: // FOLLOW contains real atoms + epsilon).
0458: boolean generateNonGreedyExitPath = false;
0459: int nonGreedyExitDepth = grammar.maxk;
0460:
0461: if (!blk.greedy
0462: && blk.exitLookaheadDepth <= grammar.maxk
0463: && blk.exitCache[blk.exitLookaheadDepth]
0464: .containsEpsilon()) {
0465: generateNonGreedyExitPath = true;
0466: nonGreedyExitDepth = blk.exitLookaheadDepth;
0467: } else if (!blk.greedy
0468: && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
0469: generateNonGreedyExitPath = true;
0470: }
0471:
0472: // generate exit test if greedy set to false
0473: // and an alt is ambiguous with exit branch
0474: if (generateNonGreedyExitPath) {
0475: if (DEBUG_CODE_GENERATOR) {
0476: System.out
0477: .println("nongreedy (...)+ loop; exit depth is "
0478: + blk.exitLookaheadDepth);
0479: }
0480: String predictExit = getLookaheadTestExpression(
0481: blk.exitCache, nonGreedyExitDepth);
0482: println("// nongreedy exit test");
0483: println("if ( " + cnt + ">=1 && " + predictExit + ") goto "
0484: + label + ";");
0485: }
0486:
0487: CppBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
0488: genBlockFinish(howToFinish, "if ( " + cnt + ">=1 ) { goto "
0489: + label + "; } else {" + throwNoViable + "}");
0490:
0491: println(cnt + "++;");
0492: tabs--;
0493: println("}");
0494: println(label + ":;");
0495: println("} // ( ... )+");
0496:
0497: // Restore previous AST generation
0498: currentASTResult = saveCurrentASTResult;
0499: }
0500:
0501: /** Generate the parser C++ file */
0502: public void gen(ParserGrammar g) throws IOException {
0503:
0504: // if debugging, set up a new vector to keep track of sempred
0505: // strings for this grammar
0506: if (g.debuggingOutput)
0507: semPreds = new Vector();
0508:
0509: setGrammar(g);
0510: if (!(grammar instanceof ParserGrammar)) {
0511: antlrTool.panic("Internal error generating parser");
0512: }
0513:
0514: genBody(g);
0515: genInclude(g);
0516: }
0517:
0518: /** Generate code for the given grammar element.
0519: * @param blk The rule-reference to generate
0520: */
0521: public void gen(RuleRefElement rr) {
0522: if (DEBUG_CODE_GENERATOR)
0523: System.out.println("genRR(" + rr + ")");
0524: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
0525: if (rs == null || !rs.isDefined()) {
0526: // Is this redundant???
0527: antlrTool.error("Rule '" + rr.targetRule
0528: + "' is not defined", grammar.getFilename(), rr
0529: .getLine(), rr.getColumn());
0530: return;
0531: }
0532: if (!(rs instanceof RuleSymbol)) {
0533: // Is this redundant???
0534: antlrTool.error("'" + rr.targetRule
0535: + "' does not name a grammar rule", grammar
0536: .getFilename(), rr.getLine(), rr.getColumn());
0537: return;
0538: }
0539:
0540: genErrorTryForElement(rr);
0541:
0542: // AST value for labeled rule refs in tree walker.
0543: // This is not AST construction; it is just the input tree node value.
0544: if (grammar instanceof TreeWalkerGrammar
0545: && rr.getLabel() != null && syntacticPredLevel == 0) {
0546: println(rr.getLabel() + " = (_t == ASTNULL) ? "
0547: + labeledElementASTInit + " : " + lt1Value + ";");
0548: }
0549:
0550: // if in lexer and ! on rule ref or alt or rule, save buffer index to
0551: // kill later
0552: if (grammar instanceof LexerGrammar
0553: && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0554: println("_saveIndex = text.length();");
0555: }
0556:
0557: // Process return value assignment if any
0558: printTabs();
0559: if (rr.idAssign != null) {
0560: // Warn if the rule has no return type
0561: if (rs.block.returnAction == null) {
0562: antlrTool.warning("Rule '" + rr.targetRule
0563: + "' has no return type",
0564: grammar.getFilename(), rr.getLine(), rr
0565: .getColumn());
0566: }
0567: _print(rr.idAssign + "=");
0568: } else {
0569: // Warn about return value if any, but not inside syntactic predicate
0570: if (!(grammar instanceof LexerGrammar)
0571: && syntacticPredLevel == 0
0572: && rs.block.returnAction != null) {
0573: antlrTool.warning("Rule '" + rr.targetRule
0574: + "' returns a value", grammar.getFilename(),
0575: rr.getLine(), rr.getColumn());
0576: }
0577: }
0578:
0579: // Call the rule
0580: GenRuleInvocation(rr);
0581:
0582: // if in lexer and ! on element or alt or rule, save buffer index to kill later
0583: if (grammar instanceof LexerGrammar
0584: && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0585: println("text.erase(_saveIndex);");
0586: }
0587:
0588: // if not in a syntactic predicate
0589: if (syntacticPredLevel == 0) {
0590: boolean doNoGuessTest = (grammar.hasSyntacticPredicate && (grammar.buildAST
0591: && rr.getLabel() != null || (genAST && rr
0592: .getAutoGenType() == GrammarElement.AUTO_GEN_NONE)));
0593:
0594: if (doNoGuessTest) {
0595: println("if (inputState->guessing==0) {");
0596: tabs++;
0597: }
0598:
0599: if (grammar.buildAST && rr.getLabel() != null) {
0600: // always gen variable for rule return on labeled rules
0601: println(rr.getLabel() + "_AST = returnAST;");
0602: }
0603:
0604: if (genAST) {
0605: switch (rr.getAutoGenType()) {
0606: case GrammarElement.AUTO_GEN_NONE:
0607: if (usingCustomAST)
0608: println("astFactory->addASTChild(currentAST, static_cast<"
0609: + namespaceAntlr
0610: + "RefAST>(returnAST));");
0611: else
0612: println("astFactory->addASTChild( currentAST, returnAST );");
0613: break;
0614: case GrammarElement.AUTO_GEN_CARET:
0615: // FIXME: RK: I'm not so sure this should be an error..
0616: // I think it might actually work and be usefull at times.
0617: antlrTool
0618: .error("Internal: encountered ^ after rule reference");
0619: break;
0620: default:
0621: break;
0622: }
0623: }
0624:
0625: // if a lexer and labeled, Token label defined at rule level, just set it here
0626: if (grammar instanceof LexerGrammar
0627: && rr.getLabel() != null) {
0628: println(rr.getLabel() + "=_returnToken;");
0629: }
0630:
0631: if (doNoGuessTest) {
0632: tabs--;
0633: println("}");
0634: }
0635: }
0636: genErrorCatchForElement(rr);
0637: }
0638:
0639: /** Generate code for the given grammar element.
0640: * @param blk The string-literal reference to generate
0641: */
0642: public void gen(StringLiteralElement atom) {
0643: if (DEBUG_CODE_GENERATOR)
0644: System.out.println("genString(" + atom + ")");
0645:
0646: // Variable declarations for labeled elements
0647: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0648: println(atom.getLabel() + " = " + lt1Value + ";");
0649: }
0650:
0651: // AST
0652: genElementAST(atom);
0653:
0654: // is there a bang on the literal?
0655: boolean oldsaveText = saveText;
0656: saveText = saveText
0657: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0658:
0659: // matching
0660: genMatch(atom);
0661:
0662: saveText = oldsaveText;
0663:
0664: // tack on tree cursor motion if doing a tree walker
0665: if (grammar instanceof TreeWalkerGrammar) {
0666: println("_t = _t->getNextSibling();");
0667: }
0668: }
0669:
0670: /** Generate code for the given grammar element.
0671: * @param blk The token-range reference to generate
0672: */
0673: public void gen(TokenRangeElement r) {
0674: genErrorTryForElement(r);
0675: if (r.getLabel() != null && syntacticPredLevel == 0) {
0676: println(r.getLabel() + " = " + lt1Value + ";");
0677: }
0678:
0679: // AST
0680: genElementAST(r);
0681:
0682: // match
0683: println("matchRange(" + r.beginText + "," + r.endText + ");");
0684: genErrorCatchForElement(r);
0685: }
0686:
0687: /** Generate code for the given grammar element.
0688: * @param blk The token-reference to generate
0689: */
0690: public void gen(TokenRefElement atom) {
0691: if (DEBUG_CODE_GENERATOR)
0692: System.out.println("genTokenRef(" + atom + ")");
0693: if (grammar instanceof LexerGrammar) {
0694: antlrTool.panic("Token reference found in lexer");
0695: }
0696: genErrorTryForElement(atom);
0697: // Assign Token value to token label variable
0698: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0699: println(atom.getLabel() + " = " + lt1Value + ";");
0700: }
0701:
0702: // AST
0703: genElementAST(atom);
0704: // matching
0705: genMatch(atom);
0706: genErrorCatchForElement(atom);
0707:
0708: // tack on tree cursor motion if doing a tree walker
0709: if (grammar instanceof TreeWalkerGrammar) {
0710: println("_t = _t->getNextSibling();");
0711: }
0712: }
0713:
0714: public void gen(TreeElement t) {
0715: // save AST cursor
0716: println(labeledElementType + " __t" + t.ID + " = _t;");
0717:
0718: // If there is a label on the root, then assign that to the variable
0719: if (t.root.getLabel() != null) {
0720: println(t.root.getLabel() + " = (_t == ASTNULL) ? "
0721: + labeledElementASTInit + " : _t;");
0722: }
0723:
0724: // check for invalid modifiers ! and ^ on tree element roots
0725: if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG) {
0726: antlrTool
0727: .error(
0728: "Suffixing a root node with '!' is not implemented",
0729: grammar.getFilename(), t.getLine(), t
0730: .getColumn());
0731: t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
0732: }
0733: if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET) {
0734: antlrTool
0735: .warning(
0736: "Suffixing a root node with '^' is redundant; already a root",
0737: grammar.getFilename(), t.getLine(), t
0738: .getColumn());
0739: t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
0740: }
0741:
0742: // Generate AST variables
0743: genElementAST(t.root);
0744: if (grammar.buildAST) {
0745: // Save the AST construction state
0746: println(namespaceAntlr + "ASTPair __currentAST" + t.ID
0747: + " = currentAST;");
0748: // Make the next item added a child of the TreeElement root
0749: println("currentAST.root = currentAST.child;");
0750: println("currentAST.child = " + labeledElementASTInit + ";");
0751: }
0752:
0753: // match root
0754: if (t.root instanceof WildcardElement) {
0755: println("if ( _t == ASTNULL ) throw MismatchedTokenException();");
0756: } else {
0757: genMatch(t.root);
0758: }
0759: // move to list of children
0760: println("_t = _t->getFirstChild();");
0761:
0762: // walk list of children, generating code for each
0763: for (int i = 0; i < t.getAlternatives().size(); i++) {
0764: Alternative a = t.getAlternativeAt(i);
0765: AlternativeElement e = a.head;
0766: while (e != null) {
0767: e.generate();
0768: e = e.next;
0769: }
0770: }
0771:
0772: if (grammar.buildAST) {
0773: // restore the AST construction state to that just after the
0774: // tree root was added
0775: println("currentAST = __currentAST" + t.ID + ";");
0776: }
0777: // restore AST cursor
0778: println("_t = __t" + t.ID + ";");
0779: // move cursor to sibling of tree just parsed
0780: println("_t = _t->getNextSibling();");
0781: }
0782:
0783: /** Generate the tree-parser C++ files */
0784: public void gen(TreeWalkerGrammar g) throws IOException {
0785: setGrammar(g);
0786: if (!(grammar instanceof TreeWalkerGrammar)) {
0787: antlrTool.panic("Internal error generating tree-walker");
0788: }
0789:
0790: genBody(g);
0791: genInclude(g);
0792: }
0793:
0794: /** Generate code for the given grammar element.
0795: * @param wc The wildcard element to generate
0796: */
0797: public void gen(WildcardElement wc) {
0798: // Variable assignment for labeled elements
0799: if (wc.getLabel() != null && syntacticPredLevel == 0) {
0800: println(wc.getLabel() + " = " + lt1Value + ";");
0801: }
0802:
0803: // AST
0804: genElementAST(wc);
0805: // Match anything but EOF
0806: if (grammar instanceof TreeWalkerGrammar) {
0807: println("if ( _t == " + labeledElementASTInit + " ) throw "
0808: + namespaceAntlr + "MismatchedTokenException();");
0809: } else if (grammar instanceof LexerGrammar) {
0810: if (grammar instanceof LexerGrammar
0811: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0812: println("_saveIndex = text.length();");
0813: }
0814: println("matchNot(EOF/*_CHAR*/);");
0815: if (grammar instanceof LexerGrammar
0816: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0817: println("text.erase(_saveIndex);"); // kill text atom put in buffer
0818: }
0819: } else {
0820: println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
0821: }
0822:
0823: // tack on tree cursor motion if doing a tree walker
0824: if (grammar instanceof TreeWalkerGrammar) {
0825: println("_t = _t->getNextSibling();");
0826: }
0827: }
0828:
0829: /** Generate code for the given grammar element.
0830: * @param blk The (...)* block to generate
0831: */
0832: public void gen(ZeroOrMoreBlock blk) {
0833: if (DEBUG_CODE_GENERATOR)
0834: System.out.println("gen*(" + blk + ")");
0835: println("{ // ( ... )*");
0836: genBlockPreamble(blk);
0837: String label;
0838: if (blk.getLabel() != null) {
0839: label = blk.getLabel();
0840: } else {
0841: label = "_loop" + blk.ID;
0842: }
0843: println("for (;;) {");
0844: tabs++;
0845: // generate the init action for ()+ ()* inside the loop
0846: // this allows us to do usefull EOF checking...
0847: genBlockInitAction(blk);
0848:
0849: // Tell AST generation to build subrule result
0850: String saveCurrentASTResult = currentASTResult;
0851: if (blk.getLabel() != null) {
0852: currentASTResult = blk.getLabel();
0853: }
0854:
0855: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0856:
0857: // generate exit test if greedy set to false
0858: // and an alt is ambiguous with exit branch
0859: // or when lookahead derived purely from end-of-file
0860: // Lookahead analysis stops when end-of-file is hit,
0861: // returning set {epsilon}. Since {epsilon} is not
0862: // ambig with any real tokens, no error is reported
0863: // by deterministic() routines and we have to check
0864: // for the case where the lookahead depth didn't get
0865: // set to NONDETERMINISTIC (this only happens when the
0866: // FOLLOW contains real atoms + epsilon).
0867: boolean generateNonGreedyExitPath = false;
0868: int nonGreedyExitDepth = grammar.maxk;
0869:
0870: if (!blk.greedy
0871: && blk.exitLookaheadDepth <= grammar.maxk
0872: && blk.exitCache[blk.exitLookaheadDepth]
0873: .containsEpsilon()) {
0874: generateNonGreedyExitPath = true;
0875: nonGreedyExitDepth = blk.exitLookaheadDepth;
0876: } else if (!blk.greedy
0877: && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
0878: generateNonGreedyExitPath = true;
0879: }
0880: if (generateNonGreedyExitPath) {
0881: if (DEBUG_CODE_GENERATOR) {
0882: System.out
0883: .println("nongreedy (...)* loop; exit depth is "
0884: + blk.exitLookaheadDepth);
0885: }
0886: String predictExit = getLookaheadTestExpression(
0887: blk.exitCache, nonGreedyExitDepth);
0888: println("// nongreedy exit test");
0889: println("if (" + predictExit + ") goto " + label + ";");
0890: }
0891:
0892: CppBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
0893: genBlockFinish(howToFinish, "goto " + label + ";");
0894:
0895: tabs--;
0896: println("}");
0897: println(label + ":;");
0898: println("} // ( ... )*");
0899:
0900: // Restore previous AST generation
0901: currentASTResult = saveCurrentASTResult;
0902: }
0903:
0904: /** Generate an alternative.
0905: * @param alt The alternative to generate
0906: * @param blk The block to which the alternative belongs
0907: */
0908: protected void genAlt(Alternative alt, AlternativeBlock blk) {
0909: // Save the AST generation state, and set it to that of the alt
0910: boolean savegenAST = genAST;
0911: genAST = genAST && alt.getAutoGen();
0912:
0913: boolean oldsaveTest = saveText;
0914: saveText = saveText && alt.getAutoGen();
0915:
0916: // Reset the variable name map for the alternative
0917: Hashtable saveMap = treeVariableMap;
0918: treeVariableMap = new Hashtable();
0919:
0920: // Generate try block around the alt for error handling
0921: if (alt.exceptionSpec != null) {
0922: println("try { // for error handling");
0923: tabs++;
0924: }
0925:
0926: AlternativeElement elem = alt.head;
0927: while (!(elem instanceof BlockEndElement)) {
0928: elem.generate(); // alt can begin with anything. Ask target to gen.
0929: elem = elem.next;
0930: }
0931:
0932: if (genAST) {
0933: if (blk instanceof RuleBlock) {
0934: // Set the AST return value for the rule
0935: RuleBlock rblk = (RuleBlock) blk;
0936: if (usingCustomAST)
0937: println(rblk.getRuleName() + "_AST = static_cast<"
0938: + labeledElementASTType
0939: + ">(currentAST.root);");
0940: else
0941: println(rblk.getRuleName()
0942: + "_AST = currentAST.root;");
0943: } else if (blk.getLabel() != null) {
0944: // ### future: also set AST value for labeled subrules.
0945: // println(blk.getLabel() + "_AST = "+labeledElementASTType+"(currentAST.root);");
0946: antlrTool.warning(
0947: "Labeled subrules are not implemented", grammar
0948: .getFilename(), blk.getLine(), blk
0949: .getColumn());
0950: }
0951: }
0952:
0953: if (alt.exceptionSpec != null) {
0954: // close try block
0955: tabs--;
0956: println("}");
0957: genErrorHandler(alt.exceptionSpec);
0958: }
0959:
0960: genAST = savegenAST;
0961: saveText = oldsaveTest;
0962:
0963: treeVariableMap = saveMap;
0964: }
0965:
0966: /** Generate all the bitsets to be used in the parser or lexer
0967: * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
0968: * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
0969: * Note that most languages do not support object initialization inside a
0970: * class definition, so other code-generators may have to separate the
0971: * bitset declarations from the initializations (e.g., put the initializations
0972: * in the generated constructor instead).
0973: * @param bitsetList The list of bitsets to generate.
0974: * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
0975: * @param dumpSets Dump out the token definitions of the contents of the bitset
0976: * only for grammars/parsers.
0977: */
0978: protected void genBitsets(Vector bitsetList, int maxVocabulary,
0979: String prefix) {
0980: TokenManager tm = grammar.tokenManager;
0981:
0982: println("");
0983:
0984: for (int i = 0; i < bitsetList.size(); i++) {
0985: BitSet p = (BitSet) bitsetList.elementAt(i);
0986: // Ensure that generated BitSet is large enough for vocabulary
0987: p.growToInclude(maxVocabulary);
0988:
0989: // initialization data
0990: println("const unsigned long " + prefix + getBitsetName(i)
0991: + "_data_" + "[] = { " + p.toStringOfHalfWords()
0992: + " };");
0993:
0994: // Dump the contents of the bitset in readable format...
0995: String t = "// ";
0996: for (int j = 0; j < tm.getVocabulary().size(); j++) {
0997: if (p.member(j)) {
0998: if ((grammar instanceof LexerGrammar))
0999: t += tm.getVocabulary().elementAt(j) + " ";
1000: else
1001: t += tm.getTokenStringAt(j) + " ";
1002:
1003: if (t.length() > 70) {
1004: println(t);
1005: t = "// ";
1006: }
1007: }
1008: }
1009: if (t != "// ")
1010: println(t);
1011:
1012: // BitSet object
1013: println("const " + namespaceAntlr + "BitSet " + prefix
1014: + getBitsetName(i) + "(" + getBitsetName(i)
1015: + "_data_," + p.size() / 32 + ");");
1016: }
1017: }
1018:
1019: protected void genBitsetsHeader(Vector bitsetList, int maxVocabulary) {
1020: println("");
1021: for (int i = 0; i < bitsetList.size(); i++) {
1022: BitSet p = (BitSet) bitsetList.elementAt(i);
1023: // Ensure that generated BitSet is large enough for vocabulary
1024: p.growToInclude(maxVocabulary);
1025: // initialization data
1026: println("static const unsigned long " + getBitsetName(i)
1027: + "_data_" + "[];");
1028: // BitSet object
1029: println("static const " + namespaceAntlr + "BitSet "
1030: + getBitsetName(i) + ";");
1031: }
1032: }
1033:
1034: /** Generate the finish of a block, using a combination of the info
1035: * returned from genCommonBlock() and the action to perform when
1036: * no alts were taken
1037: * @param howToFinish The return of genCommonBlock()
1038: * @param noViableAction What to generate when no alt is taken
1039: */
1040: private void genBlockFinish(CppBlockFinishingInfo howToFinish,
1041: String noViableAction) {
1042: if (howToFinish.needAnErrorClause
1043: && (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
1044: if (howToFinish.generatedAnIf) {
1045: println("else {");
1046: } else {
1047: println("{");
1048: }
1049: tabs++;
1050: println(noViableAction);
1051: tabs--;
1052: println("}");
1053: }
1054:
1055: if (howToFinish.postscript != null) {
1056: println(howToFinish.postscript);
1057: }
1058: }
1059:
1060: /** Generate the initaction for a block, which may be a RuleBlock or a
1061: * plain AlternativeBLock.
1062: * @blk The block for which the preamble is to be generated.
1063: */
1064: protected void genBlockInitAction(AlternativeBlock blk) {
1065: // dump out init action
1066: if (blk.initAction != null) {
1067: genLineNo(blk);
1068: printAction(processActionForSpecialSymbols(blk.initAction,
1069: blk.line, currentRule, null));
1070: genLineNo2();
1071: }
1072: }
1073:
1074: /** Generate the header for a block, which may be a RuleBlock or a
1075: * plain AlternativeBLock. This generates any variable declarations
1076: * and syntactic-predicate-testing variables.
1077: * @blk The block for which the preamble is to be generated.
1078: */
1079: protected void genBlockPreamble(AlternativeBlock blk) {
1080: // define labels for rule blocks.
1081: if (blk instanceof RuleBlock) {
1082: RuleBlock rblk = (RuleBlock) blk;
1083: if (rblk.labeledElements != null) {
1084: for (int i = 0; i < rblk.labeledElements.size(); i++) {
1085:
1086: AlternativeElement a = (AlternativeElement) rblk.labeledElements
1087: .elementAt(i);
1088: //System.out.println("looking at labeled element: "+a);
1089: // Variables for labeled rule refs and subrules are different than
1090: // variables for grammar atoms. This test is a little tricky because
1091: // we want to get all rule refs and ebnf, but not rule blocks or
1092: // syntactic predicates
1093: if (a instanceof RuleRefElement
1094: || a instanceof AlternativeBlock
1095: && !(a instanceof RuleBlock)
1096: && !(a instanceof SynPredBlock)) {
1097:
1098: if (!(a instanceof RuleRefElement)
1099: && ((AlternativeBlock) a).not
1100: && analyzer
1101: .subruleCanBeInverted(
1102: ((AlternativeBlock) a),
1103: grammar instanceof LexerGrammar)) {
1104: // Special case for inverted subrules that will be
1105: // inlined. Treat these like token or char literal
1106: // references
1107: println(labeledElementType + " "
1108: + a.getLabel() + " = "
1109: + labeledElementInit + ";");
1110: if (grammar.buildAST) {
1111: genASTDeclaration(a);
1112: }
1113: } else {
1114: if (grammar.buildAST) {
1115: // Always gen AST variables for labeled elements, even if the
1116: // element itself is marked with !
1117: genASTDeclaration(a);
1118: }
1119: if (grammar instanceof LexerGrammar) {
1120: println(namespaceAntlr + "RefToken "
1121: + a.getLabel() + ";");
1122: }
1123: if (grammar instanceof TreeWalkerGrammar) {
1124: // always generate rule-ref variables for tree walker
1125: println(labeledElementType + " "
1126: + a.getLabel() + " = "
1127: + labeledElementInit + ";");
1128: }
1129: }
1130: } else {
1131: // It is a token or literal reference. Generate the
1132: // correct variable type for this grammar
1133: println(labeledElementType + " " + a.getLabel()
1134: + " = " + labeledElementInit + ";");
1135: // In addition, generate *_AST variables if building ASTs
1136: if (grammar.buildAST) {
1137: // println(labeledElementASTType+" " + a.getLabel() + "_AST = "+labeledElementASTInit+";");
1138: // RK: Hmm this is strange.
1139: // The java codegen contained this check for nodetype while we only had the
1140: // println right above here..
1141: if (a instanceof GrammarAtom
1142: && ((GrammarAtom) a)
1143: .getASTNodeType() != null) {
1144: GrammarAtom ga = (GrammarAtom) a;
1145: genASTDeclaration(a, ga
1146: .getASTNodeType());
1147: } else {
1148: genASTDeclaration(a);
1149: }
1150: }
1151: }
1152: }
1153: }
1154: }
1155: }
1156:
1157: public void genBody(LexerGrammar g) throws IOException {
1158: outputFile = grammar.getClassName() + ".cpp";
1159: outputLine = 1;
1160: currentOutput = antlrTool.openOutputFile(outputFile);
1161: //SAS: changed for proper text file io
1162:
1163: genAST = false; // no way to gen trees.
1164: saveText = true; // save consumed characters.
1165:
1166: tabs = 0;
1167:
1168: // Generate header common to all C++ output files
1169: genHeader(outputFile);
1170:
1171: printHeaderAction(preIncludeCpp);
1172: // Generate header specific to lexer C++ file
1173: println("#include \"" + grammar.getClassName() + ".hpp\"");
1174: println("#include <antlr/CharBuffer.hpp>");
1175: println("#include <antlr/TokenStreamException.hpp>");
1176: println("#include <antlr/TokenStreamIOException.hpp>");
1177: println("#include <antlr/TokenStreamRecognitionException.hpp>");
1178: println("#include <antlr/CharStreamException.hpp>");
1179: println("#include <antlr/CharStreamIOException.hpp>");
1180: println("#include <antlr/NoViableAltForCharException.hpp>");
1181: if (grammar.debuggingOutput)
1182: println("#include <antlr/DebuggingInputBuffer.hpp>");
1183: println("");
1184: printHeaderAction(postIncludeCpp);
1185:
1186: if (nameSpace != null)
1187: nameSpace.emitDeclarations(currentOutput);
1188:
1189: // Generate user-defined lexer file preamble
1190: printAction(grammar.preambleAction);
1191:
1192: // Generate lexer class definition
1193: String sup = null;
1194: if (grammar.super Class != null) {
1195: sup = grammar.super Class;
1196: } else {
1197: sup = grammar.getSuperClass();
1198: if (sup.lastIndexOf('.') != -1)
1199: sup = sup.substring(sup.lastIndexOf('.') + 1);
1200: sup = namespaceAntlr + sup;
1201: }
1202:
1203: if (noConstructors) {
1204: println("#if 0");
1205: println("// constructor creation turned of with 'noConstructor' option");
1206: }
1207: //
1208: // Generate the constructor from InputStream
1209: //
1210: println(grammar.getClassName() + "::" + grammar.getClassName()
1211: + "(" + namespaceStd + "istream& in)");
1212: tabs++;
1213: // if debugging, wrap the input buffer in a debugger
1214: if (grammar.debuggingOutput)
1215: println(": " + sup + "(new " + namespaceAntlr
1216: + "DebuggingInputBuffer(new " + namespaceAntlr
1217: + "CharBuffer(in))," + g.caseSensitive + ")");
1218: else
1219: println(": " + sup + "(new " + namespaceAntlr
1220: + "CharBuffer(in)," + g.caseSensitive + ")");
1221: tabs--;
1222: println("{");
1223: tabs++;
1224:
1225: // if debugging, set up array variables and call user-overridable
1226: // debugging setup method
1227: if (grammar.debuggingOutput) {
1228: println("setRuleNames(_ruleNames);");
1229: println("setSemPredNames(_semPredNames);");
1230: println("setupDebugging();");
1231: }
1232:
1233: // println("setCaseSensitive("+g.caseSensitive+");");
1234: println("initLiterals();");
1235: tabs--;
1236: println("}");
1237: println("");
1238:
1239: // Generate the constructor from InputBuffer
1240: println(grammar.getClassName() + "::" + grammar.getClassName()
1241: + "(" + namespaceAntlr + "InputBuffer& ib)");
1242: tabs++;
1243: // if debugging, wrap the input buffer in a debugger
1244: if (grammar.debuggingOutput)
1245: println(": " + sup + "(new " + namespaceAntlr
1246: + "DebuggingInputBuffer(ib)," + g.caseSensitive
1247: + ")");
1248: else
1249: println(": " + sup + "(ib," + g.caseSensitive + ")");
1250: tabs--;
1251: println("{");
1252: tabs++;
1253:
1254: // if debugging, set up array variables and call user-overridable
1255: // debugging setup method
1256: if (grammar.debuggingOutput) {
1257: println("setRuleNames(_ruleNames);");
1258: println("setSemPredNames(_semPredNames);");
1259: println("setupDebugging();");
1260: }
1261:
1262: // println("setCaseSensitive("+g.caseSensitive+");");
1263: println("initLiterals();");
1264: tabs--;
1265: println("}");
1266: println("");
1267:
1268: // Generate the constructor from LexerSharedInputState
1269: println(grammar.getClassName() + "::" + grammar.getClassName()
1270: + "(const " + namespaceAntlr
1271: + "LexerSharedInputState& state)");
1272: tabs++;
1273: println(": " + sup + "(state," + g.caseSensitive + ")");
1274: tabs--;
1275: println("{");
1276: tabs++;
1277:
1278: // if debugging, set up array variables and call user-overridable
1279: // debugging setup method
1280: if (grammar.debuggingOutput) {
1281: println("setRuleNames(_ruleNames);");
1282: println("setSemPredNames(_semPredNames);");
1283: println("setupDebugging();");
1284: }
1285:
1286: // println("setCaseSensitive("+g.caseSensitive+");");
1287: println("initLiterals();");
1288: tabs--;
1289: println("}");
1290: println("");
1291:
1292: if (noConstructors) {
1293: println("// constructor creation turned of with 'noConstructor' option");
1294: println("#endif");
1295: }
1296:
1297: println("void " + grammar.getClassName() + "::initLiterals()");
1298: println("{");
1299: tabs++;
1300: // Generate the initialization of the map
1301: // containing the string literals used in the lexer
1302: // The literals variable itself is in CharScanner
1303: /* TJP: get keys now and check to make sure it's a literal not
1304: * a label to a literal; was dup'ing string literals before
1305: * change.
1306: *
1307: Enumeration ids = grammar.tokenManager.getTokenSymbolElements();
1308: while ( ids.hasMoreElements() ) {
1309: TokenSymbol sym = (TokenSymbol)ids.nextElement();
1310: if ( sym instanceof StringLiteralSymbol ) {
1311: StringLiteralSymbol s = (StringLiteralSymbol)sym;
1312: println("literals["+s.getId()+"] = "+s.getTokenType()+";");
1313: }
1314: }
1315: */
1316: // TJP changed it to following loop.
1317: Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
1318: while (keys.hasMoreElements()) {
1319: String key = (String) keys.nextElement();
1320: if (key.charAt(0) != '"') {
1321: continue;
1322: }
1323: TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
1324: if (sym instanceof StringLiteralSymbol) {
1325: StringLiteralSymbol s = (StringLiteralSymbol) sym;
1326: println("literals[" + s.getId() + "] = "
1327: + s.getTokenType() + ";");
1328: }
1329: }
1330:
1331: // Generate the setting of various generated options.
1332: tabs--;
1333: println("}");
1334:
1335: Enumeration ids;
1336: // generate the rule name array for debugging
1337: if (grammar.debuggingOutput) {
1338: println("const char* " + grammar.getClassName()
1339: + "::_ruleNames[] = {");
1340: tabs++;
1341:
1342: ids = grammar.rules.elements();
1343: int ruleNum = 0;
1344: while (ids.hasMoreElements()) {
1345: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1346: if (sym instanceof RuleSymbol)
1347: println("\"" + ((RuleSymbol) sym).getId() + "\",");
1348: }
1349: println("0");
1350: tabs--;
1351: println("};");
1352: }
1353:
1354: // Generate nextToken() rule.
1355: // nextToken() is a synthetic lexer rule that is the implicit OR of all
1356: // user-defined lexer rules.
1357: genNextToken();
1358:
1359: // Generate code for each rule in the lexer
1360: ids = grammar.rules.elements();
1361: int ruleNum = 0;
1362: while (ids.hasMoreElements()) {
1363: RuleSymbol sym = (RuleSymbol) ids.nextElement();
1364: // Don't generate the synthetic rules
1365: if (!sym.getId().equals("mnextToken")) {
1366: genRule(sym, false, ruleNum++, grammar.getClassName()
1367: + "::");
1368: }
1369: exitIfError();
1370: }
1371:
1372: // Generate the semantic predicate map for debugging
1373: if (grammar.debuggingOutput)
1374: genSemPredMap(grammar.getClassName() + "::");
1375:
1376: // Generate the bitsets used throughout the lexer
1377: genBitsets(bitsetsUsed, ((LexerGrammar) grammar).charVocabulary
1378: .size(), grammar.getClassName() + "::");
1379:
1380: println("");
1381: if (nameSpace != null)
1382: nameSpace.emitClosures(currentOutput);
1383:
1384: // Close the lexer output stream
1385: currentOutput.close();
1386: currentOutput = null;
1387: }
1388:
1389: public void genInitFactory(Grammar g) {
1390: // Generate the method to initialize an ASTFactory when we're
1391: // building AST's
1392: println("void " + g.getClassName() + "::initializeASTFactory( "
1393: + namespaceAntlr + "ASTFactory& factory )");
1394: println("{");
1395: tabs++;
1396:
1397: if (g.buildAST) {
1398: // print out elements collected...
1399: Enumeration e = astTypes.elements();
1400: while (e.hasMoreElements())
1401: println((String) e.nextElement());
1402:
1403: println("factory.setMaxNodeType("
1404: + grammar.tokenManager.maxTokenType() + ");");
1405: }
1406: tabs--;
1407: println("}");
1408: }
1409:
1410: // FIXME: and so why are we passing here a g param while inside
1411: // we merrily use the global grammar.
1412: public void genBody(ParserGrammar g) throws IOException {
1413: // Open the output stream for the parser and set the currentOutput
1414: outputFile = grammar.getClassName() + ".cpp";
1415: outputLine = 1;
1416: currentOutput = antlrTool.openOutputFile(outputFile);
1417:
1418: genAST = grammar.buildAST;
1419:
1420: tabs = 0;
1421:
1422: // Generate the header common to all output files.
1423: genHeader(outputFile);
1424:
1425: printHeaderAction(preIncludeCpp);
1426:
1427: // Generate header for the parser
1428: println("#include \"" + grammar.getClassName() + ".hpp\"");
1429: println("#include <antlr/NoViableAltException.hpp>");
1430: println("#include <antlr/SemanticException.hpp>");
1431: println("#include <antlr/ASTFactory.hpp>");
1432:
1433: printHeaderAction(postIncludeCpp);
1434:
1435: if (nameSpace != null)
1436: nameSpace.emitDeclarations(currentOutput);
1437:
1438: // Output the user-defined parser preamble
1439: printAction(grammar.preambleAction);
1440:
1441: String sup = null;
1442: if (grammar.super Class != null)
1443: sup = grammar.super Class;
1444: else {
1445: sup = grammar.getSuperClass();
1446: if (sup.lastIndexOf('.') != -1)
1447: sup = sup.substring(sup.lastIndexOf('.') + 1);
1448: sup = namespaceAntlr + sup;
1449: }
1450:
1451: // set up an array of all the rule names so the debugger can
1452: // keep track of them only by number -- less to store in tree...
1453: if (grammar.debuggingOutput) {
1454: println("const char* " + grammar.getClassName()
1455: + "::_ruleNames[] = {");
1456: tabs++;
1457:
1458: Enumeration ids = grammar.rules.elements();
1459: int ruleNum = 0;
1460: while (ids.hasMoreElements()) {
1461: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1462: if (sym instanceof RuleSymbol)
1463: println("\"" + ((RuleSymbol) sym).getId() + "\",");
1464: }
1465: println("0");
1466: tabs--;
1467: println("};");
1468: }
1469:
1470: // Generate _initialize function
1471: // disabled since it isn't used anymore..
1472:
1473: // println("void " + grammar.getClassName() + "::_initialize(void)");
1474: // println("{");
1475: // tabs++;
1476:
1477: // if debugging, set up arrays and call the user-overridable
1478: // debugging setup method
1479: // if ( grammar.debuggingOutput ) {
1480: // println("setRuleNames(_ruleNames);");
1481: // println("setSemPredNames(_semPredNames);");
1482: // println("setupDebugging();");
1483: // }
1484: // tabs--;
1485: // println("}");
1486: if (noConstructors) {
1487: println("#if 0");
1488: println("// constructor creation turned of with 'noConstructor' option");
1489: }
1490:
1491: // Generate parser class constructor from TokenBuffer
1492: print(grammar.getClassName() + "::" + grammar.getClassName());
1493: println("(" + namespaceAntlr + "TokenBuffer& tokenBuf, int k)");
1494: println(": " + sup + "(tokenBuf,k)");
1495: println("{");
1496: // tabs++;
1497: // println("_initialize();");
1498: // tabs--;
1499: println("}");
1500: println("");
1501:
1502: print(grammar.getClassName() + "::" + grammar.getClassName());
1503: println("(" + namespaceAntlr + "TokenBuffer& tokenBuf)");
1504: println(": " + sup + "(tokenBuf," + grammar.maxk + ")");
1505: println("{");
1506: // tabs++;
1507: // println("_initialize();");
1508: // tabs--;
1509: println("}");
1510: println("");
1511:
1512: // Generate parser class constructor from TokenStream
1513: print(grammar.getClassName() + "::" + grammar.getClassName());
1514: println("(" + namespaceAntlr + "TokenStream& lexer, int k)");
1515: println(": " + sup + "(lexer,k)");
1516: println("{");
1517: // tabs++;
1518: // println("_initialize();");
1519: // tabs--;
1520: println("}");
1521: println("");
1522:
1523: print(grammar.getClassName() + "::" + grammar.getClassName());
1524: println("(" + namespaceAntlr + "TokenStream& lexer)");
1525: println(": " + sup + "(lexer," + grammar.maxk + ")");
1526: println("{");
1527: // tabs++;
1528: // println("_initialize();");
1529: // tabs--;
1530: println("}");
1531: println("");
1532:
1533: print(grammar.getClassName() + "::" + grammar.getClassName());
1534: println("(const " + namespaceAntlr
1535: + "ParserSharedInputState& state)");
1536: println(": " + sup + "(state," + grammar.maxk + ")");
1537: println("{");
1538: // tabs++;
1539: // println("_initialize();");
1540: // tabs--;
1541: println("}");
1542: println("");
1543:
1544: if (noConstructors) {
1545: println("// constructor creation turned of with 'noConstructor' option");
1546: println("#endif");
1547: }
1548:
1549: astTypes = new Vector();
1550:
1551: // Generate code for each rule in the grammar
1552: Enumeration ids = grammar.rules.elements();
1553: int ruleNum = 0;
1554: while (ids.hasMoreElements()) {
1555: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1556: if (sym instanceof RuleSymbol) {
1557: RuleSymbol rs = (RuleSymbol) sym;
1558: genRule(rs, rs.references.size() == 0, ruleNum++,
1559: grammar.getClassName() + "::");
1560: }
1561: exitIfError();
1562: }
1563: if (usingCustomAST) {
1564: // println("void "+grammar.getClassName()+"::setASTNodeFactory("+labeledElementASTType+" (*factory)() )");
1565: // println("{");
1566: // println("}");
1567: // println("");
1568:
1569: // when we are using a custom ast override Parser::getAST to return the
1570: // custom AST type
1571: println(labeledElementASTType + " "
1572: + grammar.getClassName() + "::getAST()");
1573: println("{");
1574: println("\treturn returnAST;");
1575: println("}");
1576: println("");
1577: }
1578:
1579: genInitFactory(g);
1580:
1581: // Generate the token names
1582: genTokenStrings(grammar.getClassName() + "::");
1583:
1584: // Generate the bitsets used throughout the grammar
1585: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType(),
1586: grammar.getClassName() + "::");
1587:
1588: // Generate the semantic predicate map for debugging
1589: if (grammar.debuggingOutput)
1590: genSemPredMap(grammar.getClassName() + "::");
1591:
1592: // Close class definition
1593: println("");
1594: println("");
1595: if (nameSpace != null)
1596: nameSpace.emitClosures(currentOutput);
1597:
1598: // Close the parser output stream
1599: currentOutput.close();
1600: currentOutput = null;
1601: }
1602:
1603: public void genBody(TreeWalkerGrammar g) throws IOException {
1604: // Open the output stream for the parser and set the currentOutput
1605: outputFile = grammar.getClassName() + ".cpp";
1606: outputLine = 1;
1607: currentOutput = antlrTool.openOutputFile(outputFile);
1608: //SAS: changed for proper text file io
1609:
1610: genAST = grammar.buildAST;
1611: tabs = 0;
1612:
1613: // Generate the header common to all output files.
1614: genHeader(outputFile);
1615:
1616: printHeaderAction(preIncludeCpp);
1617:
1618: // Generate header for the parser
1619: println("#include \"" + grammar.getClassName() + ".hpp\"");
1620: println("#include <antlr/Token.hpp>");
1621: println("#include <antlr/AST.hpp>");
1622: println("#include <antlr/NoViableAltException.hpp>");
1623: println("#include <antlr/MismatchedTokenException.hpp>");
1624: println("#include <antlr/SemanticException.hpp>");
1625: println("#include <antlr/BitSet.hpp>");
1626:
1627: printHeaderAction(postIncludeCpp);
1628:
1629: if (nameSpace != null)
1630: nameSpace.emitDeclarations(currentOutput);
1631:
1632: // Output the user-defined parser premamble
1633: printAction(grammar.preambleAction);
1634:
1635: // Generate parser class definition
1636: String sup = null;
1637: if (grammar.super Class != null) {
1638: sup = grammar.super Class;
1639: } else {
1640: sup = grammar.getSuperClass();
1641: if (sup.lastIndexOf('.') != -1)
1642: sup = sup.substring(sup.lastIndexOf('.') + 1);
1643: sup = namespaceAntlr + sup;
1644: }
1645: if (noConstructors) {
1646: println("#if 0");
1647: println("// constructor creation turned of with 'noConstructor' option");
1648: }
1649:
1650: // Generate default parser class constructor
1651: println(grammar.getClassName() + "::" + grammar.getClassName()
1652: + "()");
1653: println("\t: " + namespaceAntlr + "TreeParser() {");
1654: tabs++;
1655: // println("setTokenNames(_tokenNames);");
1656: tabs--;
1657: println("}");
1658:
1659: if (noConstructors) {
1660: println("// constructor creation turned of with 'noConstructor' option");
1661: println("#endif");
1662: }
1663: println("");
1664:
1665: astTypes = new Vector();
1666:
1667: // Generate code for each rule in the grammar
1668: Enumeration ids = grammar.rules.elements();
1669: int ruleNum = 0;
1670: String ruleNameInits = "";
1671: while (ids.hasMoreElements()) {
1672: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1673: if (sym instanceof RuleSymbol) {
1674: RuleSymbol rs = (RuleSymbol) sym;
1675: genRule(rs, rs.references.size() == 0, ruleNum++,
1676: grammar.getClassName() + "::");
1677: }
1678: exitIfError();
1679: }
1680:
1681: if (usingCustomAST) {
1682: // when we are using a custom ast override Parser::getAST to return the
1683: // custom AST type
1684: println(labeledElementASTType + " "
1685: + grammar.getClassName() + "::getAST()");
1686: println("{");
1687: println("\treturn returnAST;");
1688: println("}");
1689: println("");
1690: }
1691:
1692: // Generate the ASTFactory initialization function
1693: genInitFactory(grammar);
1694: // Generate the token names
1695: genTokenStrings(grammar.getClassName() + "::");
1696:
1697: // Generate the bitsets used throughout the grammar
1698: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType(),
1699: grammar.getClassName() + "::");
1700:
1701: // Close class definition
1702: println("");
1703: println("");
1704:
1705: if (nameSpace != null)
1706: nameSpace.emitClosures(currentOutput);
1707:
1708: // Close the parser output stream
1709: currentOutput.close();
1710: currentOutput = null;
1711: }
1712:
1713: /** Generate a series of case statements that implement a BitSet test.
1714: * @param p The Bitset for which cases are to be generated
1715: */
1716: protected void genCases(BitSet p) {
1717: if (DEBUG_CODE_GENERATOR)
1718: System.out.println("genCases(" + p + ")");
1719: int[] elems;
1720:
1721: elems = p.toArray();
1722: // Wrap cases four-per-line for lexer, one-per-line for parser
1723: int wrap = 1; //(grammar instanceof LexerGrammar) ? 4 : 1;
1724: int j = 1;
1725: boolean startOfLine = true;
1726: for (int i = 0; i < elems.length; i++) {
1727: if (j == 1) {
1728: print("");
1729: } else {
1730: _print(" ");
1731: }
1732: _print("case " + getValueString(elems[i]) + ":");
1733:
1734: if (j == wrap) {
1735: _println("");
1736: startOfLine = true;
1737: j = 1;
1738: } else {
1739: j++;
1740: startOfLine = false;
1741: }
1742: }
1743: if (!startOfLine) {
1744: _println("");
1745: }
1746: }
1747:
1748: /** Generate common code for a block of alternatives; return a postscript
1749: * that needs to be generated at the end of the block. Other routines
1750: * may append else-clauses and such for error checking before the postfix
1751: * is generated.
1752: * If the grammar is a lexer, then generate alternatives in an order where
1753: * alternatives requiring deeper lookahead are generated first, and
1754: * EOF in the lookahead set reduces the depth of the lookahead.
1755: * @param blk The block to generate
1756: * @param noTestForSingle If true, then it does not generate a test for a single alternative.
1757: */
1758: public CppBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
1759: boolean noTestForSingle) {
1760: int nIF = 0;
1761: boolean createdLL1Switch = false;
1762: int closingBracesOfIFSequence = 0;
1763: CppBlockFinishingInfo finishingInfo = new CppBlockFinishingInfo();
1764: if (DEBUG_CODE_GENERATOR)
1765: System.out.println("genCommonBlk(" + blk + ")");
1766:
1767: // Save the AST generation state, and set it to that of the block
1768: boolean savegenAST = genAST;
1769: genAST = genAST && blk.getAutoGen();
1770:
1771: boolean oldsaveTest = saveText;
1772: saveText = saveText && blk.getAutoGen();
1773:
1774: // Is this block inverted? If so, generate special-case code
1775: if (blk.not
1776: && analyzer.subruleCanBeInverted(blk,
1777: grammar instanceof LexerGrammar)) {
1778: Lookahead p = analyzer.look(1, blk);
1779: // Variable assignment for labeled elements
1780: if (blk.getLabel() != null && syntacticPredLevel == 0) {
1781: println(blk.getLabel() + " = " + lt1Value + ";");
1782: }
1783:
1784: // AST
1785: genElementAST(blk);
1786:
1787: String astArgs = "";
1788: if (grammar instanceof TreeWalkerGrammar) {
1789: if (usingCustomAST)
1790: astArgs = "static_cast<" + namespaceAntlr
1791: + "RefAST" + ">(_t),";
1792: else
1793: astArgs = "_t,";
1794: }
1795:
1796: // match the bitset for the alternative
1797: println("match(" + astArgs
1798: + getBitsetName(markBitsetForGen(p.fset)) + ");");
1799:
1800: // tack on tree cursor motion if doing a tree walker
1801: if (grammar instanceof TreeWalkerGrammar) {
1802: println("_t = _t->getNextSibling();");
1803: }
1804: return finishingInfo;
1805: }
1806:
1807: // Special handling for single alt
1808: if (blk.getAlternatives().size() == 1) {
1809: Alternative alt = blk.getAlternativeAt(0);
1810: // Generate a warning if there is a synPred for single alt.
1811: if (alt.synPred != null) {
1812: antlrTool
1813: .warning(
1814: "Syntactic predicate superfluous for single alternative",
1815: grammar.getFilename(), blk
1816: .getAlternativeAt(0).synPred
1817: .getLine(), blk
1818: .getAlternativeAt(0).synPred
1819: .getColumn());
1820: }
1821: if (noTestForSingle) {
1822: if (alt.semPred != null) {
1823: // Generate validating predicate
1824: genSemPred(alt.semPred, blk.line);
1825: }
1826: genAlt(alt, blk);
1827: return finishingInfo;
1828: }
1829: }
1830:
1831: // count number of simple LL(1) cases; only do switch for
1832: // many LL(1) cases (no preds, no end of token refs)
1833: // We don't care about exit paths for (...)*, (...)+
1834: // because we don't explicitly have a test for them
1835: // as an alt in the loop.
1836: //
1837: // Also, we now count how many unicode lookahead sets
1838: // there are--they must be moved to DEFAULT or ELSE
1839: // clause.
1840:
1841: int nLL1 = 0;
1842: for (int i = 0; i < blk.getAlternatives().size(); i++) {
1843: Alternative a = blk.getAlternativeAt(i);
1844: if (suitableForCaseExpression(a))
1845: nLL1++;
1846: }
1847:
1848: // do LL(1) cases
1849: if (nLL1 >= makeSwitchThreshold) {
1850: // Determine the name of the item to be compared
1851: String testExpr = lookaheadString(1);
1852: createdLL1Switch = true;
1853: // when parsing trees, convert null to valid tree node with NULL lookahead
1854: if (grammar instanceof TreeWalkerGrammar) {
1855: println("if (_t == " + labeledElementASTInit + " )");
1856: tabs++;
1857: println("_t = ASTNULL;");
1858: tabs--;
1859: }
1860: println("switch ( " + testExpr + ") {");
1861: for (int i = 0; i < blk.alternatives.size(); i++) {
1862: Alternative alt = blk.getAlternativeAt(i);
1863: // ignore any non-LL(1) alts, predicated alts or end-of-token alts
1864: // or end-of-token alts for case expressions
1865: if (!suitableForCaseExpression(alt)) {
1866: continue;
1867: }
1868: Lookahead p = alt.cache[1];
1869: if (p.fset.degree() == 0 && !p.containsEpsilon()) {
1870: antlrTool
1871: .warning(
1872: "Alternate omitted due to empty prediction set",
1873: grammar.getFilename(), alt.head
1874: .getLine(), alt.head
1875: .getColumn());
1876: } else {
1877: genCases(p.fset);
1878: println("{");
1879: tabs++;
1880: genAlt(alt, blk);
1881: println("break;");
1882: tabs--;
1883: println("}");
1884: }
1885: }
1886: println("default:");
1887: tabs++;
1888: }
1889:
1890: // do non-LL(1) and nondeterministic cases
1891: // This is tricky in the lexer, because of cases like:
1892: // STAR : '*' ;
1893: // ASSIGN_STAR : "*=";
1894: // Since nextToken is generated without a loop, then the STAR will
1895: // have end-of-token as it's lookahead set for LA(2). So, we must generate the
1896: // alternatives containing trailing end-of-token in their lookahead sets *after*
1897: // the alternatives without end-of-token. This implements the usual
1898: // lexer convention that longer matches come before shorter ones, e.g.
1899: // "*=" matches ASSIGN_STAR not STAR
1900: //
1901: // For non-lexer grammars, this does not sort the alternates by depth
1902: // Note that alts whose lookahead is purely end-of-token at k=1 end up
1903: // as default or else clauses.
1904: int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk
1905: : 0;
1906: for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
1907: if (DEBUG_CODE_GENERATOR)
1908: System.out.println("checking depth " + altDepth);
1909: for (int i = 0; i < blk.alternatives.size(); i++) {
1910: Alternative alt = blk.getAlternativeAt(i);
1911: if (DEBUG_CODE_GENERATOR)
1912: System.out.println("genAlt: " + i);
1913: // if we made a switch above, ignore what we already took care
1914: // of. Specifically, LL(1) alts with no preds
1915: // that do not have end-of-token in their prediction set
1916: if (createdLL1Switch && suitableForCaseExpression(alt)) {
1917: if (DEBUG_CODE_GENERATOR)
1918: System.out
1919: .println("ignoring alt because it was in the switch");
1920: continue;
1921: }
1922: String e;
1923:
1924: boolean unpredicted = false;
1925:
1926: if (grammar instanceof LexerGrammar) {
1927: // Calculate the "effective depth" of the alt, which is the max
1928: // depth at which cache[depth]!=end-of-token
1929: int effectiveDepth = alt.lookaheadDepth;
1930: if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
1931: // use maximum lookahead
1932: effectiveDepth = grammar.maxk;
1933: }
1934: while (effectiveDepth >= 1
1935: && alt.cache[effectiveDepth]
1936: .containsEpsilon()) {
1937: effectiveDepth--;
1938: }
1939: // Ignore alts whose effective depth is other than the ones we
1940: // are generating for this iteration.
1941: if (effectiveDepth != altDepth) {
1942: if (DEBUG_CODE_GENERATOR)
1943: System.out
1944: .println("ignoring alt because effectiveDepth!=altDepth;"
1945: + effectiveDepth
1946: + "!="
1947: + altDepth);
1948: continue;
1949: }
1950: unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
1951: e = getLookaheadTestExpression(alt, effectiveDepth);
1952: } else {
1953: unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
1954: e = getLookaheadTestExpression(alt, grammar.maxk);
1955: }
1956:
1957: // Was it a big unicode range that forced unsuitability
1958: // for a case expression?
1959: if (alt.cache[1].fset.degree() > caseSizeThreshold
1960: && suitableForCaseExpression(alt)) {
1961: if (nIF == 0) {
1962: // generate this only for the first if the elseif's
1963: // are covered by this one
1964: if (grammar instanceof TreeWalkerGrammar) {
1965: println("if (_t == "
1966: + labeledElementASTInit + " )");
1967: tabs++;
1968: println("_t = ASTNULL;");
1969: tabs--;
1970: }
1971: println("if " + e + " {");
1972: } else
1973: println("else if " + e + " {");
1974: } else if (unpredicted && alt.semPred == null
1975: && alt.synPred == null) {
1976: // The alt has empty prediction set and no
1977: // predicate to help out. if we have not
1978: // generated a previous if, just put {...} around
1979: // the end-of-token clause
1980: if (nIF == 0) {
1981: println("{");
1982: } else {
1983: println("else {");
1984: }
1985: finishingInfo.needAnErrorClause = false;
1986: } else {
1987: // check for sem and syn preds
1988: // Add any semantic predicate expression to the lookahead test
1989: if (alt.semPred != null) {
1990: // if debugging, wrap the evaluation of the predicate in a method
1991: //
1992: // translate $ and # references
1993: ActionTransInfo tInfo = new ActionTransInfo();
1994: String actionStr = processActionForSpecialSymbols(
1995: alt.semPred, blk.line, currentRule,
1996: tInfo);
1997: // ignore translation info...we don't need to do anything with it.
1998:
1999: // call that will inform SemanticPredicateListeners of the result
2000: if (((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))
2001: && grammar.debuggingOutput)
2002: e = "("
2003: + e
2004: + "&& fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING,"
2005: + //FIXME
2006: addSemPred(charFormatter
2007: .escapeString(actionStr))
2008: + "," + actionStr + "))";
2009: else
2010: e = "(" + e + "&&(" + actionStr + "))";
2011: }
2012:
2013: // Generate any syntactic predicates
2014: if (nIF > 0) {
2015: if (alt.synPred != null) {
2016: println("else {");
2017: tabs++;
2018: genSynPred(alt.synPred, e);
2019: closingBracesOfIFSequence++;
2020: } else {
2021: println("else if " + e + " {");
2022: }
2023: } else {
2024: if (alt.synPred != null) {
2025: genSynPred(alt.synPred, e);
2026: } else {
2027: // when parsing trees, convert null to valid tree node
2028: // with NULL lookahead.
2029: if (grammar instanceof TreeWalkerGrammar) {
2030: println("if (_t == "
2031: + labeledElementASTInit + " )");
2032: tabs++;
2033: println("_t = ASTNULL;");
2034: tabs--;
2035: }
2036: println("if " + e + " {");
2037: }
2038: }
2039:
2040: }
2041:
2042: nIF++;
2043: tabs++;
2044: genAlt(alt, blk);
2045: tabs--;
2046: println("}");
2047: }
2048: }
2049: String ps = "";
2050: for (int i = 1; i <= closingBracesOfIFSequence; i++) {
2051: tabs--; // does JavaCodeGenerator need this?
2052: ps += "}";
2053: }
2054:
2055: // Restore the AST generation state
2056: genAST = savegenAST;
2057:
2058: // restore save text state
2059: saveText = oldsaveTest;
2060:
2061: // Return the finishing info.
2062: if (createdLL1Switch) {
2063: tabs--;
2064: finishingInfo.postscript = ps + "}";
2065: finishingInfo.generatedSwitch = true;
2066: finishingInfo.generatedAnIf = nIF > 0;
2067: //return new CppBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement
2068:
2069: } else {
2070: finishingInfo.postscript = ps;
2071: finishingInfo.generatedSwitch = false;
2072: finishingInfo.generatedAnIf = nIF > 0;
2073: //return new CppBlockFinishingInfo(ps, false,nIF>0);
2074: }
2075: return finishingInfo;
2076: }
2077:
2078: private static boolean suitableForCaseExpression(Alternative a) {
2079: return a.lookaheadDepth == 1 && a.semPred == null
2080: && !a.cache[1].containsEpsilon()
2081: && a.cache[1].fset.degree() <= caseSizeThreshold;
2082: }
2083:
2084: /** Generate code to link an element reference into the AST
2085: */
2086: private void genElementAST(AlternativeElement el) {
2087:
2088: // handle case where you're not building trees, but are in tree walker.
2089: // Just need to get labels set up.
2090: if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
2091: String elementRef;
2092: String astName;
2093:
2094: // Generate names and declarations of the AST variable(s)
2095: if (el.getLabel() == null) {
2096: elementRef = lt1Value;
2097: // Generate AST variables for unlabeled stuff
2098: astName = "tmp" + astVarNumber + "_AST";
2099: astVarNumber++;
2100: // Map the generated AST variable in the alternate
2101: mapTreeVariable(el, astName);
2102: // Generate an "input" AST variable also
2103: println(labeledElementASTType + " " + astName
2104: + "_in = " + elementRef + ";");
2105: }
2106: return;
2107: }
2108:
2109: if (grammar.buildAST && syntacticPredLevel == 0) {
2110: boolean needASTDecl = (genAST && (el.getLabel() != null || el
2111: .getAutoGenType() != GrammarElement.AUTO_GEN_BANG));
2112:
2113: // RK: if we have a grammar element always generate the decl
2114: // since some guy can access it from an action and we can't
2115: // peek ahead (well not without making a mess).
2116: // I'd prefer taking this out.
2117: if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
2118: && (el instanceof TokenRefElement))
2119: needASTDecl = true;
2120:
2121: boolean doNoGuessTest = (grammar.hasSyntacticPredicate && needASTDecl);
2122:
2123: String elementRef;
2124: String astNameBase;
2125:
2126: // Generate names and declarations of the AST variable(s)
2127: if (el.getLabel() != null) {
2128: // if the element is labeled use that name...
2129: elementRef = el.getLabel();
2130: astNameBase = el.getLabel();
2131: } else {
2132: // else generate a temporary name...
2133: elementRef = lt1Value;
2134: // Generate AST variables for unlabeled stuff
2135: astNameBase = "tmp" + astVarNumber;
2136: astVarNumber++;
2137: }
2138:
2139: // Generate the declaration if required.
2140: if (needASTDecl) {
2141: if (el instanceof GrammarAtom) {
2142: GrammarAtom ga = (GrammarAtom) el;
2143: if (ga.getASTNodeType() != null) {
2144: genASTDeclaration(el, astNameBase, "Ref"
2145: + ga.getASTNodeType());
2146: // println("Ref"+ga.getASTNodeType()+" " + astName + ";");
2147: } else {
2148: genASTDeclaration(el, astNameBase,
2149: labeledElementASTType);
2150: // println(labeledElementASTType+" " + astName + " = "+labeledElementASTInit+";");
2151: }
2152: } else {
2153: genASTDeclaration(el, astNameBase,
2154: labeledElementASTType);
2155: // println(labeledElementASTType+" " + astName + " = "+labeledElementASTInit+";");
2156: }
2157: }
2158:
2159: // for convenience..
2160: String astName = astNameBase + "_AST";
2161:
2162: // Map the generated AST variable in the alternate
2163: mapTreeVariable(el, astName);
2164: if (grammar instanceof TreeWalkerGrammar) {
2165: // Generate an "input" AST variable also
2166: println(labeledElementASTType + " " + astName
2167: + "_in = " + labeledElementASTInit + ";");
2168: }
2169:
2170: // Enclose actions with !guessing
2171: if (doNoGuessTest) {
2172: println("if ( inputState->guessing == 0 ) {");
2173: tabs++;
2174: }
2175:
2176: // if something has a label assume it will be used
2177: // so we must initialize the RefAST
2178: if (el.getLabel() != null) {
2179: if (el instanceof GrammarAtom) {
2180: println(astName
2181: + " = "
2182: + getASTCreateString((GrammarAtom) el,
2183: elementRef) + ";");
2184: } else {
2185: println(astName + " = "
2186: + getASTCreateString(elementRef) + ";");
2187: }
2188: }
2189:
2190: // if it has no label but a declaration exists initialize it.
2191: if (el.getLabel() == null && needASTDecl) {
2192: elementRef = lt1Value;
2193: if (el instanceof GrammarAtom) {
2194: println(astName
2195: + " = "
2196: + getASTCreateString((GrammarAtom) el,
2197: elementRef) + ";");
2198: } else {
2199: println(astName + " = "
2200: + getASTCreateString(elementRef) + ";");
2201: }
2202: // Map the generated AST variable in the alternate
2203: if (grammar instanceof TreeWalkerGrammar) {
2204: // set "input" AST variable also
2205: println(astName + "_in = " + elementRef + ";");
2206: }
2207: }
2208:
2209: if (genAST) {
2210: switch (el.getAutoGenType()) {
2211: case GrammarElement.AUTO_GEN_NONE:
2212: if (usingCustomAST
2213: || (el instanceof GrammarAtom && ((GrammarAtom) el)
2214: .getASTNodeType() != null))
2215: println("astFactory->addASTChild(currentAST, static_cast<"
2216: + namespaceAntlr
2217: + "RefAST>("
2218: + astName
2219: + "));");
2220: else
2221: println("astFactory->addASTChild(currentAST, "
2222: + astName + ");");
2223: // println("astFactory.addASTChild(currentAST, "+namespaceAntlr+"RefAST(" + astName + "));");
2224: break;
2225: case GrammarElement.AUTO_GEN_CARET:
2226: if (usingCustomAST
2227: || (el instanceof GrammarAtom && ((GrammarAtom) el)
2228: .getASTNodeType() != null))
2229: println("astFactory->makeASTRoot(currentAST, static_cast<"
2230: + namespaceAntlr
2231: + "RefAST>("
2232: + astName
2233: + "));");
2234: else
2235: println("astFactory->makeASTRoot(currentAST, "
2236: + astName + ");");
2237: break;
2238: default:
2239: break;
2240: }
2241: }
2242: if (doNoGuessTest) {
2243: tabs--;
2244: println("}");
2245: }
2246: }
2247: }
2248:
2249: /** Close the try block and generate catch phrases
2250: * if the element has a labeled handler in the rule
2251: */
2252: private void genErrorCatchForElement(AlternativeElement el) {
2253: if (el.getLabel() == null)
2254: return;
2255: String r = el.enclosingRuleName;
2256: if (grammar instanceof LexerGrammar) {
2257: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2258: }
2259: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2260: if (rs == null) {
2261: antlrTool.panic("Enclosing rule not found!");
2262: }
2263: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2264: if (ex != null) {
2265: tabs--;
2266: println("}");
2267: genErrorHandler(ex);
2268: }
2269: }
2270:
2271: /** Generate the catch phrases for a user-specified error handler */
2272: private void genErrorHandler(ExceptionSpec ex) {
2273: // Each ExceptionHandler in the ExceptionSpec is a separate catch
2274: for (int i = 0; i < ex.handlers.size(); i++) {
2275: ExceptionHandler handler = (ExceptionHandler) ex.handlers
2276: .elementAt(i);
2277: // Generate catch phrase
2278: println("catch (" + handler.exceptionTypeAndName.getText()
2279: + ") {");
2280: tabs++;
2281: if (grammar.hasSyntacticPredicate) {
2282: println("if (inputState->guessing==0) {");
2283: tabs++;
2284: }
2285:
2286: // When not guessing, execute user handler action
2287: ActionTransInfo tInfo = new ActionTransInfo();
2288: genLineNo(handler.action);
2289: printAction(processActionForSpecialSymbols(handler.action
2290: .getText(), handler.action.getLine(), currentRule,
2291: tInfo));
2292: genLineNo2();
2293:
2294: if (grammar.hasSyntacticPredicate) {
2295: tabs--;
2296: println("} else {");
2297: tabs++;
2298: // When guessing, rethrow exception
2299: println("throw;");
2300: tabs--;
2301: println("}");
2302: }
2303: // Close catch phrase
2304: tabs--;
2305: println("}");
2306: }
2307: }
2308:
2309: /** Generate a try { opening if the element has a labeled handler in the rule */
2310: private void genErrorTryForElement(AlternativeElement el) {
2311: if (el.getLabel() == null)
2312: return;
2313: String r = el.enclosingRuleName;
2314: if (grammar instanceof LexerGrammar) {
2315: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2316: }
2317: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2318: if (rs == null) {
2319: antlrTool.panic("Enclosing rule not found!");
2320: }
2321: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2322: if (ex != null) {
2323: println("try { // for error handling");
2324: tabs++;
2325: }
2326: }
2327:
2328: /** Generate a header that is common to all C++ files */
2329: protected void genHeader(String fileName) {
2330: println("/* $ANTLR " + antlrTool.version + ": " + "\""
2331: + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\""
2332: + " -> " + "\"" + fileName + "\"$ */");
2333: }
2334:
2335: // these are unique to C++ mode
2336: public void genInclude(LexerGrammar g) throws IOException {
2337: outputFile = grammar.getClassName() + ".hpp";
2338: outputLine = 1;
2339: currentOutput = antlrTool.openOutputFile(outputFile);
2340: //SAS: changed for proper text file io
2341:
2342: genAST = false; // no way to gen trees.
2343: saveText = true; // save consumed characters.
2344:
2345: tabs = 0;
2346:
2347: // Generate a guard wrapper
2348: println("#ifndef INC_" + grammar.getClassName() + "_hpp_");
2349: println("#define INC_" + grammar.getClassName() + "_hpp_");
2350: println("");
2351:
2352: printHeaderAction(preIncludeHpp);
2353:
2354: println("#include <antlr/config.hpp>");
2355:
2356: // Generate header common to all C++ output files
2357: genHeader(outputFile);
2358:
2359: // Generate header specific to lexer header file
2360: println("#include <antlr/CommonToken.hpp>");
2361: println("#include <antlr/InputBuffer.hpp>");
2362: println("#include <antlr/BitSet.hpp>");
2363: println("#include \"" + grammar.tokenManager.getName()
2364: + TokenTypesFileSuffix + ".hpp\"");
2365:
2366: // Find the name of the super class
2367: String sup = null;
2368: if (grammar.super Class != null) {
2369: sup = grammar.super Class;
2370: println("#include \"" + sup + ".hpp\"");
2371: } else {
2372: sup = grammar.getSuperClass();
2373: if (sup.lastIndexOf('.') != -1)
2374: sup = sup.substring(sup.lastIndexOf('.') + 1);
2375: println("#include <antlr/" + sup + ".hpp>");
2376: sup = namespaceAntlr + sup;
2377: }
2378:
2379: // Do not use printAction because we assume tabs==0
2380: printHeaderAction(postIncludeHpp);
2381:
2382: if (nameSpace != null)
2383: nameSpace.emitDeclarations(currentOutput);
2384:
2385: printHeaderAction("");
2386:
2387: // print javadoc comment if any
2388: if (grammar.comment != null) {
2389: _println(grammar.comment);
2390: }
2391:
2392: // Generate lexer class definition
2393: print("class " + grammar.getClassName() + " : public " + sup);
2394: println(", public " + grammar.tokenManager.getName()
2395: + TokenTypesFileSuffix);
2396:
2397: Token tsuffix = (Token) grammar.options
2398: .get("classHeaderSuffix");
2399: if (tsuffix != null) {
2400: String suffix = StringUtils.stripFrontBack(tsuffix
2401: .getText(), "\"", "\"");
2402: if (suffix != null) {
2403: print(", " + suffix); // must be an interface name for Java
2404: }
2405: }
2406: println("{");
2407:
2408: // Generate user-defined lexer class members
2409: if (grammar.classMemberAction != null) {
2410: genLineNo(grammar.classMemberAction);
2411: print(processActionForSpecialSymbols(
2412: grammar.classMemberAction.getText(),
2413: grammar.classMemberAction.getLine(), currentRule,
2414: null));
2415: genLineNo2();
2416: }
2417:
2418: // Generate initLiterals() method
2419: tabs = 0;
2420: println("private:");
2421: tabs = 1;
2422: println("void initLiterals();");
2423:
2424: // Generate getCaseSensitiveLiterals() method
2425: tabs = 0;
2426: println("public:");
2427: tabs = 1;
2428: println("bool getCaseSensitiveLiterals() const");
2429: println("{");
2430: tabs++;
2431: println("return " + g.caseSensitiveLiterals + ";");
2432: tabs--;
2433: println("}");
2434:
2435: // Make constructors public
2436: tabs = 0;
2437: println("public:");
2438: tabs = 1;
2439:
2440: if (noConstructors) {
2441: tabs = 0;
2442: println("#if 0");
2443: println("// constructor creation turned of with 'noConstructor' option");
2444: tabs = 1;
2445: }
2446:
2447: // Generate the constructor from std::istream
2448: println(grammar.getClassName() + "(" + namespaceStd
2449: + "istream& in);");
2450:
2451: // Generate the constructor from InputBuffer
2452: println(grammar.getClassName() + "(" + namespaceAntlr
2453: + "InputBuffer& ib);");
2454:
2455: println(grammar.getClassName() + "(const " + namespaceAntlr
2456: + "LexerSharedInputState& state);");
2457: if (noConstructors) {
2458: tabs = 0;
2459: println("// constructor creation turned of with 'noConstructor' option");
2460: println("#endif");
2461: tabs = 1;
2462: }
2463:
2464: // Generate nextToken() rule.
2465: // nextToken() is a synthetic lexer rule that is the implicit OR of all
2466: // user-defined lexer rules.
2467: println(namespaceAntlr + "RefToken nextToken();");
2468:
2469: // Generate code for each rule in the lexer
2470: Enumeration ids = grammar.rules.elements();
2471: while (ids.hasMoreElements()) {
2472: RuleSymbol sym = (RuleSymbol) ids.nextElement();
2473: // Don't generate the synthetic rules
2474: if (!sym.getId().equals("mnextToken")) {
2475: genRuleHeader(sym, false);
2476: }
2477: exitIfError();
2478: }
2479:
2480: // Make the rest private
2481: tabs = 0;
2482: println("private:");
2483: tabs = 1;
2484:
2485: // generate the rule name array for debugging
2486: if (grammar.debuggingOutput) {
2487: println("static const char* _ruleNames[];");
2488: }
2489:
2490: // Generate the semantic predicate map for debugging
2491: if (grammar.debuggingOutput)
2492: println("static const char* _semPredNames[];");
2493:
2494: // Generate the bitsets used throughout the lexer
2495: genBitsetsHeader(bitsetsUsed,
2496: ((LexerGrammar) grammar).charVocabulary.size());
2497:
2498: tabs = 0;
2499: println("};");
2500: println("");
2501: if (nameSpace != null)
2502: nameSpace.emitClosures(currentOutput);
2503:
2504: // Generate a guard wrapper
2505: println("#endif /*INC_" + grammar.getClassName() + "_hpp_*/");
2506:
2507: // Close the lexer output stream
2508: currentOutput.close();
2509: currentOutput = null;
2510: }
2511:
2512: public void genInclude(ParserGrammar g) throws IOException {
2513: // Open the output stream for the parser and set the currentOutput
2514: outputFile = grammar.getClassName() + ".hpp";
2515: outputLine = 1;
2516: currentOutput = antlrTool.openOutputFile(outputFile);
2517: //SAS: changed for proper text file io
2518:
2519: genAST = grammar.buildAST;
2520:
2521: tabs = 0;
2522:
2523: // Generate a guard wrapper
2524: println("#ifndef INC_" + grammar.getClassName() + "_hpp_");
2525: println("#define INC_" + grammar.getClassName() + "_hpp_");
2526: println("");
2527: printHeaderAction(preIncludeHpp);
2528: println("#include <antlr/config.hpp>");
2529:
2530: // Generate the header common to all output files.
2531: genHeader(outputFile);
2532:
2533: // Generate header for the parser
2534: println("#include <antlr/TokenStream.hpp>");
2535: println("#include <antlr/TokenBuffer.hpp>");
2536: println("#include \"" + grammar.tokenManager.getName()
2537: + TokenTypesFileSuffix + ".hpp\"");
2538:
2539: // Generate parser class definition
2540: String sup = null;
2541: if (grammar.super Class != null) {
2542: sup = grammar.super Class;
2543: println("#include \"" + sup + ".hpp\"");
2544: } else {
2545: sup = grammar.getSuperClass();
2546: if (sup.lastIndexOf('.') != -1)
2547: sup = sup.substring(sup.lastIndexOf('.') + 1);
2548: println("#include <antlr/" + sup + ".hpp>");
2549: sup = namespaceAntlr + sup;
2550: }
2551: println("");
2552:
2553: // Do not use printAction because we assume tabs==0
2554: printHeaderAction(postIncludeHpp);
2555:
2556: if (nameSpace != null)
2557: nameSpace.emitDeclarations(currentOutput);
2558:
2559: printHeaderAction("");
2560:
2561: // print javadoc comment if any
2562: if (grammar.comment != null) {
2563: _println(grammar.comment);
2564: }
2565:
2566: // generate the actual class definition
2567: print("class " + grammar.getClassName() + " : public " + sup);
2568: println(", public " + grammar.tokenManager.getName()
2569: + TokenTypesFileSuffix);
2570:
2571: Token tsuffix = (Token) grammar.options
2572: .get("classHeaderSuffix");
2573: if (tsuffix != null) {
2574: String suffix = StringUtils.stripFrontBack(tsuffix
2575: .getText(), "\"", "\"");
2576: if (suffix != null)
2577: print(", " + suffix); // must be an interface name for Java
2578: }
2579: println("{");
2580:
2581: // set up an array of all the rule names so the debugger can
2582: // keep track of them only by number -- less to store in tree...
2583: if (grammar.debuggingOutput) {
2584: println("public: static const char* _ruleNames[];");
2585: }
2586: // Generate user-defined parser class members
2587: if (grammar.classMemberAction != null) {
2588: genLineNo(grammar.classMemberAction.getLine());
2589: print(processActionForSpecialSymbols(
2590: grammar.classMemberAction.getText(),
2591: grammar.classMemberAction.getLine(), currentRule,
2592: null));
2593: genLineNo2();
2594: }
2595: println("public:");
2596: tabs = 1;
2597: println("void initializeASTFactory( " + namespaceAntlr
2598: + "ASTFactory& factory );");
2599: // println("// called from constructors");
2600: // println("void _initialize( void );");
2601:
2602: // Generate parser class constructor from TokenBuffer
2603: tabs = 0;
2604: if (noConstructors) {
2605: println("#if 0");
2606: println("// constructor creation turned of with 'noConstructor' option");
2607: }
2608: println("protected:");
2609: tabs = 1;
2610: println(grammar.getClassName() + "(" + namespaceAntlr
2611: + "TokenBuffer& tokenBuf, int k);");
2612: tabs = 0;
2613: println("public:");
2614: tabs = 1;
2615: println(grammar.getClassName() + "(" + namespaceAntlr
2616: + "TokenBuffer& tokenBuf);");
2617:
2618: // Generate parser class constructor from TokenStream
2619: tabs = 0;
2620: println("protected:");
2621: tabs = 1;
2622: println(grammar.getClassName() + "(" + namespaceAntlr
2623: + "TokenStream& lexer, int k);");
2624: tabs = 0;
2625: println("public:");
2626: tabs = 1;
2627: println(grammar.getClassName() + "(" + namespaceAntlr
2628: + "TokenStream& lexer);");
2629:
2630: println(grammar.getClassName() + "(const " + namespaceAntlr
2631: + "ParserSharedInputState& state);");
2632: if (noConstructors) {
2633: tabs = 0;
2634: println("// constructor creation turned of with 'noConstructor' option");
2635: println("#endif");
2636: tabs = 1;
2637: }
2638:
2639: println("int getNumTokens() const");
2640: println("{");
2641: tabs++;
2642: println("return " + grammar.getClassName() + "::NUM_TOKENS;");
2643: tabs--;
2644: println("}");
2645: println("const char* getTokenName( int type ) const");
2646: println("{");
2647: tabs++;
2648: println("if( type > getNumTokens() ) return 0;");
2649: println("return " + grammar.getClassName()
2650: + "::tokenNames[type];");
2651: tabs--;
2652: println("}");
2653: println("const char* const* getTokenNames() const");
2654: println("{");
2655: tabs++;
2656: println("return " + grammar.getClassName() + "::tokenNames;");
2657: tabs--;
2658: println("}");
2659:
2660: // Generate code for each rule in the grammar
2661: Enumeration ids = grammar.rules.elements();
2662: while (ids.hasMoreElements()) {
2663: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
2664: if (sym instanceof RuleSymbol) {
2665: RuleSymbol rs = (RuleSymbol) sym;
2666: genRuleHeader(rs, rs.references.size() == 0);
2667: }
2668: exitIfError();
2669: }
2670:
2671: if (usingCustomAST) {
2672: // when we are using a custom ast override Parser::getAST to return the
2673: // custom AST type
2674: tabs = 0;
2675: println("public:");
2676: tabs = 1;
2677: println(labeledElementASTType + " getAST();");
2678: println("");
2679: tabs = 0;
2680: println("protected:");
2681: tabs = 1;
2682: println(labeledElementASTType + " returnAST;");
2683: }
2684: // Make the rest private
2685: tabs = 0;
2686: println("private:");
2687: tabs = 1;
2688:
2689: // Generate the token names
2690: println("static const char* tokenNames[];");
2691: // and how many there are of them
2692: _println("#ifndef NO_STATIC_CONSTS");
2693: println("static const int NUM_TOKENS = "
2694: + grammar.tokenManager.getVocabulary().size() + ";");
2695: _println("#else");
2696: println("enum {");
2697: println("\tNUM_TOKENS = "
2698: + grammar.tokenManager.getVocabulary().size());
2699: println("};");
2700: _println("#endif");
2701:
2702: // Generate the bitsets used throughout the grammar
2703: genBitsetsHeader(bitsetsUsed, grammar.tokenManager
2704: .maxTokenType());
2705:
2706: // Generate the semantic predicate map for debugging
2707: if (grammar.debuggingOutput)
2708: println("static const char* _semPredNames[];");
2709:
2710: // Close class definition
2711: tabs = 0;
2712: println("};");
2713: println("");
2714: if (nameSpace != null)
2715: nameSpace.emitClosures(currentOutput);
2716:
2717: // Generate a guard wrapper
2718: println("#endif /*INC_" + grammar.getClassName() + "_hpp_*/");
2719:
2720: // Close the parser output stream
2721: currentOutput.close();
2722: currentOutput = null;
2723: }
2724:
2725: public void genInclude(TreeWalkerGrammar g) throws IOException {
2726: // Open the output stream for the parser and set the currentOutput
2727: outputFile = grammar.getClassName() + ".hpp";
2728: outputLine = 1;
2729: currentOutput = antlrTool.openOutputFile(outputFile);
2730: //SAS: changed for proper text file io
2731:
2732: genAST = grammar.buildAST;
2733: tabs = 0;
2734:
2735: // Generate a guard wrapper
2736: println("#ifndef INC_" + grammar.getClassName() + "_hpp_");
2737: println("#define INC_" + grammar.getClassName() + "_hpp_");
2738: println("");
2739: printHeaderAction(preIncludeHpp);
2740: println("#include <antlr/config.hpp>");
2741: println("#include \"" + grammar.tokenManager.getName()
2742: + TokenTypesFileSuffix + ".hpp\"");
2743:
2744: // Generate the header common to all output files.
2745: genHeader(outputFile);
2746:
2747: // Find the name of the super class
2748: String sup = null;
2749: if (grammar.super Class != null) {
2750: sup = grammar.super Class;
2751: println("#include \"" + sup + ".hpp\"");
2752: } else {
2753: sup = grammar.getSuperClass();
2754: if (sup.lastIndexOf('.') != -1)
2755: sup = sup.substring(sup.lastIndexOf('.') + 1);
2756: println("#include <antlr/" + sup + ".hpp>");
2757: sup = namespaceAntlr + sup;
2758: }
2759: println("");
2760:
2761: // Generate header for the parser
2762: //
2763: // Do not use printAction because we assume tabs==0
2764: printHeaderAction(postIncludeHpp);
2765:
2766: if (nameSpace != null)
2767: nameSpace.emitDeclarations(currentOutput);
2768:
2769: printHeaderAction("");
2770:
2771: // print javadoc comment if any
2772: if (grammar.comment != null) {
2773: _println(grammar.comment);
2774: }
2775:
2776: // Generate parser class definition
2777: print("class " + grammar.getClassName() + " : public " + sup);
2778: println(", public " + grammar.tokenManager.getName()
2779: + TokenTypesFileSuffix);
2780:
2781: Token tsuffix = (Token) grammar.options
2782: .get("classHeaderSuffix");
2783: if (tsuffix != null) {
2784: String suffix = StringUtils.stripFrontBack(tsuffix
2785: .getText(), "\"", "\"");
2786: if (suffix != null) {
2787: print(", " + suffix); // must be an interface name for Java
2788: }
2789: }
2790: println("{");
2791:
2792: // Generate user-defined parser class members
2793: if (grammar.classMemberAction != null) {
2794: genLineNo(grammar.classMemberAction.getLine());
2795: print(processActionForSpecialSymbols(
2796: grammar.classMemberAction.getText(),
2797: grammar.classMemberAction.getLine(), currentRule,
2798: null));
2799: genLineNo2();
2800: }
2801:
2802: // Generate default parser class constructor
2803: tabs = 0;
2804: println("public:");
2805:
2806: if (noConstructors) {
2807: println("#if 0");
2808: println("// constructor creation turned of with 'noConstructor' option");
2809: }
2810: tabs = 1;
2811: println(grammar.getClassName() + "();");
2812: if (noConstructors) {
2813: tabs = 0;
2814: println("#endif");
2815: tabs = 1;
2816: }
2817:
2818: // Generate declaration for the initializeFactory method
2819: println("void initializeASTFactory( " + namespaceAntlr
2820: + "ASTFactory& factory );");
2821:
2822: println("int getNumTokens() const");
2823: println("{");
2824: tabs++;
2825: println("return " + grammar.getClassName() + "::NUM_TOKENS;");
2826: tabs--;
2827: println("}");
2828: println("const char* getTokenName( int type ) const");
2829: println("{");
2830: tabs++;
2831: println("if( type > getNumTokens() ) return 0;");
2832: println("return " + grammar.getClassName()
2833: + "::tokenNames[type];");
2834: tabs--;
2835: println("}");
2836:
2837: // Generate code for each rule in the grammar
2838: Enumeration ids = grammar.rules.elements();
2839: String ruleNameInits = "";
2840: while (ids.hasMoreElements()) {
2841: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
2842: if (sym instanceof RuleSymbol) {
2843: RuleSymbol rs = (RuleSymbol) sym;
2844: genRuleHeader(rs, rs.references.size() == 0);
2845: }
2846: exitIfError();
2847: }
2848: if (usingCustomAST) {
2849: // when we are using a custom ast override TreeParser::getAST to return the
2850: // custom AST type
2851: tabs = 0;
2852: println("public:");
2853: tabs = 1;
2854: println(labeledElementASTType + " getAST();");
2855: println("");
2856: tabs = 0;
2857: println("protected:");
2858: tabs = 1;
2859: println(labeledElementASTType + " returnAST;");
2860: println(labeledElementASTType + " _retTree;");
2861: }
2862:
2863: // Make the rest private
2864: tabs = 0;
2865: println("private:");
2866: tabs = 1;
2867:
2868: // Generate the token names
2869: println("static const char* tokenNames[];");
2870: // and how many there are of them
2871: _println("#ifndef NO_STATIC_CONSTS");
2872: println("static const int NUM_TOKENS = "
2873: + grammar.tokenManager.getVocabulary().size() + ";");
2874: _println("#else");
2875: println("enum {");
2876: println("\tNUM_TOKENS = "
2877: + grammar.tokenManager.getVocabulary().size());
2878: println("};");
2879: _println("#endif");
2880:
2881: // Generate the bitsets used throughout the grammar
2882: genBitsetsHeader(bitsetsUsed, grammar.tokenManager
2883: .maxTokenType());
2884:
2885: // Close class definition
2886: tabs = 0;
2887: println("};");
2888: println("");
2889: if (nameSpace != null)
2890: nameSpace.emitClosures(currentOutput);
2891:
2892: // Generate a guard wrapper
2893: println("#endif /*INC_" + grammar.getClassName() + "_hpp_*/");
2894:
2895: // Close the parser output stream
2896: currentOutput.close();
2897: currentOutput = null;
2898: }
2899:
2900: /// for convenience
2901: protected void genASTDeclaration(AlternativeElement el) {
2902: genASTDeclaration(el, labeledElementASTType);
2903: }
2904:
2905: /// for convenience
2906: protected void genASTDeclaration(AlternativeElement el,
2907: String node_type) {
2908: genASTDeclaration(el, el.getLabel(), node_type);
2909: }
2910:
2911: /// Generate (if not already done) a declaration for the AST for el.
2912: protected void genASTDeclaration(AlternativeElement el,
2913: String var_name, String node_type) {
2914: // already declared?
2915: if (declaredASTVariables.contains(el))
2916: return;
2917:
2918: String init = labeledElementASTInit;
2919:
2920: if (el instanceof GrammarAtom
2921: && ((GrammarAtom) el).getASTNodeType() != null)
2922: init = "static_cast<Ref"
2923: + ((GrammarAtom) el).getASTNodeType() + ">("
2924: + labeledElementASTInit + ")";
2925:
2926: // emit code
2927: println(node_type + " " + var_name + "_AST = " + init + ";");
2928:
2929: // mark as declared
2930: declaredASTVariables.add(el);
2931: }
2932:
2933: private void genLiteralsTest() {
2934: println("_ttype = testLiteralsTable(_ttype);");
2935: }
2936:
2937: private void genLiteralsTestForPartialToken() {
2938: println("_ttype = testLiteralsTable(text.substr(_begin, text.length()-_begin),_ttype);");
2939: }
2940:
/**
 * Match generation for a {@link BitSet}. The body is empty, so this
 * generator emits nothing for this overload.
 *
 * @param b the set to match (not read here)
 */
protected void genMatch(BitSet b) {
}
2943:
2944: protected void genMatch(GrammarAtom atom) {
2945: if (atom instanceof StringLiteralElement) {
2946: if (grammar instanceof LexerGrammar) {
2947: genMatchUsingAtomText(atom);
2948: } else {
2949: genMatchUsingAtomTokenType(atom);
2950: }
2951: } else if (atom instanceof CharLiteralElement) {
2952: if (grammar instanceof LexerGrammar) {
2953: genMatchUsingAtomText(atom);
2954: } else {
2955: antlrTool
2956: .error("cannot ref character literals in grammar: "
2957: + atom);
2958: }
2959: } else if (atom instanceof TokenRefElement) {
2960: genMatchUsingAtomText(atom);
2961: } else if (atom instanceof WildcardElement) {
2962: gen((WildcardElement) atom);
2963: }
2964: }
2965:
2966: protected void genMatchUsingAtomText(GrammarAtom atom) {
2967: // match() for trees needs the _t cursor
2968: String astArgs = "";
2969: if (grammar instanceof TreeWalkerGrammar) {
2970: if (usingCustomAST)
2971: astArgs = "static_cast<" + namespaceAntlr + "RefAST"
2972: + ">(_t),";
2973: else
2974: astArgs = "_t,";
2975: }
2976:
2977: // if in lexer and ! on element, save buffer index to kill later
2978: if (grammar instanceof LexerGrammar
2979: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2980: println("_saveIndex=text.length();");
2981: }
2982:
2983: print(atom.not ? "matchNot(" : "match(");
2984: _print(astArgs);
2985:
2986: // print out what to match
2987: if (atom.atomText.equals("EOF")) {
2988: // horrible hack to handle EOF case
2989: _print(namespaceAntlr + "Token::EOF_TYPE");
2990: } else {
2991: _print(textOrChar(atom.atomText));
2992: }
2993: _println(");");
2994:
2995: if (grammar instanceof LexerGrammar
2996: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2997: println("text.erase(_saveIndex);"); // kill text atom put in buffer
2998: }
2999: }
3000:
3001: protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
3002: // match() for trees needs the _t cursor
3003: String astArgs = "";
3004: if (grammar instanceof TreeWalkerGrammar) {
3005: if (usingCustomAST)
3006: astArgs = "static_cast<" + namespaceAntlr + "RefAST"
3007: + ">(_t),";
3008: else
3009: astArgs = "_t,";
3010: }
3011:
3012: // If the literal can be mangled, generate the symbolic constant instead
3013: String mangledName = null;
3014: String s = astArgs + getValueString(atom.getType());
3015:
3016: // matching
3017: println((atom.not ? "matchNot(" : "match(") + s + ");");
3018: }
3019:
/** Generate the C++ definition of the lexer's nextToken() method.
 *  nextToken() is a synthetic lexer rule that is the implicit OR of all
 *  user-defined lexer rules.
 *
 *  If the grammar has no defined public rule, a one-line stub that returns
 *  an EOF token is emitted instead. Otherwise a synthetic rule is built,
 *  registered in the grammar as the private symbol "mnextToken", analyzed,
 *  and emitted as an endless <code>for (;;)</code> loop wrapped in a
 *  try/catch, with a <code>tryAgain:</code> label at the bottom of the loop
 *  body that the emitted <code>goto tryAgain</code> statements jump to.
 *
 *  This method casts <code>grammar</code> to LexerGrammar without checking,
 *  so it must only be called while a lexer grammar is current.
 */
public void genNextToken() {
    // Are there any public rules? If not, then just generate a
    // fake nextToken().
    boolean hasPublicRules = false;
    for (int i = 0; i < grammar.rules.size(); i++) {
        RuleSymbol rs = (RuleSymbol) grammar.rules.elementAt(i);
        if (rs.isDefined() && rs.access.equals("public")) {
            hasPublicRules = true;
            break;
        }
    }
    if (!hasPublicRules) {
        // Stub: nextToken() unconditionally returns a CommonToken of type EOF
        println("");
        println(namespaceAntlr + "RefToken "
                + grammar.getClassName()
                + "::nextToken() { return " + namespaceAntlr
                + "RefToken(new " + namespaceAntlr + "CommonToken("
                + namespaceAntlr + "Token::EOF_TYPE, \"\")); }");
        println("");
        return;
    }

    // Create the synthesized nextToken() rule
    RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(
            grammar, grammar.rules, "nextToken");
    // Define the nextToken rule symbol
    RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
    nextTokenRs.setDefined();
    nextTokenRs.setBlock(nextTokenBlk);
    nextTokenRs.access = "private";
    grammar.define(nextTokenRs);
    // Analyze the nextToken rule
    // (the result is not read afterwards; the call is made for its effects)
    boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);

    // Generate the next token rule
    // filterRule stays null unless the lexer is in filter mode
    String filterRule = null;
    if (((LexerGrammar) grammar).filterMode) {
        filterRule = ((LexerGrammar) grammar).filterRule;
    }

    println("");
    println(namespaceAntlr + "RefToken " + grammar.getClassName()
            + "::nextToken()");
    println("{");
    tabs++;
    // NOTE(review): theRetToken is declared here and again inside the loop
    // below, so the generated inner declaration shadows this one.
    println(namespaceAntlr + "RefToken theRetToken;");
    println("for (;;) {");
    tabs++;
    println(namespaceAntlr + "RefToken theRetToken;");
    println("int _ttype = " + namespaceAntlr
            + "Token::INVALID_TYPE;");
    if (((LexerGrammar) grammar).filterMode) {
        println("setCommitToPath(false);");
        if (filterRule != null) {
            // Here's a good place to ensure that the filter rule actually exists
            if (!grammar.isDefined(CodeGenerator
                    .encodeLexerRuleName(filterRule))) {
                grammar.antlrTool.error("Filter rule " + filterRule
                        + " does not exist in this lexer");
            } else {
                RuleSymbol rs = (RuleSymbol) grammar
                        .getSymbol(CodeGenerator
                                .encodeLexerRuleName(filterRule));
                if (!rs.isDefined()) {
                    grammar.antlrTool.error("Filter rule "
                            + filterRule
                            + " does not exist in this lexer");
                } else if (rs.access.equals("public")) {
                    grammar.antlrTool.error("Filter rule "
                            + filterRule + " must be protected");
                }
            }
            // With a named filter rule the generated code marks the input
            // so it can rewind(_m) in the catch block emitted further down.
            println("int _m;");
            println("_m = mark();");
        }
    }
    println("resetText();");

    // Generate try around whole thing to trap scanner errors
    println("try {   // for lexical and char stream error handling");
    tabs++;

    // Test for public lexical rules with empty paths
    for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
        Alternative a = nextTokenBlk.getAlternativeAt(i);
        if (a.cache[1].containsEpsilon()) {
            antlrTool.warning("found optional path in nextToken()");
        }
    }

    // Generate the block
    String newline = System.getProperty("line.separator");
    CppBlockFinishingInfo howToFinish = genCommonBlock(
            nextTokenBlk, false);
    // errFinish is the text handed to genBlockFinish below. It always
    // starts with an EOF check that calls uponEOF() and makes an EOF token;
    // the trailing "else" part depends on the filter settings.
    String errFinish = "if (LA(1)==EOF_CHAR)" + newline
            + "\t\t\t\t{" + newline + "\t\t\t\t\tuponEOF();"
            + newline + "\t\t\t\t\t_returnToken = makeToken("
            + namespaceAntlr + "Token::EOF_TYPE);" + newline
            + "\t\t\t\t}";
    errFinish += newline + "\t\t\t\t";
    if (((LexerGrammar) grammar).filterMode) {
        if (filterRule == null) {
            // filter mode without a rule: skip one character and retry
            errFinish += "else {consume(); goto tryAgain;}";
        } else {
            // filter mode with a rule: commit, run the filter rule, retry
            errFinish += "else {" + newline + "\t\t\t\t\tcommit();"
                    + newline + "\t\t\t\t\ttry {m" + filterRule
                    + "(false);}" + newline + "\t\t\t\t\tcatch("
                    + namespaceAntlr + "RecognitionException& e) {"
                    + newline
                    + "\t\t\t\t\t	// catastrophic failure"
                    + newline + "\t\t\t\t\t	reportError(e);"
                    + newline + "\t\t\t\t\t	consume();" + newline
                    + "\t\t\t\t\t}" + newline
                    + "\t\t\t\t\tgoto tryAgain;" + newline
                    + "\t\t\t\t}";
        }
    } else {
        // not filtering: no alternative matched, so emit throwNoViable
        errFinish += "else {" + throwNoViable + "}";
    }
    genBlockFinish(howToFinish, errFinish);

    // at this point a valid token has been matched, undo "mark" that was done
    if (((LexerGrammar) grammar).filterMode && filterRule != null) {
        println("commit();");
    }

    // Generate literals test if desired
    // make sure _ttype is set first; note _returnToken must be
    // non-null as the rule was required to create it.
    println("if ( !_returnToken )" + newline
            + "\t\t\t\tgoto tryAgain; // found SKIP token"
            + newline);
    println("_ttype = _returnToken->getType();");
    if (((LexerGrammar) grammar).getTestLiterals()) {
        genLiteralsTest();
    }

    // return token created by rule reference in switch
    println("_returnToken->setType(_ttype);");
    println("return _returnToken;");

    // Close try block
    tabs--;
    println("}");
    println("catch (" + namespaceAntlr
            + "RecognitionException& e) {");
    tabs++;
    if (((LexerGrammar) grammar).filterMode) {
        if (filterRule == null) {
            // not committed to a path: drop one character and retry
            println("if ( !getCommitToPath() ) {");
            tabs++;
            println("consume();");
            println("goto tryAgain;");
            tabs--;
            println("}");
        } else {
            // not committed to a path: rewind to the mark and run the filter rule
            println("if ( !getCommitToPath() ) {");
            tabs++;
            println("rewind(_m);");
            println("resetText();");
            println("try {m" + filterRule + "(false);}");
            println("catch(" + namespaceAntlr
                    + "RecognitionException& ee) {");
            println(" // horrendous failure: error in filter rule");
            println(" reportError(ee);");
            println(" consume();");
            println("}");
            // println("goto tryAgain;");
            tabs--;
            println("}");
            // the error handling emitted next becomes the body of this "else"
            println("else");
        }
    }
    if (nextTokenBlk.getDefaultErrorHandler()) {
        // default handling: report the error and skip one character
        println("{");
        tabs++;
        println("reportError(e);");
        println("consume();");
        tabs--;
        println("}");
    } else {
        // pass on to invoking routine
        tabs++;
        println("throw " + namespaceAntlr
                + "TokenStreamRecognitionException(e);");
        tabs--;
    }

    // close CharStreamException try
    tabs--;
    println("}");
    // char-stream exceptions are rethrown as token-stream exceptions
    println("catch (" + namespaceAntlr
            + "CharStreamIOException& csie) {");
    println("\tthrow " + namespaceAntlr
            + "TokenStreamIOException(csie.io);");
    println("}");
    println("catch (" + namespaceAntlr
            + "CharStreamException& cse) {");
    println("\tthrow " + namespaceAntlr
            + "TokenStreamException(cse.getMessage());");
    println("}");

    // close for-loop
    // the label sits at the very end of the loop body, so "goto tryAgain"
    // starts the next iteration
    _println("tryAgain:;");
    tabs--;
    println("}");

    // close method nextToken
    tabs--;
    println("}");
    println("");
}
3237:
/** Gen a named rule block, i.e. the C++ definition of one rule method.
 *  ASTs are generated for each element of an alternative unless
 *  the rule or the alternative have a '!' modifier.
 *
 *  If an alternative defeats the default tree construction, it
 *  must set <rule>_AST to the root of the returned AST.
 *
 *  Each alternative that does automatic tree construction, builds
 *  up root and child list pointers in an ASTPair structure.
 *
 *  A rule finishes by setting the returnAST variable from the
 *  ASTPair.
 *
 *  An undefined rule is reported through antlrTool.error and nothing is
 *  generated for it.
 *
 * @param s The symbol of the rule to generate
 * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere);
 *        not read inside this method
 * @param ruleNum Rule number; only written into the fireEnterRule call that is
 *        emitted when grammar.debuggingOutput is set
 * @param prefix Text placed directly in front of the rule id in the emitted method name
 */
public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum,
        String prefix) {
    //		tabs=1; // JavaCodeGenerator needs this
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genRule(" + s.getId() + ")");
    if (!s.isDefined()) {
        antlrTool.error("undefined rule: " + s.getId());
        return;
    }

    // Generate rule return type, name, arguments
    RuleBlock rblk = s.getBlock();

    currentRule = rblk;
    currentASTResult = s.getId();

    // clear list of declared ast variables..
    declaredASTVariables.clear();

    // Save the AST generation state, and set it to that of the rule
    boolean savegenAST = genAST;
    genAST = genAST && rblk.getAutoGen();

    // boolean oldsaveTest = saveText;
    // NOTE: saveText is overwritten here and not restored at the end
    // (the restore below is commented out).
    saveText = rblk.getAutoGen();

    // print javadoc comment if any
    if (s.comment != null) {
        _println(s.comment);
    }

    // Gen method return type (note lexer return action set at rule creation)
    if (rblk.returnAction != null) {
        // Has specified return value
        _print(extractTypeOfAction(rblk.returnAction, rblk
                .getLine(), rblk.getColumn())
                + " ");
    } else {
        // No specified return value
        _print("void ");
    }

    // Gen method name
    _print(prefix + s.getId() + "(");

    // Additional rule parameters common to all rules for this grammar
    _print(commonExtraParams);
    // separator only when both the common and the user parameters are present
    if (commonExtraParams.length() != 0 && rblk.argAction != null) {
        _print(",");
    }

    // Gen arguments
    if (rblk.argAction != null) {
        // Has specified arguments
        _println("");
        // FIXME: make argAction also a token? Hmmmmm
        //			genLineNo(rblk);
        tabs++;
        println(rblk.argAction);
        tabs--;
        print(") ");
        //			genLineNo2();	// gcc gives error on the brace... hope it works for the others too
    } else {
        // No specified arguments
        _print(") ");
    }

    // Gen throws clause and open curly
    //		_print(" throws " + exceptionThrown);
    //		if ( !(grammar instanceof TreeWalkerGrammar) ) {
    //			_print(", IOException");
    //		}
    _println("{");
    tabs++;

    // Optional tracing: emit a local Tracer object as the first statement
    if (grammar.traceRules) {
        if (grammar instanceof TreeWalkerGrammar) {
            if (usingCustomAST)
                println("Tracer traceInOut(this,\"" + s.getId()
                        + "\",static_cast<" + namespaceAntlr
                        + "RefAST" + ">(_t));");
            else
                println("Tracer traceInOut(this,\"" + s.getId()
                        + "\",_t);");
        } else {
            println("Tracer traceInOut(this, \"" + s.getId()
                    + "\");");
        }
    }

    // Convert return action to variable declaration
    if (rblk.returnAction != null) {
        genLineNo(rblk);
        println(rblk.returnAction + ";");
        genLineNo2();
    }

    // print out definitions needed by rules for various grammar types
    if (!commonLocalVars.equals(""))
        println(commonLocalVars);

    if (grammar instanceof LexerGrammar) {
        // RK: why is this here? It seems not supported in the rest of the
        // tool.
        // lexer rule default return value is the rule's token name
        // This is a horrible hack to support the built-in EOF lexer rule.
        if (s.getId().equals("mEOF"))
            println("_ttype = " + namespaceAntlr
                    + "Token::EOF_TYPE;");
        else
            // the rule id minus its first character is used as the token name
            println("_ttype = " + s.getId().substring(1) + ";");
        println("int _saveIndex;"); // used for element! (so we can kill text matched for element)
        /*
         println("boolean old_saveConsumedInput=saveConsumedInput;");
         if ( !rblk.getAutoGen() ) {      // turn off "save input" if ! on rule
         println("saveConsumedInput=false;");
         }
         */

    }

    // if debugging, write code to mark entry to the rule
    // (no output for tree-walker grammars: neither branch below applies)
    if (grammar.debuggingOutput)
        if (grammar instanceof ParserGrammar)
            println("fireEnterRule(" + ruleNum + ",0);");
        else if (grammar instanceof LexerGrammar)
            println("fireEnterRule(" + ruleNum + ",_ttype);");

    // Generate trace code if desired
    //		if ( grammar.debuggingOutput || grammar.traceRules) {
    //			println("try { // debugging");
    //			tabs++;
    //		}

    // Initialize AST variables
    if (grammar instanceof TreeWalkerGrammar) {
        // "Input" value for rule
        //			println(labeledElementASTType+" " + s.getId() + "_AST_in = "+labeledElementASTType+"(_t);");
        println(labeledElementASTType + " " + s.getId()
                + "_AST_in = _t;");
    }
    if (grammar.buildAST) {
        // Parser member used to pass AST returns from rule invocations
        println("returnAST = " + labeledElementASTInit + ";");
        // Tracks AST construction
        println(namespaceAntlr + "ASTPair currentAST;"); // = new ASTPair();");
        // User-settable return value for rule.
        println(labeledElementASTType + " " + s.getId() + "_AST = "
                + labeledElementASTInit + ";");
    }

    genBlockPreamble(rblk);
    genBlockInitAction(rblk);
    println("");

    // Search for an unlabeled exception specification attached to the rule
    ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");

    // Generate try block around the entire rule for error handling
    if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
        println("try {      // for error handling");
        tabs++;
    }

    // Generate the alternatives
    if (rblk.alternatives.size() == 1) {
        // One alternative -- use simple form
        Alternative alt = rblk.getAlternativeAt(0);
        String pred = alt.semPred;
        if (pred != null)
            genSemPred(pred, currentRule.line);
        if (alt.synPred != null) {
            antlrTool
                    .warning(
                            "Syntactic predicate ignored for single alternative",
                            grammar.getFilename(), alt.synPred
                                    .getLine(), alt.synPred
                                    .getColumn());
        }
        genAlt(alt, rblk);
    } else {
        // Multiple alternatives -- generate complex form
        // (the result is not read afterwards; the call is made for its effects)
        boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);

        CppBlockFinishingInfo howToFinish = genCommonBlock(rblk,
                false);
        genBlockFinish(howToFinish, throwNoViable);
    }

    // Generate catch phrase for error handling
    if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
        // Close the try block
        tabs--;
        println("}");
    }

    // Generate user-defined or default catch phrases
    if (unlabeledUserSpec != null) {
        genErrorHandler(unlabeledUserSpec);
    } else if (rblk.getDefaultErrorHandler()) {
        // Generate default catch phrase
        println("catch (" + exceptionThrown + "& ex) {");
        tabs++;
        // Generate code to handle error if not guessing
        if (grammar.hasSyntacticPredicate) {
            println("if( inputState->guessing == 0 ) {");
            tabs++;
        }
        println("reportError(ex);");
        if (!(grammar instanceof TreeWalkerGrammar)) {
            // Generate code to consume until token in k==1 follow set
            Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1,
                    rblk.endNode);
            String followSetName = getBitsetName(markBitsetForGen(follow.fset));
            println("consume();");
            println("consumeUntil(" + followSetName + ");");
        } else {
            // Just consume one token
            println("if ( _t != " + labeledElementASTInit + " )");
            tabs++;
            println("_t = _t->getNextSibling();");
            tabs--;
        }
        if (grammar.hasSyntacticPredicate) {
            tabs--;
            // When guessing, rethrow exception
            println("} else {");
            tabs++;
            println("throw;");
            tabs--;
            println("}");
        }
        // Close catch phrase
        tabs--;
        println("}");
    }

    // Squirrel away the AST "return" value
    if (grammar.buildAST) {
        println("returnAST = " + s.getId() + "_AST;");
    }

    // Set return tree value for tree walkers
    if (grammar instanceof TreeWalkerGrammar) {
        println("_retTree = _t;");
    }

    // Generate literals test for lexer rules so marked
    // (protected rules test only the text from _begin onwards)
    if (rblk.getTestLiterals()) {
        if (s.access.equals("protected")) {
            genLiteralsTestForPartialToken();
        } else {
            genLiteralsTest();
        }
    }

    // if doing a lexer rule, dump code to create token if necessary
    if (grammar instanceof LexerGrammar) {
        println("if ( _createToken && _token==" + namespaceAntlr
                + "nullToken && _ttype!=" + namespaceAntlr
                + "Token::SKIP ) {");
        println("   _token = makeToken(_ttype);");
        println("   _token->setText(text.substr(_begin, text.length()-_begin));");
        println("}");
        println("_returnToken = _token;");
        // It should be easy for an optimizing compiler to realize this does nothing
        // but it avoids the warning about the variable being unused.
        println("_saveIndex=0;");
    }

    // Gen the return statement if there is one (lexer has hard-wired return action)
    if (rblk.returnAction != null) {
        println("return "
                + extractIdOfAction(rblk.returnAction, rblk
                        .getLine(), rblk.getColumn()) + ";");
    }

    //		if ( grammar.debuggingOutput || grammar.traceRules) {
    ////			tabs--;
    ////			println("} finally { // debugging");
    ////			tabs++;
    //
    //			// Generate trace code if desired
    //			if ( grammar.debuggingOutput)
    //				if (grammar instanceof ParserGrammar)
    //					println("fireExitRule(" + ruleNum + ",0);");
    //				else if (grammar instanceof LexerGrammar)
    //					println("fireExitRule(" + ruleNum + ",_ttype);");
    //
    ////			if (grammar.traceRules) {
    ////				if ( grammar instanceof TreeWalkerGrammar ) {
    ////					println("traceOut(\""+ s.getId() +"\",_t);");
    ////				}
    ////				else {
    ////					println("traceOut(\""+ s.getId() +"\");");
    ////				}
    ////			}
    ////
    ////			tabs--;
    ////			println("}");
    //		}

    // close the generated method body
    tabs--;
    println("}");
    println("");

    // Restore the AST generation state
    genAST = savegenAST;

    // restore char save state
    // saveText = oldsaveTest;
}
3566:
3567: public void genRuleHeader(RuleSymbol s, boolean startSymbol) {
3568: tabs = 1;
3569: if (DEBUG_CODE_GENERATOR)
3570: System.out.println("genRuleHeader(" + s.getId() + ")");
3571: if (!s.isDefined()) {
3572: antlrTool.error("undefined rule: " + s.getId());
3573: return;
3574: }
3575:
3576: // Generate rule return type, name, arguments
3577: RuleBlock rblk = s.getBlock();
3578: currentRule = rblk;
3579: currentASTResult = s.getId();
3580:
3581: // Save the AST generation state, and set it to that of the rule
3582: boolean savegenAST = genAST;
3583: genAST = genAST && rblk.getAutoGen();
3584:
3585: // boolean oldsaveTest = saveText;
3586: saveText = rblk.getAutoGen();
3587:
3588: // Gen method access
3589: print(s.access + ": ");
3590:
3591: // Gen method return type (note lexer return action set at rule creation)
3592: if (rblk.returnAction != null) {
3593: // Has specified return value
3594: _print(extractTypeOfAction(rblk.returnAction, rblk
3595: .getLine(), rblk.getColumn())
3596: + " ");
3597: } else {
3598: // No specified return value
3599: _print("void ");
3600: }
3601:
3602: // Gen method name
3603: _print(s.getId() + "(");
3604:
3605: // Additional rule parameters common to all rules for this grammar
3606: _print(commonExtraParams);
3607: if (commonExtraParams.length() != 0 && rblk.argAction != null) {
3608: _print(",");
3609: }
3610:
3611: // Gen arguments
3612: if (rblk.argAction != null) {
3613: // Has specified arguments
3614: _println("");
3615: tabs++;
3616: println(rblk.argAction);
3617: tabs--;
3618: print(")");
3619: } else {
3620: // No specified arguments
3621: _print(")");
3622: }
3623: _println(";");
3624:
3625: tabs--;
3626:
3627: // Restore the AST generation state
3628: genAST = savegenAST;
3629:
3630: // restore char save state
3631: // saveText = oldsaveTest;
3632: }
3633:
3634: private void GenRuleInvocation(RuleRefElement rr) {
3635: // dump rule name
3636: _print(rr.targetRule + "(");
3637:
3638: // lexers must tell rule if it should set _returnToken
3639: if (grammar instanceof LexerGrammar) {
3640: // if labeled, could access Token, so tell rule to create
3641: if (rr.getLabel() != null) {
3642: _print("true");
3643: } else {
3644: _print("false");
3645: }
3646: if (commonExtraArgs.length() != 0 || rr.args != null) {
3647: _print(",");
3648: }
3649: }
3650:
3651: // Extra arguments common to all rules for this grammar
3652: _print(commonExtraArgs);
3653: if (commonExtraArgs.length() != 0 && rr.args != null) {
3654: _print(",");
3655: }
3656:
3657: // Process arguments to method, if any
3658: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
3659: if (rr.args != null) {
3660: // When not guessing, execute user arg action
3661: ActionTransInfo tInfo = new ActionTransInfo();
3662: // FIXME: fix line number passed to processActionForTreeSpecifiers here..
3663: // this one might be a bit off..
3664: String args = processActionForSpecialSymbols(rr.args,
3665: rr.line, currentRule, tInfo);
3666: if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
3667: antlrTool.error("Arguments of rule reference '"
3668: + rr.targetRule + "' cannot set or ref #"
3669: + currentRule.getRuleName() + " on line "
3670: + rr.getLine());
3671: }
3672: _print(args);
3673:
3674: // Warn if the rule accepts no arguments
3675: if (rs.block.argAction == null) {
3676: antlrTool.warning("Rule '" + rr.targetRule
3677: + "' accepts no arguments", grammar
3678: .getFilename(), rr.getLine(), rr.getColumn());
3679: }
3680: } else {
3681: // For C++, no warning if rule has parameters, because there may be default
3682: // values for all of the parameters
3683: //if (rs.block.argAction != null) {
3684: // tool.warning("Missing parameters on reference to rule "+rr.targetRule, rr.getLine());
3685: //}
3686: }
3687: _println(");");
3688:
3689: // move down to the first child while parsing
3690: if (grammar instanceof TreeWalkerGrammar) {
3691: println("_t = _retTree;");
3692: }
3693: }
3694:
3695: protected void genSemPred(String pred, int line) {
3696: // translate $ and # references
3697: ActionTransInfo tInfo = new ActionTransInfo();
3698: pred = processActionForSpecialSymbols(pred, line, currentRule,
3699: tInfo);
3700: // ignore translation info...we don't need to do anything with it.
3701: String escapedPred = charFormatter.escapeString(pred);
3702:
3703: // if debugging, wrap the semantic predicate evaluation in a method
3704: // that can tell SemanticPredicateListeners the result
3705: if (grammar.debuggingOutput
3706: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
3707: pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING," //FIXME
3708: + addSemPred(escapedPred) + "," + pred + ")";
3709: println("if (!(" + pred + "))");
3710: tabs++;
3711: println("throw " + namespaceAntlr + "SemanticException(\""
3712: + escapedPred + "\");");
3713: tabs--;
3714: }
3715:
3716: /** Write an array of Strings which are the semantic predicate
3717: * expressions. The debugger will reference them by number only
3718: */
3719: protected void genSemPredMap(String prefix) {
3720: Enumeration e = semPreds.elements();
3721: println("const char* " + prefix + "_semPredNames[] = {");
3722: tabs++;
3723: while (e.hasMoreElements())
3724: println("\"" + e.nextElement() + "\",");
3725: println("0");
3726: tabs--;
3727: println("};");
3728: }
3729:
3730: protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
3731: if (DEBUG_CODE_GENERATOR)
3732: System.out.println("gen=>(" + blk + ")");
3733:
3734: // Dump synpred result variable
3735: println("bool synPredMatched" + blk.ID + " = false;");
3736: // Gen normal lookahead test
3737: println("if (" + lookaheadExpr + ") {");
3738: tabs++;
3739:
3740: // Save input state
3741: if (grammar instanceof TreeWalkerGrammar) {
3742: println(labeledElementType + " __t" + blk.ID + " = _t;");
3743: } else {
3744: println("int _m" + blk.ID + " = mark();");
3745: }
3746:
3747: // Once inside the try, assume synpred works unless exception caught
3748: println("synPredMatched" + blk.ID + " = true;");
3749: println("inputState->guessing++;");
3750:
3751: // if debugging, tell listeners that a synpred has started
3752: if (grammar.debuggingOutput
3753: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
3754: println("fireSyntacticPredicateStarted();");
3755: }
3756:
3757: syntacticPredLevel++;
3758: println("try {");
3759: tabs++;
3760: gen((AlternativeBlock) blk); // gen code to test predicate
3761: tabs--;
3762: //println("System.out.println(\"pred "+blk+" succeeded\");");
3763: println("}");
3764: println("catch (" + exceptionThrown + "& pe) {");
3765: tabs++;
3766: println("synPredMatched" + blk.ID + " = false;");
3767: //println("System.out.println(\"pred "+blk+" failed\");");
3768: tabs--;
3769: println("}");
3770:
3771: // Restore input state
3772: if (grammar instanceof TreeWalkerGrammar) {
3773: println("_t = __t" + blk.ID + ";");
3774: } else {
3775: println("rewind(_m" + blk.ID + ");");
3776: }
3777:
3778: println("inputState->guessing--;");
3779:
3780: // if debugging, tell listeners how the synpred turned out
3781: if (grammar.debuggingOutput
3782: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
3783: println("if (synPredMatched" + blk.ID + ")");
3784: println(" fireSyntacticPredicateSucceeded();");
3785: println("else");
3786: println(" fireSyntacticPredicateFailed();");
3787: }
3788:
3789: syntacticPredLevel--;
3790: tabs--;
3791:
3792: // Close lookahead test
3793: println("}");
3794:
3795: // Test synpred result
3796: println("if ( synPredMatched" + blk.ID + " ) {");
3797: }
3798:
3799: /** Generate a static array containing the names of the tokens,
3800: * indexed by the token type values. This static array is used
3801: * to format error messages so that the token identifers or literal
3802: * strings are displayed instead of the token numbers.
3803: *
3804: * If a lexical rule has a paraphrase, use it rather than the
3805: * token label.
3806: */
3807: public void genTokenStrings(String prefix) {
3808: // Generate a string for each token. This creates a static
3809: // array of Strings indexed by token type.
3810: // println("");
3811: println("const char* " + prefix + "tokenNames[] = {");
3812: tabs++;
3813:
3814: // Walk the token vocabulary and generate a Vector of strings
3815: // from the tokens.
3816: Vector v = grammar.tokenManager.getVocabulary();
3817: for (int i = 0; i < v.size(); i++) {
3818: String s = (String) v.elementAt(i);
3819: if (s == null) {
3820: s = "<" + String.valueOf(i) + ">";
3821: }
3822: if (!s.startsWith("\"") && !s.startsWith("<")) {
3823: TokenSymbol ts = (TokenSymbol) grammar.tokenManager
3824: .getTokenSymbol(s);
3825: if (ts != null && ts.getParaphrase() != null) {
3826: s = StringUtils.stripFrontBack(ts.getParaphrase(),
3827: "\"", "\"");
3828: }
3829: }
3830: print(charFormatter.literalString(s));
3831: _println(",");
3832: }
3833: println("0");
3834:
3835: // Close the string array initailizer
3836: tabs--;
3837: println("};");
3838: }
3839:
3840: /** Generate the token types C++ file */
3841: protected void genTokenTypes(TokenManager tm) throws IOException {
3842: // Open the token output header file and set the currentOutput stream
3843: outputFile = tm.getName() + TokenTypesFileSuffix + ".hpp";
3844: outputLine = 1;
3845: currentOutput = antlrTool.openOutputFile(outputFile);
3846: //SAS: changed for proper text file io
3847:
3848: tabs = 0;
3849:
3850: // Generate a guard wrapper
3851: println("#ifndef INC_" + tm.getName() + TokenTypesFileSuffix
3852: + "_hpp_");
3853: println("#define INC_" + tm.getName() + TokenTypesFileSuffix
3854: + "_hpp_");
3855: println("");
3856:
3857: if (nameSpace != null)
3858: nameSpace.emitDeclarations(currentOutput);
3859:
3860: // Generate the header common to all C++ files
3861: genHeader(outputFile);
3862:
3863: // Encapsulate the definitions in an interface. This can be done
3864: // because they are all constants.
3865: println("struct " + tm.getName() + TokenTypesFileSuffix + " {");
3866: tabs++;
3867: println("enum {");
3868: tabs++;
3869:
3870: // Generate a definition for each token type
3871: Vector v = tm.getVocabulary();
3872:
3873: // Do special tokens manually
3874: println("EOF_ = " + Token.EOF_TYPE + ",");
3875:
3876: // Move the other special token to the end, so we can solve
3877: // the superfluous comma problem easily
3878:
3879: for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
3880: String s = (String) v.elementAt(i);
3881: if (s != null) {
3882: if (s.startsWith("\"")) {
3883: // a string literal
3884: StringLiteralSymbol sl = (StringLiteralSymbol) tm
3885: .getTokenSymbol(s);
3886: if (sl == null) {
3887: antlrTool.panic("String literal " + s
3888: + " not in symbol table");
3889: } else if (sl.label != null) {
3890: println(sl.label + " = " + i + ",");
3891: } else {
3892: String mangledName = mangleLiteral(s);
3893: if (mangledName != null) {
3894: // We were able to create a meaningful mangled token name
3895: println(mangledName + " = " + i + ",");
3896: // if no label specified, make the label equal to the mangled name
3897: sl.label = mangledName;
3898: } else {
3899: println("// " + s + " = " + i);
3900: }
3901: }
3902: } else if (!s.startsWith("<")) {
3903: println(s + " = " + i + ",");
3904: }
3905: }
3906: }
3907:
3908: // Moved from above
3909: println("NULL_TREE_LOOKAHEAD = " + Token.NULL_TREE_LOOKAHEAD);
3910:
3911: // Close the enum
3912: tabs--;
3913: println("};");
3914:
3915: // Close the interface
3916: tabs--;
3917: println("};");
3918:
3919: if (nameSpace != null)
3920: nameSpace.emitClosures(currentOutput);
3921:
3922: // Generate a guard wrapper
3923: println("#endif /*INC_" + tm.getName() + TokenTypesFileSuffix
3924: + "_hpp_*/");
3925:
3926: // Close the tokens output file
3927: currentOutput.close();
3928: currentOutput = null;
3929: exitIfError();
3930: }
3931:
3932: /** Process a string for an simple expression for use in xx/action.g
3933: * it is used to cast simple tokens/references to the right type for
3934: * the generated language. Basically called for every element in
3935: * the vector to getASTCreateString(vector V)
3936: * @param str A String.
3937: */
3938: public String processStringForASTConstructor(String str) {
3939: if (usingCustomAST
3940: && ((grammar instanceof TreeWalkerGrammar) || (grammar instanceof ParserGrammar))
3941: && !(grammar.tokenManager.tokenDefined(str))) {
3942: // System.out.println("processStringForASTConstructor: "+str+" with cast");
3943: return "static_cast<" + namespaceAntlr + "RefAST>(" + str
3944: + ")";
3945: } else {
3946: // System.out.println("processStringForASTConstructor: "+str);
3947: return str;
3948: }
3949: }
3950:
3951: /** Get a string for an expression to generate creation of an AST subtree.
3952: * @param v A Vector of String, where each element is an expression
3953: * in the target language yielding an AST node.
3954: */
3955: public String getASTCreateString(Vector v) {
3956: if (v.size() == 0) {
3957: return "";
3958: }
3959: StringBuffer buf = new StringBuffer();
3960: // the labeledElementASTType here can probably be a cast or nothing
3961: // in the case of ! usingCustomAST
3962: buf.append(labeledElementASTType + "(astFactory->make((new "
3963: + namespaceAntlr + "ASTArray(" + v.size() + "))");
3964: for (int i = 0; i < v.size(); i++) {
3965: buf.append("->add(" + v.elementAt(i) + ")");
3966: }
3967: buf.append("))");
3968: return buf.toString();
3969: }
3970:
3971: /** Get a string for an expression to generate creating of an AST node
3972: * @param str The arguments to the AST constructor
3973: */
3974: public String getASTCreateString(GrammarAtom atom, String str) {
3975: if (atom != null && atom.getASTNodeType() != null) {
3976:
3977: // this atom is using a heterogeneous AST type.
3978: // make note of the factory needed to generate it..
3979: // later this is inserted into the initializeFactory method.
3980: astTypes.appendElement("factory.registerFactory("
3981: + atom.getType() + ", \"" + atom.getASTNodeType()
3982: + "\", " + atom.getASTNodeType() + "::factory);");
3983:
3984: // after above init the factory knows what to generate...
3985: // maybe add apropriate cast here..
3986: // return "astFactory->create("+atom.getType()+", "+str+")";
3987: // // The string itself should be enough...
3988: return "astFactory->create(" + str + ")";
3989: // return "Ref"+atom.getASTNodeType()+"(new "+atom.getASTNodeType()+"("+str+"))";
3990: } else {
3991: // FIXME: This is *SO* ugly! but it will have to do for now...
3992: // 2.7.2 will have better I hope
3993: // this is due to the usage of getASTCreateString from inside
3994: // actions/cpp/action.g
3995: boolean is_constructor = false;
3996: if (str.indexOf(',') != -1)
3997: is_constructor = grammar.tokenManager.tokenDefined(str
3998: .substring(0, str.indexOf(',')));
3999:
4000: // System.out.println("getAstCreateString(as): "+str+" "+grammar.tokenManager.tokenDefined(str));
4001: if (usingCustomAST
4002: && (grammar instanceof TreeWalkerGrammar)
4003: && !(grammar.tokenManager.tokenDefined(str))
4004: && !is_constructor)
4005: return "astFactory->create(static_cast<"
4006: + namespaceAntlr + "RefAST>(" + str + "))";
4007: else
4008: return "astFactory->create(" + str + ")";
4009: }
4010: }
4011:
4012: /** Get a string for an expression to generate creating of an AST node
4013: * @param str The arguments to the AST constructor
4014: */
4015: public String getASTCreateString(String str) {
4016: // System.out.println("getAstCreateString(str): "+str+" "+grammar.tokenManager.tokenDefined(str));
4017: if (usingCustomAST)
4018: return "static_cast<" + labeledElementASTType
4019: + ">(astFactory->create(static_cast<"
4020: + namespaceAntlr + "RefAST>(" + str + ")))";
4021: else
4022: return "astFactory->create(" + str + ")";
4023: }
4024:
4025: protected String getLookaheadTestExpression(Lookahead[] look, int k) {
4026: StringBuffer e = new StringBuffer(100);
4027: boolean first = true;
4028:
4029: e.append("(");
4030: for (int i = 1; i <= k; i++) {
4031: BitSet p = look[i].fset;
4032: if (!first) {
4033: e.append(") && (");
4034: }
4035: first = false;
4036:
4037: // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
4038: // There is no way to predict what that token would be. Just
4039: // allow anything instead.
4040: if (look[i].containsEpsilon()) {
4041: e.append("true");
4042: } else {
4043: e.append(getLookaheadTestTerm(i, p));
4044: }
4045: }
4046: e.append(")");
4047:
4048: return e.toString();
4049: }
4050:
4051: /** Generate a lookahead test expression for an alternate. This
4052: * will be a series of tests joined by '&&' and enclosed by '()',
4053: * the number of such tests being determined by the depth of the lookahead.
4054: */
4055: protected String getLookaheadTestExpression(Alternative alt,
4056: int maxDepth) {
4057: int depth = alt.lookaheadDepth;
4058: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
4059: // if the decision is nondeterministic, do the best we can: LL(k)
4060: // any predicates that are around will be generated later.
4061: depth = grammar.maxk;
4062: }
4063:
4064: if (maxDepth == 0) {
4065: // empty lookahead can result from alt with sem pred
4066: // that can see end of token. E.g., A : {pred}? ('a')? ;
4067: return "true";
4068: }
4069:
4070: /*
4071: boolean first = true;
4072: for (int i=1; i<=depth && i<=maxDepth; i++) {
4073: BitSet p = alt.cache[i].fset;
4074: if (!first) {
4075: e.append(") && (");
4076: }
4077: first = false;
4078:
4079: // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
4080: // There is no way to predict what that token would be. Just
4081: // allow anything instead.
4082: if ( alt.cache[i].containsEpsilon() ) {
4083: e.append("true");
4084: }
4085: else {
4086: e.append(getLookaheadTestTerm(i, p));
4087: }
4088: }
4089:
4090: e.append(")");
4091: */
4092:
4093: return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
4094: }
4095:
4096: /**Generate a depth==1 lookahead test expression given the BitSet.
4097: * This may be one of:
4098: * 1) a series of 'x==X||' tests
4099: * 2) a range test using >= && <= where possible,
4100: * 3) a bitset membership test for complex comparisons
4101: * @param k The lookahead level
4102: * @param p The lookahead set for level k
4103: */
4104: protected String getLookaheadTestTerm(int k, BitSet p) {
4105: // Determine the name of the item to be compared
4106: String ts = lookaheadString(k);
4107:
4108: // Generate a range expression if possible
4109: int[] elems = p.toArray();
4110: if (elementsAreRange(elems)) {
4111: return getRangeExpression(k, elems);
4112: }
4113:
4114: // Generate a bitset membership test if possible
4115: StringBuffer e;
4116: int degree = p.degree();
4117: if (degree == 0) {
4118: return "true";
4119: }
4120:
4121: if (degree >= bitsetTestThreshold) {
4122: int bitsetIdx = markBitsetForGen(p);
4123: return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
4124: }
4125:
4126: // Otherwise, generate the long-winded series of "x==X||" tests
4127: e = new StringBuffer();
4128: for (int i = 0; i < elems.length; i++) {
4129: // Get the compared-to item (token or character value)
4130: String cs = getValueString(elems[i]);
4131:
4132: // Generate the element comparison
4133: if (i > 0)
4134: e.append(" || ");
4135: e.append(ts);
4136: e.append(" == ");
4137: e.append(cs);
4138: }
4139: return e.toString();
4140: }
4141:
4142: /** Return an expression for testing a contiguous renage of elements
4143: * @param k The lookahead level
4144: * @param elems The elements representing the set, usually from BitSet.toArray().
4145: * @return String containing test expression.
4146: */
4147: public String getRangeExpression(int k, int[] elems) {
4148: if (!elementsAreRange(elems)) {
4149: antlrTool.panic("getRangeExpression called with non-range");
4150: }
4151: int begin = elems[0];
4152: int end = elems[elems.length - 1];
4153: return "(" + lookaheadString(k) + " >= "
4154: + getValueString(begin) + " && " + lookaheadString(k)
4155: + " <= " + getValueString(end) + ")";
4156: }
4157:
4158: /** getValueString: get a string representation of a token or char value
4159: * @param value The token or char value
4160: */
4161: private String getValueString(int value) {
4162: String cs;
4163: if (grammar instanceof LexerGrammar) {
4164: cs = charFormatter.literalChar(value);
4165: } else {
4166: TokenSymbol ts = grammar.tokenManager
4167: .getTokenSymbolAt(value);
4168: if (ts == null) {
4169: return "" + value; // return token type as string
4170: // tool.panic("vocabulary for token type " + value + " is null");
4171: }
4172: String tId = ts.getId();
4173: if (ts instanceof StringLiteralSymbol) {
4174: // if string literal, use predefined label if any
4175: // if no predefined, try to mangle into LITERAL_xxx.
4176: // if can't mangle, use int value as last resort
4177: StringLiteralSymbol sl = (StringLiteralSymbol) ts;
4178: String label = sl.getLabel();
4179: if (label != null) {
4180: cs = label;
4181: } else {
4182: cs = mangleLiteral(tId);
4183: if (cs == null) {
4184: cs = String.valueOf(value);
4185: }
4186: }
4187: } else {
4188: if (tId.equals("EOF"))
4189: cs = namespaceAntlr + "Token::EOF_TYPE";
4190: else
4191: cs = tId;
4192: }
4193: }
4194: return cs;
4195: }
4196:
4197: /**Is the lookahead for this alt empty? */
4198: protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
4199: int depth = alt.lookaheadDepth;
4200: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
4201: depth = grammar.maxk;
4202: }
4203: for (int i = 1; i <= depth && i <= maxDepth; i++) {
4204: BitSet p = alt.cache[i].fset;
4205: if (p.degree() != 0) {
4206: return false;
4207: }
4208: }
4209: return true;
4210: }
4211:
4212: private String lookaheadString(int k) {
4213: if (grammar instanceof TreeWalkerGrammar) {
4214: return "_t->getType()";
4215: }
4216: return "LA(" + k + ")";
4217: }
4218:
4219: /** Mangle a string literal into a meaningful token name. This is
4220: * only possible for literals that are all characters. The resulting
4221: * mangled literal name is literalsPrefix with the text of the literal
4222: * appended.
4223: * @return A string representing the mangled literal, or null if not possible.
4224: */
4225: private String mangleLiteral(String s) {
4226: String mangled = antlrTool.literalsPrefix;
4227: for (int i = 1; i < s.length() - 1; i++) {
4228: if (!Character.isLetter(s.charAt(i)) && s.charAt(i) != '_') {
4229: return null;
4230: }
4231: mangled += s.charAt(i);
4232: }
4233: if (antlrTool.upperCaseMangledLiterals) {
4234: mangled = mangled.toUpperCase();
4235: }
4236: return mangled;
4237: }
4238:
4239: /** Map an identifier to it's corresponding tree-node variable.
4240: * This is context-sensitive, depending on the rule and alternative
4241: * being generated
4242: * @param idParam The identifier name to map
4243: * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
4244: */
4245: public String mapTreeId(String idParam, ActionTransInfo transInfo) {
4246: // if not in an action of a rule, nothing to map.
4247: if (currentRule == null)
4248: return idParam;
4249: // System.out.print("mapTreeId: "+idParam+" "+currentRule.getRuleName()+" ");
4250:
4251: boolean in_var = false;
4252: String id = idParam;
4253: if (grammar instanceof TreeWalkerGrammar) {
4254: // RK: hmmm this seems odd. If buildAST is false it translates
4255: // #rulename_in to 'rulename_in' else to 'rulename_AST_in' which indeed
4256: // exists. disabling for now.. and hope it doesn't blow up somewhere.
4257: if (!grammar.buildAST) {
4258: in_var = true;
4259: // System.out.println("in_var1");
4260: }
4261: // If the id ends with "_in", then map it to the input variable
4262: // else
4263: if (id.length() > 3
4264: && id.lastIndexOf("_in") == id.length() - 3) {
4265: // Strip off the "_in"
4266: id = id.substring(0, id.length() - 3);
4267: in_var = true;
4268: // System.out.println("in_var2");
4269: }
4270: }
4271: // System.out.print(in_var+"\t");
4272:
4273: // Check the rule labels. If id is a label, then the output
4274: // variable is label_AST, and the input variable is plain label.
4275: for (int i = 0; i < currentRule.labeledElements.size(); i++) {
4276: AlternativeElement elt = (AlternativeElement) currentRule.labeledElements
4277: .elementAt(i);
4278: if (elt.getLabel().equals(id)) {
4279: // if( in_var )
4280: // System.out.println("returning (vec) "+(in_var ? id : id + "_AST"));
4281: return in_var ? id : id + "_AST";
4282: }
4283: }
4284:
4285: // Failing that, check the id-to-variable map for the alternative.
4286: // If the id is in the map, then output variable is the name in the
4287: // map, and input variable is name_in
4288: String s = (String) treeVariableMap.get(id);
4289: if (s != null) {
4290: if (s == NONUNIQUE) {
4291: // if( in_var )
4292: // System.out.println("returning null (nonunique)");
4293: // There is more than one element with this id
4294: antlrTool.error("Ambiguous reference to AST element "
4295: + id + " in rule " + currentRule.getRuleName());
4296: return null;
4297: } else if (s.equals(currentRule.getRuleName())) {
4298: // a recursive call to the enclosing rule is
4299: // ambiguous with the rule itself.
4300: // if( in_var )
4301: // System.out.println("returning null (rulename)");
4302: antlrTool.error("Ambiguous reference to AST element "
4303: + id + " in rule " + currentRule.getRuleName());
4304: return null;
4305: } else {
4306: // if( in_var )
4307: // System.out.println("returning "+(in_var?s+"_in":s));
4308: return in_var ? s + "_in" : s;
4309: }
4310: }
4311:
4312: // System.out.println("Last check: "+id+" == "+currentRule.getRuleName());
4313: // Failing that, check the rule name itself. Output variable
4314: // is rule_AST; input variable is rule_AST_in (treeparsers).
4315: if (id.equals(currentRule.getRuleName())) {
4316: String r = in_var ? id + "_AST_in" : id + "_AST";
4317: if (transInfo != null) {
4318: if (!in_var) {
4319: transInfo.refRuleRoot = r;
4320: }
4321: }
4322: // if( in_var )
4323: // System.out.println("returning (r) "+r);
4324: return r;
4325: } else {
4326: // if( in_var )
4327: // System.out.println("returning (last) "+id);
4328: // id does not map to anything -- return itself.
4329: return id;
4330: }
4331: }
4332:
4333: /** Given an element and the name of an associated AST variable,
4334: * create a mapping between the element "name" and the variable name.
4335: */
4336: private void mapTreeVariable(AlternativeElement e, String name) {
4337: // For tree elements, defer to the root
4338: if (e instanceof TreeElement) {
4339: mapTreeVariable(((TreeElement) e).root, name);
4340: return;
4341: }
4342:
4343: // Determine the name of the element, if any, for mapping purposes
4344: String elName = null;
4345:
4346: // Don't map labeled items
4347: if (e.getLabel() == null) {
4348: if (e instanceof TokenRefElement) {
4349: // use the token id
4350: elName = ((TokenRefElement) e).atomText;
4351: } else if (e instanceof RuleRefElement) {
4352: // use the rule name
4353: elName = ((RuleRefElement) e).targetRule;
4354: }
4355: }
4356: // Add the element to the tree variable map if it has a name
4357: if (elName != null) {
4358: if (treeVariableMap.get(elName) != null) {
4359: // Name is already in the map -- mark it as duplicate
4360: treeVariableMap.remove(elName);
4361: treeVariableMap.put(elName, NONUNIQUE);
4362: } else {
4363: treeVariableMap.put(elName, name);
4364: }
4365: }
4366: }
4367:
4368: /** Lexically process tree-specifiers in the action.
4369: * This will replace #id and #(...) with the appropriate
4370: * function calls and/or variables.
4371: */
4372: protected String processActionForSpecialSymbols(String actionStr,
4373: int line, RuleBlock currentRule, ActionTransInfo tInfo) {
4374: if (actionStr == null || actionStr.length() == 0)
4375: return null;
4376:
4377: // The action trans info tells us (at the moment) whether an
4378: // assignment was done to the rule's tree root.
4379: if (grammar == null)
4380: return actionStr;
4381:
4382: if ((grammar.buildAST && actionStr.indexOf('#') != -1)
4383: || grammar instanceof TreeWalkerGrammar
4384: || ((grammar instanceof LexerGrammar || grammar instanceof ParserGrammar) && actionStr
4385: .indexOf('$') != -1)) {
4386: // Create a lexer to read an action and return the translated version
4387: antlr.actions.cpp.ActionLexer lexer = new antlr.actions.cpp.ActionLexer(
4388: actionStr, currentRule, this , tInfo);
4389: lexer.setLineOffset(line);
4390: lexer.setFilename(grammar.getFilename());
4391: lexer.setTool(antlrTool);
4392:
4393: try {
4394: lexer.mACTION(true);
4395: actionStr = lexer.getTokenObject().getText();
4396: // System.out.println("action translated: "+actionStr);
4397: // System.out.println("trans info is "+tInfo);
4398: } catch (RecognitionException ex) {
4399: lexer.reportError(ex);
4400: return actionStr;
4401: } catch (TokenStreamException tex) {
4402: antlrTool.panic("Error reading action:" + actionStr);
4403: return actionStr;
4404: } catch (CharStreamException io) {
4405: antlrTool.panic("Error reading action:" + actionStr);
4406: return actionStr;
4407: }
4408: }
4409: return actionStr;
4410: }
4411:
4412: private String fixNameSpaceOption(String ns) {
4413: ns = StringUtils.stripFrontBack(ns, "\"", "\"");
4414: if (ns.length() > 2
4415: && !ns.substring(ns.length() - 2, ns.length()).equals(
4416: "::"))
4417: ns += "::";
4418: return ns;
4419: }
4420:
4421: private void setupGrammarParameters(Grammar g) {
4422: if (g instanceof ParserGrammar || g instanceof LexerGrammar
4423: || g instanceof TreeWalkerGrammar) {
4424: /* RK: options also have to be added to Grammar.java and for options
4425: * on the file level entries have to be defined in
4426: * DefineGrammarSymbols.java and passed around via 'globals' in
4427: * antlrTool.java
4428: */
4429: if (antlrTool.nameSpace != null)
4430: nameSpace = antlrTool.nameSpace;
4431:
4432: if (antlrTool.namespaceStd != null)
4433: namespaceStd = fixNameSpaceOption(antlrTool.namespaceStd);
4434:
4435: if (antlrTool.namespaceAntlr != null)
4436: namespaceAntlr = fixNameSpaceOption(antlrTool.namespaceAntlr);
4437:
4438: genHashLines = antlrTool.genHashLines;
4439:
4440: /* let grammar level options override filelevel ones...
4441: */
4442: if (g.hasOption("namespace")) {
4443: Token t = g.getOption("namespace");
4444: if (t != null) {
4445: nameSpace = new NameSpace(t.getText());
4446: }
4447: }
4448: if (g.hasOption("namespaceAntlr")) {
4449: Token t = g.getOption("namespaceAntlr");
4450: if (t != null) {
4451: String ns = StringUtils.stripFrontBack(t.getText(),
4452: "\"", "\"");
4453: if (ns != null) {
4454: if (ns.length() > 2
4455: && !ns.substring(ns.length() - 2,
4456: ns.length()).equals("::"))
4457: ns += "::";
4458: namespaceAntlr = ns;
4459: }
4460: }
4461: }
4462: if (g.hasOption("namespaceStd")) {
4463: Token t = g.getOption("namespaceStd");
4464: if (t != null) {
4465: String ns = StringUtils.stripFrontBack(t.getText(),
4466: "\"", "\"");
4467: if (ns != null) {
4468: if (ns.length() > 2
4469: && !ns.substring(ns.length() - 2,
4470: ns.length()).equals("::"))
4471: ns += "::";
4472: namespaceStd = ns;
4473: }
4474: }
4475: }
4476: if (g.hasOption("genHashLines")) {
4477: Token t = g.getOption("genHashLines");
4478: if (t != null) {
4479: String val = StringUtils.stripFrontBack(
4480: t.getText(), "\"", "\"");
4481: genHashLines = val.equals("true");
4482: }
4483: }
4484: noConstructors = antlrTool.noConstructors; // get the default
4485: if (g.hasOption("noConstructors")) {
4486: Token t = g.getOption("noConstructors");
4487: if ((t != null)
4488: && !(t.getText().equals("true") || t.getText()
4489: .equals("false")))
4490: antlrTool
4491: .error(
4492: "noConstructors option must be true or false",
4493: antlrTool.getGrammarFile(), t
4494: .getLine(), t.getColumn());
4495: noConstructors = t.getText().equals("true");
4496: }
4497: }
4498: if (g instanceof ParserGrammar) {
4499: labeledElementASTType = namespaceAntlr + "RefAST";
4500: labeledElementASTInit = namespaceAntlr + "nullAST";
4501: if (g.hasOption("ASTLabelType")) {
4502: Token tsuffix = g.getOption("ASTLabelType");
4503: if (tsuffix != null) {
4504: String suffix = StringUtils.stripFrontBack(tsuffix
4505: .getText(), "\"", "\"");
4506: if (suffix != null) {
4507: usingCustomAST = true;
4508: labeledElementASTType = suffix;
4509: labeledElementASTInit = "static_cast<" + suffix
4510: + ">(" + namespaceAntlr + "nullAST)";
4511: }
4512: }
4513: }
4514: labeledElementType = namespaceAntlr + "RefToken ";
4515: labeledElementInit = namespaceAntlr + "nullToken";
4516: commonExtraArgs = "";
4517: commonExtraParams = "";
4518: commonLocalVars = "";
4519: lt1Value = "LT(1)";
4520: exceptionThrown = namespaceAntlr + "RecognitionException";
4521: throwNoViable = "throw " + namespaceAntlr
4522: + "NoViableAltException(LT(1), getFilename());";
4523: } else if (g instanceof LexerGrammar) {
4524: labeledElementType = "char ";
4525: labeledElementInit = "'\\0'";
4526: commonExtraArgs = "";
4527: commonExtraParams = "bool _createToken";
4528: commonLocalVars = "int _ttype; " + namespaceAntlr
4529: + "RefToken _token; int _begin=text.length();";
4530: lt1Value = "LA(1)";
4531: exceptionThrown = namespaceAntlr + "RecognitionException";
4532: throwNoViable = "throw "
4533: + namespaceAntlr
4534: + "NoViableAltForCharException(LA(1), getFilename(), getLine(), getColumn());";
4535: } else if (g instanceof TreeWalkerGrammar) {
4536: labeledElementInit = namespaceAntlr + "nullAST";
4537: labeledElementASTInit = namespaceAntlr + "nullAST";
4538: labeledElementASTType = namespaceAntlr + "RefAST";
4539: labeledElementType = namespaceAntlr + "RefAST";
4540: commonExtraParams = namespaceAntlr + "RefAST _t";
4541: throwNoViable = "throw " + namespaceAntlr
4542: + "NoViableAltException(_t);";
4543: lt1Value = "_t";
4544: if (g.hasOption("ASTLabelType")) {
4545: Token tsuffix = g.getOption("ASTLabelType");
4546: if (tsuffix != null) {
4547: String suffix = StringUtils.stripFrontBack(tsuffix
4548: .getText(), "\"", "\"");
4549: if (suffix != null) {
4550: usingCustomAST = true;
4551: labeledElementASTType = suffix;
4552: labeledElementType = suffix;
4553: labeledElementInit = "static_cast<" + suffix
4554: + ">(" + namespaceAntlr + "nullAST)";
4555: labeledElementASTInit = labeledElementInit;
4556: commonExtraParams = suffix + " _t";
4557: throwNoViable = "throw " + namespaceAntlr
4558: + "NoViableAltException(static_cast<"
4559: + namespaceAntlr + "RefAST>(_t));";
4560: lt1Value = "_t";
4561: }
4562: }
4563: }
4564: if (!g.hasOption("ASTLabelType")) {
4565: g.setOption("ASTLabelType", new Token(
4566: ANTLRTokenTypes.STRING_LITERAL, namespaceAntlr
4567: + "RefAST"));
4568: }
4569: commonExtraArgs = "_t";
4570: commonLocalVars = "";
4571: exceptionThrown = namespaceAntlr + "RecognitionException";
4572: } else {
4573: antlrTool.panic("Unknown grammar type");
4574: }
4575: }
4576:
4577: private String textOrChar(String text) {
4578: // check to see if the text is a single character
4579: if (text.startsWith("'")) {
4580: // assume it also ends with '
4581: return charFormatter.literalChar(ANTLRLexer
4582: .tokenTypeForCharLiteral(text));
4583: } else
4584: return text;
4585: }
4586: }
|