0001: package antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.jGuru.com
0005: * Software rights: http://www.antlr.org/RIGHTS.html
0006: *
0007: * $Id: CSharpCodeGenerator.java,v 1.1 2004/01/21 19:18:29 rgrimm Exp $
0008: */
0009:
0010: //
0011: // ANTLR C# Code Generator by Micheal Jordan
0012: // Kunle Odutola : kunle UNDERSCORE odutola AT hotmail DOT com
0013: // Anthony Oguntimehin
0014: //
0015: // With many thanks to Eric V. Smith from the ANTLR list.
0016: //
0017: // HISTORY:
0018: //
0019: // 17-May-2002 kunle Fixed bug in OctalToUnicode() - was processing non-Octal escape sequences
0020: // Also added namespace support based on Cpp version.
0021: // 07-Jun-2002 kunle Added Scott Ellis's _saveIndex creation optimizations
0022: // 09-Sep-2002 richardN Richard Ney's bug-fix for literals table construction.
0023: // [ Hashtable ctor needed instance of hash code provider not it's class name. ]
0024: // 17-Sep-2002 kunle & Added all Token ID definitions as data member of every Lexer/Parser/TreeParser
0025: // AOg [ A by-product of problem-solving phase of the hetero-AST changes below
0026: // but, it breaks nothing and restores "normal" ANTLR codegen behaviour. ]
0027: // 19-Oct-2002 kunle & Completed the work required to support heterogenous ASTs (many changes)
0028: // AOg &
0029: // michealj
0030: // 14-Nov-2002 michealj Added "initializeASTFactory()" to support flexible ASTFactory initialization.
0031: // [ Thanks to Ric Klaren - for suggesting it and implementing it for Cpp. ]
0032: // 18-Nov-2002 kunle Added fix to make xx_tokenSet_xx names CLS compliant.
0033: // 01-Dec-2002 richardN Patch to reduce "unreachable code" warnings
0034: // 01-Dec-2002 richardN Fix to generate correct TreeParser token-type classnames.
0035: // 12-Jan-2002 kunle & Generated Lexers, Parsers and TreeParsers now support ANTLR's tracing option.
0036: // michealj
0037: // 12-Jan-2003 kunle Fixed issue where initializeASTFactory() was generated when "buildAST=false"
0038: // 14-Jan-2003 AOg initializeASTFactory(AST factory) method was modifying the Parser's "astFactory"
0039: // member rather than it's own "factory" parameter. Fixed.
0040: // 18-Jan-2003 kunle & Fixed reported issues with ASTFactory create() calls for hetero ASTs
0041: // michealj - code generated for LEXER token with hetero-AST option specified does not compile
0042: // - code generated for imaginary tokens with hetero-AST option specified uses default AST type
0043: // - code generated for per-TokenRef hetero-AST option specified does not compile
0044: // 18-Jan-2003 kunle initializeASTFactory(AST) method is now a static public member
0045: //
0046: //
0047: import java.util.Enumeration;
0048: import java.util.Hashtable;
0049: import java.util.HashSet;
0050: import antlr.collections.impl.BitSet;
0051: import antlr.collections.impl.Vector;
0052: import java.io.PrintWriter; //SAS: changed for proper text file io
0053: import java.io.IOException;
0054: import java.io.FileWriter;
0055:
/** Generates MyParser.cs, MyLexer.cs and MyParserTokenTypes.cs */
public class CSharpCodeGenerator extends CodeGenerator {
    // Non-zero while generating code inside a syntactic predicate;
    // label assignment and AST construction are suppressed when > 0.
    protected int syntacticPredLevel = 0;

    // Are we generating ASTs (for parsers and tree parsers) right now?
    protected boolean genAST = false;

    // Are we saving the text consumed (for lexers) right now?
    protected boolean saveText = false;

    // Grammar parameters set up to handle different grammar classes.
    // These are used to get instanceof tests out of code generation.
    boolean usingCustomAST = false;
    String labeledElementType;
    String labeledElementASTType;
    String labeledElementInit;
    String commonExtraArgs;
    String commonExtraParams;
    String commonLocalVars;
    String lt1Value;
    String exceptionThrown;
    String throwNoViable;

    // Tracks the rule being generated. Used for mapTreeId.
    RuleBlock currentRule;
    // Tracks the rule or labeled subrule being generated. Used for AST generation.
    String currentASTResult;

    /** Mapping between the ids used in the current alt, and the
     * names of variables used to represent their AST values.
     */
    Hashtable treeVariableMap = new Hashtable();

    /** Used to keep track of which AST variables have been defined in a rule
     * (except for the #rule_name and #rule_name_in vars).
     */
    HashSet declaredASTVariables = new HashSet();

    /* Count of unnamed generated variables */
    int astVarNumber = 1;

    /** Special value used to mark duplicates in treeVariableMap.
     * Deliberately built with new String() so the sentinel is distinguishable
     * from any other string by reference identity (== check), never by equals().
     */
    protected static final String NONUNIQUE = new String();

    // Upper bound on generated switch-case size; ASCII is the max.
    public static final int caseSizeThreshold = 127; // ascii is max

    // Semantic predicate strings collected when building a debugging parser.
    private Vector semPreds;

    // Used to keep track of which heterogeneous AST types are used,
    // which need to be set in the ASTFactory of the generated parser.
    private java.util.Vector astTypes;

    // Namespace the generated C# code is emitted into, if any.
    private static CSharpNameSpace nameSpace = null;

    // _saveIndex creation optimization -- don't create it unless we need to use it
    boolean bSaveIndexCreated = false;
0112:
/** Creates a CSharp code-generator for a Grammar.
 * The caller must still call setTool, setBehavior, and setAnalyzer
 * before generating code.
 */
public CSharpCodeGenerator() {
    // Base-class initialisation runs implicitly; all this subclass needs
    // to do is install the C#-specific character formatter.
    charFormatter = new CSharpCharFormatter();
}
0121:
0122: /** Adds a semantic predicate string to the sem pred vector
0123: These strings will be used to build an array of sem pred names
0124: when building a debugging parser. This method should only be
0125: called when the debug option is specified
0126: */
0127: protected int addSemPred(String predicate) {
0128: semPreds.appendElement(predicate);
0129: return semPreds.size() - 1;
0130: }
0131:
0132: public void exitIfError() {
0133: if (antlrTool.hasError()) {
0134: antlrTool.fatalError("Exiting due to errors.");
0135: }
0136: }
0137:
0138: /**Generate the parser, lexer, treeparser, and token types in CSharp */
0139: public void gen() {
0140: // Do the code generation
0141: try {
0142: // Loop over all grammars
0143: Enumeration grammarIter = behavior.grammars.elements();
0144: while (grammarIter.hasMoreElements()) {
0145: Grammar g = (Grammar) grammarIter.nextElement();
0146: // Connect all the components to each other
0147: g.setGrammarAnalyzer(analyzer);
0148: g.setCodeGenerator(this );
0149: analyzer.setGrammar(g);
0150: // To get right overloading behavior across heterogeneous grammars
0151: setupGrammarParameters(g);
0152: g.generate();
0153: exitIfError();
0154: }
0155:
0156: // Loop over all token managers (some of which are lexers)
0157: Enumeration tmIter = behavior.tokenManagers.elements();
0158: while (tmIter.hasMoreElements()) {
0159: TokenManager tm = (TokenManager) tmIter.nextElement();
0160: if (!tm.isReadOnly()) {
0161: // Write the token manager tokens as CSharp
0162: // this must appear before genTokenInterchange so that
0163: // labels are set on string literals
0164: genTokenTypes(tm);
0165: // Write the token manager tokens as plain text
0166: genTokenInterchange(tm);
0167: }
0168: exitIfError();
0169: }
0170: } catch (IOException e) {
0171: antlrTool.reportException(e, null);
0172: }
0173: }
0174:
/** Generate code for the given grammar element.
 * @param action The {...} action to generate
 */
public void gen(ActionElement action) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genAction(" + action + ")");
    if (action.isSemPred) {
        // Semantic predicate: handled by the predicate machinery.
        genSemPred(action.actionText, action.line);
    } else {
        // Plain action: guard it so it only runs when the generated
        // parser is not guessing inside a syntactic predicate.
        if (grammar.hasSyntacticPredicate) {
            println("if (0==inputState.guessing)");
            println("{");
            tabs++;
        }

        // Translate special symbols (#id, #rule, ...) in the action text.
        ActionTransInfo tInfo = new ActionTransInfo();
        String actionStr = processActionForSpecialSymbols(
                action.actionText, action.getLine(), currentRule,
                tInfo);

        if (tInfo.refRuleRoot != null) {
            // Somebody referenced "#rule", make sure translated var is valid
            // assignment to #rule is left as a ref also, meaning that assignments
            // with no other refs like "#rule = foo();" still forces this code to be
            // generated (unnecessarily).
            println(tInfo.refRuleRoot + " = ("
                    + labeledElementASTType + ")currentAST.root;");
        }

        // dump the translated action
        printAction(actionStr);

        if (tInfo.assignToRoot) {
            // Somebody did a "#rule=", reset internal currentAST.root
            println("currentAST.root = " + tInfo.refRuleRoot + ";");
            // reset the child pointer too to be last sibling in sibling list
            println("if ( (null != " + tInfo.refRuleRoot
                    + ") && (null != " + tInfo.refRuleRoot
                    + ".getFirstChild()) )");
            tabs++;
            println("currentAST.child = " + tInfo.refRuleRoot
                    + ".getFirstChild();");
            tabs--;
            println("else");
            tabs++;
            println("currentAST.child = " + tInfo.refRuleRoot + ";");
            tabs--;
            println("currentAST.advanceChildToEnd();");
        }

        if (grammar.hasSyntacticPredicate) {
            // Close the guessing guard opened above.
            tabs--;
            println("}");
        }
    }
}
0231:
0232: /** Generate code for the given grammar element.
0233: * @param blk The "x|y|z|..." block to generate
0234: */
0235: public void gen(AlternativeBlock blk) {
0236: if (DEBUG_CODE_GENERATOR)
0237: System.out.println("gen(" + blk + ")");
0238: println("{");
0239: tabs++;
0240:
0241: genBlockPreamble(blk);
0242: genBlockInitAction(blk);
0243:
0244: // Tell AST generation to build subrule result
0245: String saveCurrentASTResult = currentASTResult;
0246: if (blk.getLabel() != null) {
0247: currentASTResult = blk.getLabel();
0248: }
0249:
0250: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0251:
0252: CSharpBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
0253: genBlockFinish(howToFinish, throwNoViable);
0254:
0255: tabs--;
0256: println("}");
0257:
0258: // Restore previous AST generation
0259: currentASTResult = saveCurrentASTResult;
0260: }
0261:
0262: /** Generate code for the given grammar element.
0263: * @param blk The block-end element to generate. Block-end
0264: * elements are synthesized by the grammar parser to represent
0265: * the end of a block.
0266: */
0267: public void gen(BlockEndElement end) {
0268: if (DEBUG_CODE_GENERATOR)
0269: System.out.println("genRuleEnd(" + end + ")");
0270: }
0271:
0272: /** Generate code for the given grammar element.
0273: * @param blk The character literal reference to generate
0274: */
0275: public void gen(CharLiteralElement atom) {
0276: if (DEBUG_CODE_GENERATOR)
0277: System.out.println("genChar(" + atom + ")");
0278:
0279: if (atom.getLabel() != null) {
0280: println(atom.getLabel() + " = " + lt1Value + ";");
0281: }
0282:
0283: boolean oldsaveText = saveText;
0284: saveText = saveText
0285: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0286: genMatch(atom);
0287: saveText = oldsaveText;
0288: }
0289:
0290: /** Generate code for the given grammar element.
0291: * @param blk The character-range reference to generate
0292: */
0293: public void gen(CharRangeElement r) {
0294: if (r.getLabel() != null && syntacticPredLevel == 0) {
0295: println(r.getLabel() + " = " + lt1Value + ";");
0296: }
0297: boolean flag = (grammar instanceof LexerGrammar && (!saveText || (r
0298: .getAutoGenType() == GrammarElement.AUTO_GEN_BANG)));
0299: if (flag)
0300: println("_saveIndex = text.Length;");
0301:
0302: println("matchRange(" + OctalToUnicode(r.beginText) + ","
0303: + OctalToUnicode(r.endText) + ");");
0304:
0305: if (flag)
0306: println("text.Length = _saveIndex;");
0307: }
0308:
0309: /** Generate the lexer CSharp file */
0310: public void gen(LexerGrammar g) throws IOException {
0311: // If debugging, create a new sempred vector for this grammar
0312: if (g.debuggingOutput)
0313: semPreds = new Vector();
0314:
0315: setGrammar(g);
0316: if (!(grammar instanceof LexerGrammar)) {
0317: antlrTool.panic("Internal error generating lexer");
0318: }
0319: genBody(g);
0320: }
0321:
/** Generate code for the given grammar element.
 * @param blk The (...)+ block to generate
 */
public void gen(OneOrMoreBlock blk) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("gen+(" + blk + ")");
    String label;
    String cnt;
    println("{ // ( ... )+");
    genBlockPreamble(blk);
    // Name the iteration counter and loop label after the block's
    // label when one is present, else after the block's unique ID.
    if (blk.getLabel() != null) {
        cnt = "_cnt_" + blk.getLabel();
    } else {
        cnt = "_cnt" + blk.ID;
    }
    println("int " + cnt + "=0;");
    if (blk.getLabel() != null) {
        label = blk.getLabel();
    } else {
        label = "_loop" + blk.ID;
    }

    println("for (;;)");
    println("{");
    tabs++;
    // generate the init action for ()+ ()* inside the loop
    // this allows us to do usefull EOF checking...
    genBlockInitAction(blk);

    // Tell AST generation to build subrule result
    String saveCurrentASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }

    // Run lookahead analysis; the boolean result is unused here but the
    // analyzer caches data that genCommonBlock consumes.
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    // or when lookahead derived purely from end-of-file
    // Lookahead analysis stops when end-of-file is hit,
    // returning set {epsilon}. Since {epsilon} is not
    // ambig with any real tokens, no error is reported
    // by deterministic() routines and we have to check
    // for the case where the lookahead depth didn't get
    // set to NONDETERMINISTIC (this only happens when the
    // FOLLOW contains real atoms + epsilon).
    boolean generateNonGreedyExitPath = false;
    int nonGreedyExitDepth = grammar.maxk;

    if (!blk.greedy
            && blk.exitLookaheadDepth <= grammar.maxk
            && blk.exitCache[blk.exitLookaheadDepth]
                    .containsEpsilon()) {
        generateNonGreedyExitPath = true;
        nonGreedyExitDepth = blk.exitLookaheadDepth;
    } else if (!blk.greedy
            && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
        generateNonGreedyExitPath = true;
    }

    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    if (generateNonGreedyExitPath) {
        if (DEBUG_CODE_GENERATOR) {
            System.out
                    .println("nongreedy (...)+ loop; exit depth is "
                            + blk.exitLookaheadDepth);
        }
        String predictExit = getLookaheadTestExpression(
                blk.exitCache, nonGreedyExitDepth);
        println("// nongreedy exit test");
        println("if ((" + cnt + " >= 1) && " + predictExit
                + ") goto " + label + "_breakloop;");
    }

    CSharpBlockFinishingInfo howToFinish = genCommonBlock(blk,
            false);
    // A (...)+ must match at least once: break out of the loop only
    // after the first successful iteration, else raise no-viable-alt.
    genBlockFinish(howToFinish, "if (" + cnt + " >= 1) { goto "
            + label + "_breakloop; } else { " + throwNoViable
            + "; }");

    println(cnt + "++;");
    tabs--;
    println("}");
    _print(label + "_breakloop:");
    println(";");
    println("} // ( ... )+");

    // Restore previous AST generation
    currentASTResult = saveCurrentASTResult;
}
0414:
0415: /** Generate the parser CSharp file */
0416: public void gen(ParserGrammar g) throws IOException {
0417:
0418: // if debugging, set up a new vector to keep track of sempred
0419: // strings for this grammar
0420: if (g.debuggingOutput)
0421: semPreds = new Vector();
0422:
0423: setGrammar(g);
0424: if (!(grammar instanceof ParserGrammar)) {
0425: antlrTool.panic("Internal error generating parser");
0426: }
0427: genBody(g);
0428: }
0429:
/** Generate code for the given grammar element.
 * @param rr The rule-reference to generate
 */
public void gen(RuleRefElement rr) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genRR(" + rr + ")");
    RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
    if (rs == null || !rs.isDefined()) {
        // Is this redundant???
        antlrTool.error("Rule '" + rr.targetRule
                + "' is not defined", grammar.getFilename(), rr
                .getLine(), rr.getColumn());
        return;
    }
    if (!(rs instanceof RuleSymbol)) {
        // Is this redundant???
        antlrTool.error("'" + rr.targetRule
                + "' does not name a grammar rule", grammar
                .getFilename(), rr.getLine(), rr.getColumn());
        return;
    }

    genErrorTryForElement(rr);

    // AST value for labeled rule refs in tree walker.
    // This is not AST construction; it is just the input tree node value.
    if (grammar instanceof TreeWalkerGrammar
            && rr.getLabel() != null && syntacticPredLevel == 0) {
        println(rr.getLabel() + " = _t==ASTNULL ? null : "
                + lt1Value + ";");
    }

    // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
    if (grammar instanceof LexerGrammar
            && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
        declareSaveIndexVariableIfNeeded();
        println("_saveIndex = text.Length;");
    }

    // Process return value assignment if any
    printTabs();
    if (rr.idAssign != null) {
        // Warn if the rule has no return type
        if (rs.block.returnAction == null) {
            antlrTool.warning("Rule '" + rr.targetRule
                    + "' has no return type",
                    grammar.getFilename(), rr.getLine(), rr
                            .getColumn());
        }
        _print(rr.idAssign + "=");
    } else {
        // Warn about return value if any, but not inside syntactic predicate
        if (!(grammar instanceof LexerGrammar)
                && syntacticPredLevel == 0
                && rs.block.returnAction != null) {
            antlrTool.warning("Rule '" + rr.targetRule
                    + "' returns a value", grammar.getFilename(),
                    rr.getLine(), rr.getColumn());
        }
    }

    // Call the rule
    GenRuleInvocation(rr);

    // if in lexer and ! on element or alt or rule, save buffer index to kill later
    if (grammar instanceof LexerGrammar
            && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
        declareSaveIndexVariableIfNeeded();
        println("text.Length = _saveIndex;");
    }

    // if not in a syntactic predicate
    if (syntacticPredLevel == 0) {
        // Wrap the AST/label bookkeeping in a guessing guard when the
        // grammar uses syntactic predicates.
        boolean doNoGuessTest = (grammar.hasSyntacticPredicate && (grammar.buildAST
                && rr.getLabel() != null || (genAST && rr
                .getAutoGenType() == GrammarElement.AUTO_GEN_NONE)));
        if (doNoGuessTest) {
            println("if (0 == inputState.guessing)");
            println("{");
            tabs++;
        }

        if (grammar.buildAST && rr.getLabel() != null) {
            // always gen variable for rule return on labeled rules
            println(rr.getLabel() + "_AST = ("
                    + labeledElementASTType + ")returnAST;");
        }
        if (genAST) {
            switch (rr.getAutoGenType()) {
            case GrammarElement.AUTO_GEN_NONE:
                // Cast is required when a custom AST type is in use.
                if (usingCustomAST)
                    println("astFactory.addASTChild(currentAST, (AST)returnAST);");
                else
                    println("astFactory.addASTChild(currentAST, returnAST);");
                break;
            case GrammarElement.AUTO_GEN_CARET:
                antlrTool
                        .error("Internal: encountered ^ after rule reference");
                break;
            default:
                break;
            }
        }

        // if a lexer and labeled, Token label defined at rule level, just set it here
        if (grammar instanceof LexerGrammar
                && rr.getLabel() != null) {
            println(rr.getLabel() + " = returnToken_;");
        }

        if (doNoGuessTest) {
            tabs--;
            println("}");
        }
    }
    genErrorCatchForElement(rr);
}
0547:
0548: /** Generate code for the given grammar element.
0549: * @param blk The string-literal reference to generate
0550: */
0551: public void gen(StringLiteralElement atom) {
0552: if (DEBUG_CODE_GENERATOR)
0553: System.out.println("genString(" + atom + ")");
0554:
0555: // Variable declarations for labeled elements
0556: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0557: println(atom.getLabel() + " = " + lt1Value + ";");
0558: }
0559:
0560: // AST
0561: genElementAST(atom);
0562:
0563: // is there a bang on the literal?
0564: boolean oldsaveText = saveText;
0565: saveText = saveText
0566: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0567:
0568: // matching
0569: genMatch(atom);
0570:
0571: saveText = oldsaveText;
0572:
0573: // tack on tree cursor motion if doing a tree walker
0574: if (grammar instanceof TreeWalkerGrammar) {
0575: println("_t = _t.getNextSibling();");
0576: }
0577: }
0578:
0579: /** Generate code for the given grammar element.
0580: * @param blk The token-range reference to generate
0581: */
0582: public void gen(TokenRangeElement r) {
0583: genErrorTryForElement(r);
0584: if (r.getLabel() != null && syntacticPredLevel == 0) {
0585: println(r.getLabel() + " = " + lt1Value + ";");
0586: }
0587:
0588: // AST
0589: genElementAST(r);
0590:
0591: // match
0592: println("matchRange(" + OctalToUnicode(r.beginText) + ","
0593: + OctalToUnicode(r.endText) + ");");
0594: genErrorCatchForElement(r);
0595: }
0596:
0597: /** Generate code for the given grammar element.
0598: * @param blk The token-reference to generate
0599: */
0600: public void gen(TokenRefElement atom) {
0601: if (DEBUG_CODE_GENERATOR)
0602: System.out.println("genTokenRef(" + atom + ")");
0603: if (grammar instanceof LexerGrammar) {
0604: antlrTool.panic("Token reference found in lexer");
0605: }
0606: genErrorTryForElement(atom);
0607: // Assign Token value to token label variable
0608: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0609: println(atom.getLabel() + " = " + lt1Value + ";");
0610: }
0611:
0612: // AST
0613: genElementAST(atom);
0614: // matching
0615: genMatch(atom);
0616: genErrorCatchForElement(atom);
0617:
0618: // tack on tree cursor motion if doing a tree walker
0619: if (grammar instanceof TreeWalkerGrammar) {
0620: println("_t = _t.getNextSibling();");
0621: }
0622: }
0623:
/** Generate code for a tree-match element: #( root child1 child2 ... ).
 * @param t The tree element to generate
 */
public void gen(TreeElement t) {
    // save AST cursor
    println("AST __t" + t.ID + " = _t;");

    // If there is a label on the root, then assign that to the variable
    if (t.root.getLabel() != null) {
        println(t.root.getLabel() + " = (ASTNULL == _t) ? null : ("
                + labeledElementASTType + ")_t;");
    }

    // check for invalid modifiers ! and ^ on tree element roots
    if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG) {
        antlrTool
                .error(
                        "Suffixing a root node with '!' is not implemented",
                        grammar.getFilename(), t.getLine(), t
                                .getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET) {
        antlrTool
                .warning(
                        "Suffixing a root node with '^' is redundant; already a root",
                        grammar.getFilename(), t.getLine(), t
                                .getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }

    // Generate AST variables
    genElementAST(t.root);
    if (grammar.buildAST) {
        // Save the AST construction state
        println("ASTPair __currentAST" + t.ID
                + " = currentAST.copy();");
        // Make the next item added a child of the TreeElement root
        println("currentAST.root = currentAST.child;");
        println("currentAST.child = null;");
    }

    // match root
    if (t.root instanceof WildcardElement) {
        // A wildcard root matches any non-null node.
        println("if (null == _t) throw new MismatchedTokenException();");
    } else {
        genMatch(t.root);
    }
    // move to list of children
    println("_t = _t.getFirstChild();");

    // walk list of children, generating code for each
    for (int i = 0; i < t.getAlternatives().size(); i++) {
        Alternative a = t.getAlternativeAt(i);
        AlternativeElement e = a.head;
        while (e != null) {
            e.generate();
            e = e.next;
        }
    }

    if (grammar.buildAST) {
        // restore the AST construction state to that just after the
        // tree root was added
        println("currentAST = __currentAST" + t.ID + ";");
    }
    // restore AST cursor
    println("_t = __t" + t.ID + ";");
    // move cursor to sibling of tree just parsed
    println("_t = _t.getNextSibling();");
}
0692:
0693: /** Generate the tree-parser CSharp file */
0694: public void gen(TreeWalkerGrammar g) throws IOException {
0695: // SAS: debugging stuff removed for now...
0696: setGrammar(g);
0697: if (!(grammar instanceof TreeWalkerGrammar)) {
0698: antlrTool.panic("Internal error generating tree-walker");
0699: }
0700: genBody(g);
0701: }
0702:
0703: /** Generate code for the given grammar element.
0704: * @param wc The wildcard element to generate
0705: */
0706: public void gen(WildcardElement wc) {
0707: // Variable assignment for labeled elements
0708: if (wc.getLabel() != null && syntacticPredLevel == 0) {
0709: println(wc.getLabel() + " = " + lt1Value + ";");
0710: }
0711:
0712: // AST
0713: genElementAST(wc);
0714: // Match anything but EOF
0715: if (grammar instanceof TreeWalkerGrammar) {
0716: println("if (null == _t) throw new MismatchedTokenException();");
0717: } else if (grammar instanceof LexerGrammar) {
0718: if (grammar instanceof LexerGrammar
0719: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0720: declareSaveIndexVariableIfNeeded();
0721: println("_saveIndex = text.Length;");
0722: }
0723: println("matchNot(EOF/*_CHAR*/);");
0724: if (grammar instanceof LexerGrammar
0725: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
0726: declareSaveIndexVariableIfNeeded();
0727: println("text.Length = _saveIndex;"); // kill text atom put in buffer
0728: }
0729: } else {
0730: println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
0731: }
0732:
0733: // tack on tree cursor motion if doing a tree walker
0734: if (grammar instanceof TreeWalkerGrammar) {
0735: println("_t = _t.getNextSibling();");
0736: }
0737: }
0738:
/** Generate code for the given grammar element.
 * @param blk The (...)* block to generate
 */
public void gen(ZeroOrMoreBlock blk) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("gen*(" + blk + ")");
    println("{ // ( ... )*");
    tabs++;
    genBlockPreamble(blk);
    String label;
    // Name the generated loop label after the block's label when one is
    // present, else after the block's unique ID.
    if (blk.getLabel() != null) {
        label = blk.getLabel();
    } else {
        label = "_loop" + blk.ID;
    }
    println("for (;;)");
    println("{");
    tabs++;
    // generate the init action for ()+ ()* inside the loop
    // this allows us to do usefull EOF checking...
    genBlockInitAction(blk);

    // Tell AST generation to build subrule result
    String saveCurrentASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }

    // Run lookahead analysis; the boolean result is unused here but the
    // analyzer caches data that genCommonBlock consumes.
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    // or when lookahead derived purely from end-of-file
    // Lookahead analysis stops when end-of-file is hit,
    // returning set {epsilon}. Since {epsilon} is not
    // ambig with any real tokens, no error is reported
    // by deterministic() routines and we have to check
    // for the case where the lookahead depth didn't get
    // set to NONDETERMINISTIC (this only happens when the
    // FOLLOW contains real atoms + epsilon).
    boolean generateNonGreedyExitPath = false;
    int nonGreedyExitDepth = grammar.maxk;

    if (!blk.greedy
            && blk.exitLookaheadDepth <= grammar.maxk
            && blk.exitCache[blk.exitLookaheadDepth]
                    .containsEpsilon()) {
        generateNonGreedyExitPath = true;
        nonGreedyExitDepth = blk.exitLookaheadDepth;
    } else if (!blk.greedy
            && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
        generateNonGreedyExitPath = true;
    }
    if (generateNonGreedyExitPath) {
        if (DEBUG_CODE_GENERATOR) {
            System.out
                    .println("nongreedy (...)* loop; exit depth is "
                            + blk.exitLookaheadDepth);
        }
        String predictExit = getLookaheadTestExpression(
                blk.exitCache, nonGreedyExitDepth);
        println("// nongreedy exit test");
        println("if (" + predictExit + ") goto " + label
                + "_breakloop;");
    }

    CSharpBlockFinishingInfo howToFinish = genCommonBlock(blk,
            false);
    // A (...)* may match zero times: on no-viable-alt just exit the loop.
    genBlockFinish(howToFinish, "goto " + label + "_breakloop;");

    tabs--;
    println("}");
    _print(label + "_breakloop:");
    println(";");
    tabs--;
    println("} // ( ... )*");

    // Restore previous AST generation
    currentASTResult = saveCurrentASTResult;
}
0819:
/** Generate an alternative.
 * @param alt The alternative to generate
 * @param blk The block to which the alternative belongs
 */
protected void genAlt(Alternative alt, AlternativeBlock blk) {
    // Save the AST generation state, and set it to that of the alt
    boolean savegenAST = genAST;
    genAST = genAST && alt.getAutoGen();

    // Same for text saving (lexers).
    boolean oldsaveTest = saveText;
    saveText = saveText && alt.getAutoGen();

    // Reset the variable name map for the alternative
    Hashtable saveMap = treeVariableMap;
    treeVariableMap = new Hashtable();

    // Generate try block around the alt for error handling
    if (alt.exceptionSpec != null) {
        println("try // for error handling");
        println("{");
        tabs++;
    }

    // Walk the alternative's elements up to the synthesized block end,
    // letting each element generate its own code.
    AlternativeElement elem = alt.head;
    while (!(elem instanceof BlockEndElement)) {
        elem.generate(); // alt can begin with anything. Ask target to gen.
        elem = elem.next;
    }

    if (genAST) {
        if (blk instanceof RuleBlock) {
            // Set the AST return value for the rule
            RuleBlock rblk = (RuleBlock) blk;
            if (usingCustomAST) {
                println(rblk.getRuleName() + "_AST = ("
                        + labeledElementASTType
                        + ")currentAST.root;");
            } else {
                println(rblk.getRuleName()
                        + "_AST = currentAST.root;");
            }
        } else if (blk.getLabel() != null) {
            // ### future: also set AST value for labeled subrules.
            // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
            antlrTool.warning("Labeled subrules not yet supported",
                    grammar.getFilename(), blk.getLine(), blk
                            .getColumn());
        }
    }

    if (alt.exceptionSpec != null) {
        // close try block
        tabs--;
        println("}");
        genErrorHandler(alt.exceptionSpec);
    }

    // Restore the saved generation state.
    genAST = savegenAST;
    saveText = oldsaveTest;

    treeVariableMap = saveMap;
}
0882:
0883: /** Generate all the bitsets to be used in the parser or lexer
0884: * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
0885: * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
0886: * Note that most languages do not support object initialization inside a
0887: * class definition, so other code-generators may have to separate the
0888: * bitset declarations from the initializations (e.g., put the initializations
0889: * in the generated constructor instead).
0890: * @param bitsetList The list of bitsets to generate.
0891: * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
0892: */
0893: protected void genBitsets(Vector bitsetList, int maxVocabulary) {
0894: println("");
0895: for (int i = 0; i < bitsetList.size(); i++) {
0896: BitSet p = (BitSet) bitsetList.elementAt(i);
0897: // Ensure that generated BitSet is large enough for vocabulary
0898: p.growToInclude(maxVocabulary);
0899: genBitSet(p, i);
0900: }
0901: }
0902:
0903: /** Do something simple like:
0904: * private static final long[] mk_tokenSet_0() {
0905: * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
0906: * return data;
0907: * }
0908: * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
0909: *
0910: * Or, for large bitsets, optimize init so ranges are collapsed into loops.
0911: * This is most useful for lexers using unicode.
0912: */
0913: private void genBitSet(BitSet p, int id) {
0914: // initialization data
0915: println("private static long[] mk_" + getBitsetName(id) + "()");
0916: println("{");
0917: tabs++;
0918: int n = p.lengthInLongWords();
0919: if (n < BITSET_OPTIMIZE_INIT_THRESHOLD) {
0920: println("long[] data = { " + p.toStringOfWords() + "};");
0921: } else {
0922: // will init manually, allocate space then set values
0923: println("long[] data = new long[" + n + "];");
0924: long[] elems = p.toPackedArray();
0925: for (int i = 0; i < elems.length;) {
0926: if ((i + 1) == elems.length || elems[i] != elems[i + 1]) {
0927: // last number or no run of numbers, just dump assignment
0928: println("data[" + i + "]=" + elems[i] + "L;");
0929: i++;
0930: } else {
0931: // scan to find end of run
0932: int j;
0933: for (j = i + 1; j < elems.length
0934: && elems[j] == elems[i]; j++) {
0935: ;
0936: }
0937: // j-1 is last member of run
0938: println("for (int i = " + i + "; i<=" + (j - 1)
0939: + "; i++) { data[i]=" + elems[i] + "L; }");
0940: i = j;
0941: }
0942: }
0943: }
0944:
0945: println("return data;");
0946: tabs--;
0947: println("}");
0948: // BitSet object
0949: println("public static readonly BitSet " + getBitsetName(id)
0950: + " = new BitSet(" + "mk_" + getBitsetName(id) + "()"
0951: + ");");
0952: }
0953:
0954: /** Given the index of a bitset in the bitset list, generate a unique name.
0955: * Specific code-generators may want to override this
0956: * if the language does not allow '_' or numerals in identifiers.
0957: * @param index The index of the bitset in the bitset list.
0958: */
0959: protected String getBitsetName(int index) {
0960: return "tokenSet_" + index + "_";
0961: }
0962:
0963: /** Generate the finish of a block, using a combination of the info
0964: * returned from genCommonBlock() and the action to perform when
0965: * no alts were taken
0966: * @param howToFinish The return of genCommonBlock()
0967: * @param noViableAction What to generate when no alt is taken
0968: */
0969: private void genBlockFinish(CSharpBlockFinishingInfo howToFinish,
0970: String noViableAction) {
0971:
0972: if (howToFinish.needAnErrorClause
0973: && (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
0974: if (howToFinish.generatedAnIf) {
0975: println("else");
0976: println("{");
0977: } else {
0978: println("{");
0979: }
0980: tabs++;
0981: println(noViableAction);
0982: tabs--;
0983: println("}");
0984: }
0985:
0986: if (howToFinish.postscript != null) {
0987: if (howToFinish.needAnErrorClause
0988: && howToFinish.generatedSwitch
0989: && !howToFinish.generatedAnIf
0990: && noViableAction != null) {
0991: // Check to make sure that noViableAction is only a throw statement
0992: if (noViableAction.indexOf("throw") == 0
0993: || noViableAction.indexOf("goto") == 0) {
0994: // Remove the break statement since it isn't reachable with a throw exception
0995: int endOfBreak = howToFinish.postscript
0996: .indexOf("break;") + 6;
0997: String newPostScript = howToFinish.postscript
0998: .substring(endOfBreak);
0999: println(newPostScript);
1000: } else {
1001: println(howToFinish.postscript);
1002: }
1003: } else {
1004: println(howToFinish.postscript);
1005: }
1006: }
1007: }
1008:
1009: /** Generate the init action for a block, which may be a RuleBlock or a
1010: * plain AlternativeBLock.
1011: * @blk The block for which the preamble is to be generated.
1012: */
1013: protected void genBlockInitAction(AlternativeBlock blk) {
1014: // dump out init action
1015: if (blk.initAction != null) {
1016: printAction(processActionForSpecialSymbols(blk.initAction,
1017: blk.getLine(), currentRule, null));
1018: }
1019: }
1020:
/** Generate the header for a block, which may be a RuleBlock or a
 * plain AlternativeBlock. This generates any variable declarations
 * and syntactic-predicate-testing variables.
 * Only RuleBlocks produce output here: one declaration per labeled
 * element, whose type depends on whether the element is a rule
 * ref/subrule or a token/char atom, and on the grammar kind.
 * @blk The block for which the preamble is to be generated.
 */
protected void genBlockPreamble(AlternativeBlock blk) {
    // define labels for rule blocks.
    if (blk instanceof RuleBlock) {
        RuleBlock rblk = (RuleBlock) blk;
        if (rblk.labeledElements != null) {
            // one declaration per labeled element of the rule
            for (int i = 0; i < rblk.labeledElements.size(); i++) {

                AlternativeElement a = (AlternativeElement) rblk.labeledElements
                        .elementAt(i);
                //System.out.println("looking at labeled element: "+a);
                //Variables for labeled rule refs and
                //subrules are different than variables for
                //grammar atoms. This test is a little tricky
                //because we want to get all rule refs and ebnf,
                //but not rule blocks or syntactic predicates
                if (a instanceof RuleRefElement
                        || a instanceof AlternativeBlock
                        && !(a instanceof RuleBlock)
                        && !(a instanceof SynPredBlock)) {

                    if (!(a instanceof RuleRefElement)
                            && ((AlternativeBlock) a).not
                            && analyzer
                                    .subruleCanBeInverted(
                                            ((AlternativeBlock) a),
                                            grammar instanceof LexerGrammar)) {
                        // Special case for inverted subrules that
                        // will be inlined. Treat these like
                        // token or char literal references
                        println(labeledElementType + " "
                                + a.getLabel() + " = "
                                + labeledElementInit + ";");
                        if (grammar.buildAST) {
                            genASTDeclaration(a);
                        }
                    } else {
                        if (grammar.buildAST) {
                            // Always gen AST variables for
                            // labeled elements, even if the
                            // element itself is marked with !
                            genASTDeclaration(a);
                        }
                        if (grammar instanceof LexerGrammar) {
                            // lexer labels hold the matched Token
                            println("Token " + a.getLabel()
                                    + " = null;");
                        }
                        if (grammar instanceof TreeWalkerGrammar) {
                            // always generate rule-ref variables
                            // for tree walker
                            println(labeledElementType + " "
                                    + a.getLabel() + " = "
                                    + labeledElementInit + ";");
                        }
                    }
                } else {
                    // It is a token or literal reference. Generate the
                    // correct variable type for this grammar
                    println(labeledElementType + " " + a.getLabel()
                            + " = " + labeledElementInit + ";");
                    // In addition, generate *_AST variables if building ASTs
                    if (grammar.buildAST) {
                        //println(labeledElementASTType+" " + a.getLabel() + "_AST = null;");
                        if (a instanceof GrammarAtom
                                && ((GrammarAtom) a)
                                        .getASTNodeType() != null) {
                            // atom carries an explicit AST node type
                            GrammarAtom ga = (GrammarAtom) a;
                            genASTDeclaration(a, ga
                                    .getASTNodeType());
                        } else {
                            genASTDeclaration(a);
                        }
                    }
                }
            }
        }
    }
}
1103:
1104: public void genBody(LexerGrammar g) throws IOException {
1105: // SAS: moved output creation to method so a subclass can change
1106: // how the output is generated (for VAJ interface)
1107: setupOutput(grammar.getClassName());
1108:
1109: genAST = false; // no way to gen trees.
1110: saveText = true; // save consumed characters.
1111:
1112: tabs = 0;
1113:
1114: // Generate header common to all CSharp output files
1115: genHeader();
1116: // Do not use printAction because we assume tabs==0
1117: println(behavior.getHeaderAction(""));
1118:
1119: // Generate the CSharp namespace declaration (if specified)
1120: if (nameSpace != null)
1121: nameSpace.emitDeclarations(currentOutput);
1122: tabs++;
1123:
1124: // Generate header specific to lexer CSharp file
1125: // println("import java.io.FileInputStream;");
1126: println("// Generate header specific to lexer CSharp file");
1127: println("using System;");
1128: println("using Stream = System.IO.Stream;");
1129: println("using TextReader = System.IO.TextReader;");
1130: println("using Hashtable = System.Collections.Hashtable;");
1131: println("");
1132: println("using TokenStreamException = antlr.TokenStreamException;");
1133: println("using TokenStreamIOException = antlr.TokenStreamIOException;");
1134: println("using TokenStreamRecognitionException = antlr.TokenStreamRecognitionException;");
1135: println("using CharStreamException = antlr.CharStreamException;");
1136: println("using CharStreamIOException = antlr.CharStreamIOException;");
1137: println("using ANTLRException = antlr.ANTLRException;");
1138: println("using CharScanner = antlr.CharScanner;");
1139: println("using InputBuffer = antlr.InputBuffer;");
1140: println("using ByteBuffer = antlr.ByteBuffer;");
1141: println("using CharBuffer = antlr.CharBuffer;");
1142: println("using Token = antlr.Token;");
1143: println("using CommonToken = antlr.CommonToken;");
1144: println("using RecognitionException = antlr.RecognitionException;");
1145: println("using NoViableAltForCharException = antlr.NoViableAltForCharException;");
1146: println("using MismatchedCharException = antlr.MismatchedCharException;");
1147: println("using TokenStream = antlr.TokenStream;");
1148: println("using LexerSharedInputState = antlr.LexerSharedInputState;");
1149: println("using BitSet = antlr.collections.impl.BitSet;");
1150:
1151: // Generate user-defined lexer file preamble
1152: println(grammar.preambleAction.getText());
1153:
1154: // Generate lexer class definition
1155: String sup = null;
1156: if (grammar.super Class != null) {
1157: sup = grammar.super Class;
1158: } else {
1159: sup = "antlr." + grammar.getSuperClass();
1160: }
1161:
1162: // print javadoc comment if any
1163: if (grammar.comment != null) {
1164: _println(grammar.comment);
1165: }
1166:
1167: Token tprefix = (Token) grammar.options
1168: .get("classHeaderPrefix");
1169: if (tprefix == null) {
1170: print("public ");
1171: } else {
1172: String p = StringUtils.stripFrontBack(tprefix.getText(),
1173: "\"", "\"");
1174: if (p == null) {
1175: print("public ");
1176: } else {
1177: print(p + " ");
1178: }
1179: }
1180:
1181: print("class " + grammar.getClassName() + " : " + sup);
1182: println(", TokenStream");
1183: Token tsuffix = (Token) grammar.options
1184: .get("classHeaderSuffix");
1185: if (tsuffix != null) {
1186: String suffix = StringUtils.stripFrontBack(tsuffix
1187: .getText(), "\"", "\"");
1188: if (suffix != null) {
1189: print(", " + suffix); // must be an interface name for CSharp
1190: }
1191: }
1192: println(" {");
1193: tabs++;
1194:
1195: // Generate 'const' definitions for Token IDs
1196: genTokenDefinitions(grammar.tokenManager);
1197:
1198: // Generate user-defined lexer class members
1199: print(processActionForSpecialSymbols(grammar.classMemberAction
1200: .getText(), grammar.classMemberAction.getLine(),
1201: currentRule, null));
1202:
1203: //
1204: // Generate the constructor from InputStream, which in turn
1205: // calls the ByteBuffer constructor
1206: //
1207: println("public " + grammar.getClassName()
1208: + "(Stream ins) : this(new ByteBuffer(ins))");
1209: println("{");
1210: println("}");
1211: println("");
1212:
1213: //
1214: // Generate the constructor from Reader, which in turn
1215: // calls the CharBuffer constructor
1216: //
1217: println("public " + grammar.getClassName()
1218: + "(TextReader r) : this(new CharBuffer(r))");
1219: println("{");
1220: println("}");
1221: println("");
1222:
1223: print("public " + grammar.getClassName() + "(InputBuffer ib)");
1224: // if debugging, wrap the input buffer in a debugger
1225: if (grammar.debuggingOutput)
1226: println(" : this(new LexerSharedInputState(new antlr.debug.DebuggingInputBuffer(ib)))");
1227: else
1228: println(" : this(new LexerSharedInputState(ib))");
1229: println("{");
1230: println("}");
1231: println("");
1232:
1233: //
1234: // Generate the constructor from InputBuffer (char or byte)
1235: //
1236: println("public " + grammar.getClassName()
1237: + "(LexerSharedInputState state) : base(state)");
1238: println("{");
1239: tabs++;
1240: println("initialize();");
1241: tabs--;
1242: println("}");
1243:
1244: // Generate the initialize function
1245: println("private void initialize()");
1246: println("{");
1247: tabs++;
1248:
1249: // if debugging, set up array variables and call user-overridable
1250: // debugging setup method
1251: if (grammar.debuggingOutput) {
1252: println("ruleNames = _ruleNames;");
1253: println("semPredNames = _semPredNames;");
1254: println("setupDebugging();");
1255: }
1256:
1257: // Generate the setting of various generated options.
1258: // These need to be before the literals since ANTLRHashString depends on
1259: // the casesensitive stuff.
1260: println("caseSensitiveLiterals = " + g.caseSensitiveLiterals
1261: + ";");
1262: println("setCaseSensitive(" + g.caseSensitive + ");");
1263:
1264: // Generate the initialization of a hashtable
1265: // containing the string literals used in the lexer
1266: // The literals variable itself is in CharScanner
1267: if (g.caseSensitiveLiterals)
1268: println("literals = new Hashtable();");
1269: else
1270: println("literals = new Hashtable(new System.Collections.CaseInsensitiveHashCodeProvider(), null);");
1271: Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
1272: while (keys.hasMoreElements()) {
1273: String key = (String) keys.nextElement();
1274: if (key.charAt(0) != '"') {
1275: continue;
1276: }
1277: TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
1278: if (sym instanceof StringLiteralSymbol) {
1279: StringLiteralSymbol s = (StringLiteralSymbol) sym;
1280: println("literals.Add(" + s.getId() + ", "
1281: + s.getTokenType() + ");");
1282: }
1283: }
1284:
1285: Enumeration ids;
1286: tabs--;
1287: println("}");
1288:
1289: // generate the rule name array for debugging
1290: if (grammar.debuggingOutput) {
1291: println("private const string[] _ruleNames = {");
1292:
1293: ids = grammar.rules.elements();
1294: int ruleNum = 0;
1295: while (ids.hasMoreElements()) {
1296: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1297: if (sym instanceof RuleSymbol)
1298: println(" \"" + ((RuleSymbol) sym).getId() + "\",");
1299: }
1300: println("};");
1301: }
1302:
1303: // Generate nextToken() rule.
1304: // nextToken() is a synthetic lexer rule that is the implicit OR of all
1305: // user-defined lexer rules.
1306: genNextToken();
1307:
1308: // Generate code for each rule in the lexer
1309: ids = grammar.rules.elements();
1310: int ruleNum = 0;
1311: while (ids.hasMoreElements()) {
1312: RuleSymbol sym = (RuleSymbol) ids.nextElement();
1313: // Don't generate the synthetic rules
1314: if (!sym.getId().equals("mnextToken")) {
1315: genRule(sym, false, ruleNum++, grammar.tokenManager);
1316: }
1317: exitIfError();
1318: }
1319:
1320: // Generate the semantic predicate map for debugging
1321: if (grammar.debuggingOutput)
1322: genSemPredMap();
1323:
1324: // Generate the bitsets used throughout the lexer
1325: genBitsets(bitsetsUsed, ((LexerGrammar) grammar).charVocabulary
1326: .size());
1327:
1328: println("");
1329: tabs--;
1330: println("}");
1331:
1332: tabs--;
1333: // Generate the CSharp namespace closures (if required)
1334: if (nameSpace != null)
1335: nameSpace.emitClosures(currentOutput);
1336:
1337: // Close the lexer output stream
1338: currentOutput.close();
1339: currentOutput = null;
1340: }
1341:
1342: public void genInitFactory(Grammar g) {
1343: if (g.buildAST) {
1344: // Generate the method to initialize an ASTFactory when we're
1345: // building AST's
1346: println("static public void initializeASTFactory( ASTFactory factory )");
1347: println("{");
1348: tabs++;
1349:
1350: println("factory.setMaxNodeType("
1351: + g.tokenManager.maxTokenType() + ");");
1352:
1353: // Walk the token vocabulary and generate code to register every TokenID->ASTNodeType
1354: // mapping specified in the tokens {...} section with the ASTFactory.
1355: Vector v = g.tokenManager.getVocabulary();
1356: for (int i = 0; i < v.size(); i++) {
1357: String s = (String) v.elementAt(i);
1358: if (s != null) {
1359: TokenSymbol ts = g.tokenManager.getTokenSymbol(s);
1360: if (ts != null && ts.getASTNodeType() != null) {
1361: println("factory.setTokenTypeASTNodeType(" + s
1362: + ", \"" + ts.getASTNodeType() + "\");");
1363: }
1364: }
1365: }
1366:
1367: tabs--;
1368: println("}");
1369: }
1370: }
1371:
1372: public void genBody(ParserGrammar g) throws IOException {
1373: // Open the output stream for the parser and set the currentOutput
1374: // SAS: moved file setup so subclass could do it (for VAJ interface)
1375: setupOutput(grammar.getClassName());
1376:
1377: genAST = grammar.buildAST;
1378:
1379: tabs = 0;
1380:
1381: // Generate the header common to all output files.
1382: genHeader();
1383: // Do not use printAction because we assume tabs==0
1384: println(behavior.getHeaderAction(""));
1385:
1386: // Generate the CSharp namespace declaration (if specified)
1387: if (nameSpace != null)
1388: nameSpace.emitDeclarations(currentOutput);
1389: tabs++;
1390:
1391: // Generate header for the parser
1392: println("// Generate the header common to all output files.");
1393: println("using System;");
1394: println("");
1395: println("using TokenBuffer = antlr.TokenBuffer;");
1396: println("using TokenStreamException = antlr.TokenStreamException;");
1397: println("using TokenStreamIOException = antlr.TokenStreamIOException;");
1398: println("using ANTLRException = antlr.ANTLRException;");
1399: println("using " + grammar.getSuperClass() + " = antlr."
1400: + grammar.getSuperClass() + ";");
1401: println("using Token = antlr.Token;");
1402: println("using TokenStream = antlr.TokenStream;");
1403: println("using RecognitionException = antlr.RecognitionException;");
1404: println("using NoViableAltException = antlr.NoViableAltException;");
1405: println("using MismatchedTokenException = antlr.MismatchedTokenException;");
1406: println("using SemanticException = antlr.SemanticException;");
1407: println("using ParserSharedInputState = antlr.ParserSharedInputState;");
1408: println("using BitSet = antlr.collections.impl.BitSet;");
1409: if (genAST) {
1410: println("using AST = antlr.collections.AST;");
1411: println("using ASTPair = antlr.ASTPair;");
1412: println("using ASTFactory = antlr.ASTFactory;");
1413: println("using ASTArray = antlr.collections.impl.ASTArray;");
1414: }
1415:
1416: // Output the user-defined parser preamble
1417: println(grammar.preambleAction.getText());
1418:
1419: // Generate parser class definition
1420: String sup = null;
1421: if (grammar.super Class != null)
1422: sup = grammar.super Class;
1423: else
1424: sup = "antlr." + grammar.getSuperClass();
1425:
1426: // print javadoc comment if any
1427: if (grammar.comment != null) {
1428: _println(grammar.comment);
1429: }
1430:
1431: Token tprefix = (Token) grammar.options
1432: .get("classHeaderPrefix");
1433: if (tprefix == null) {
1434: print("public ");
1435: } else {
1436: String p = StringUtils.stripFrontBack(tprefix.getText(),
1437: "\"", "\"");
1438: if (p == null) {
1439: print("public ");
1440: } else {
1441: print(p + " ");
1442: }
1443: }
1444:
1445: println("class " + grammar.getClassName() + " : " + sup);
1446:
1447: Token tsuffix = (Token) grammar.options
1448: .get("classHeaderSuffix");
1449: if (tsuffix != null) {
1450: String suffix = StringUtils.stripFrontBack(tsuffix
1451: .getText(), "\"", "\"");
1452: if (suffix != null)
1453: print(" , " + suffix); // must be an interface name for CSharp
1454: }
1455: println("{");
1456: tabs++;
1457:
1458: // Generate 'const' definitions for Token IDs
1459: genTokenDefinitions(grammar.tokenManager);
1460:
1461: // set up an array of all the rule names so the debugger can
1462: // keep track of them only by number -- less to store in tree...
1463: if (grammar.debuggingOutput) {
1464: println("private const string[] _ruleNames = {");
1465: tabs++;
1466:
1467: Enumeration ids = grammar.rules.elements();
1468: int ruleNum = 0;
1469: while (ids.hasMoreElements()) {
1470: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1471: if (sym instanceof RuleSymbol)
1472: println(" \"" + ((RuleSymbol) sym).getId() + "\",");
1473: }
1474: tabs--;
1475: println("};");
1476: }
1477:
1478: // Generate user-defined parser class members
1479: print(processActionForSpecialSymbols(grammar.classMemberAction
1480: .getText(), grammar.classMemberAction.getLine(),
1481: currentRule, null));
1482:
1483: // Generate parser class constructor from TokenBuffer
1484: println("");
1485: println("protected void initialize()");
1486: println("{");
1487: tabs++;
1488: println("tokenNames = tokenNames_;");
1489:
1490: if (grammar.buildAST)
1491: println("initializeFactory();");
1492:
1493: // if debugging, set up arrays and call the user-overridable
1494: // debugging setup method
1495: if (grammar.debuggingOutput) {
1496: println("ruleNames = _ruleNames;");
1497: println("semPredNames = _semPredNames;");
1498: println("setupDebugging(tokenBuf);");
1499: }
1500: tabs--;
1501: println("}");
1502: println("");
1503:
1504: println("");
1505: println("protected " + grammar.getClassName()
1506: + "(TokenBuffer tokenBuf, int k) : base(tokenBuf, k)");
1507: println("{");
1508: tabs++;
1509: println("initialize();");
1510: tabs--;
1511: println("}");
1512: println("");
1513:
1514: println("public " + grammar.getClassName()
1515: + "(TokenBuffer tokenBuf) : this(tokenBuf,"
1516: + grammar.maxk + ")");
1517: println("{");
1518: println("}");
1519: println("");
1520:
1521: // Generate parser class constructor from TokenStream
1522: println("protected " + grammar.getClassName()
1523: + "(TokenStream lexer, int k) : base(lexer,k)");
1524: println("{");
1525: tabs++;
1526: println("initialize();");
1527: tabs--;
1528: println("}");
1529: println("");
1530:
1531: println("public " + grammar.getClassName()
1532: + "(TokenStream lexer) : this(lexer," + grammar.maxk
1533: + ")");
1534: println("{");
1535: println("}");
1536: println("");
1537:
1538: println("public " + grammar.getClassName()
1539: + "(ParserSharedInputState state) : base(state,"
1540: + grammar.maxk + ")");
1541: println("{");
1542: tabs++;
1543: println("initialize();");
1544: tabs--;
1545: println("}");
1546: println("");
1547:
1548: astTypes = new java.util.Vector(100);
1549:
1550: // Generate code for each rule in the grammar
1551: Enumeration ids = grammar.rules.elements();
1552: int ruleNum = 0;
1553: while (ids.hasMoreElements()) {
1554: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1555: if (sym instanceof RuleSymbol) {
1556: RuleSymbol rs = (RuleSymbol) sym;
1557: genRule(rs, rs.references.size() == 0, ruleNum++,
1558: grammar.tokenManager);
1559: }
1560: exitIfError();
1561: }
1562: if (usingCustomAST) {
1563: // when we are using a custom AST, overload Parser.getAST() to return the
1564: // custom AST type
1565: println("public new " + labeledElementASTType + " getAST()");
1566: println("{");
1567: tabs++;
1568: println("return (" + labeledElementASTType + ") returnAST;");
1569: tabs--;
1570: println("}");
1571: println("");
1572: }
1573:
1574: // Generate the method that initializes the ASTFactory when we're
1575: // building AST's
1576: println("private void initializeFactory()");
1577: println("{");
1578: tabs++;
1579: if (grammar.buildAST) {
1580: println("if (astFactory == null)");
1581: println("{");
1582: tabs++;
1583: if (usingCustomAST) {
1584: println("astFactory = new ASTFactory(\""
1585: + labeledElementASTType + "\");");
1586: } else
1587: println("astFactory = new ASTFactory();");
1588: tabs--;
1589: println("}");
1590: println("initializeASTFactory( astFactory );");
1591: }
1592: tabs--;
1593: println("}");
1594: genInitFactory(g);
1595:
1596: // Generate the token names
1597: genTokenStrings();
1598:
1599: // Generate the bitsets used throughout the grammar
1600: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
1601:
1602: // Generate the semantic predicate map for debugging
1603: if (grammar.debuggingOutput)
1604: genSemPredMap();
1605:
1606: // Close class definition
1607: println("");
1608: tabs--;
1609: println("}");
1610:
1611: tabs--;
1612: // Generate the CSharp namespace closures (if required)
1613: if (nameSpace != null)
1614: nameSpace.emitClosures(currentOutput);
1615:
1616: // Close the parser output stream
1617: currentOutput.close();
1618: currentOutput = null;
1619: }
1620:
1621: public void genBody(TreeWalkerGrammar g) throws IOException {
1622: // Open the output stream for the parser and set the currentOutput
1623: // SAS: move file open to method so subclass can override it
1624: // (mainly for VAJ interface)
1625: setupOutput(grammar.getClassName());
1626:
1627: genAST = grammar.buildAST;
1628: tabs = 0;
1629:
1630: // Generate the header common to all output files.
1631: genHeader();
1632: // Do not use printAction because we assume tabs==0
1633: println(behavior.getHeaderAction(""));
1634:
1635: // Generate the CSharp namespace declaration (if specified)
1636: if (nameSpace != null)
1637: nameSpace.emitDeclarations(currentOutput);
1638: tabs++;
1639:
1640: // Generate header specific to the tree-parser CSharp file
1641: println("// Generate header specific to the tree-parser CSharp file");
1642: println("using System;");
1643: println("");
1644: println("using " + grammar.getSuperClass() + " = antlr."
1645: + grammar.getSuperClass() + ";");
1646: println("using Token = antlr.Token;");
1647: println("using AST = antlr.collections.AST;");
1648: println("using RecognitionException = antlr.RecognitionException;");
1649: println("using ANTLRException = antlr.ANTLRException;");
1650: println("using NoViableAltException = antlr.NoViableAltException;");
1651: println("using MismatchedTokenException = antlr.MismatchedTokenException;");
1652: println("using SemanticException = antlr.SemanticException;");
1653: println("using BitSet = antlr.collections.impl.BitSet;");
1654: println("using ASTPair = antlr.ASTPair;");
1655: println("using ASTFactory = antlr.ASTFactory;");
1656: println("using ASTArray = antlr.collections.impl.ASTArray;");
1657:
1658: // Output the user-defined parser premamble
1659: println(grammar.preambleAction.getText());
1660:
1661: // Generate parser class definition
1662: String sup = null;
1663: if (grammar.super Class != null) {
1664: sup = grammar.super Class;
1665: } else {
1666: sup = "antlr." + grammar.getSuperClass();
1667: }
1668: println("");
1669:
1670: // print javadoc comment if any
1671: if (grammar.comment != null) {
1672: _println(grammar.comment);
1673: }
1674:
1675: Token tprefix = (Token) grammar.options
1676: .get("classHeaderPrefix");
1677: if (tprefix == null) {
1678: print("public ");
1679: } else {
1680: String p = StringUtils.stripFrontBack(tprefix.getText(),
1681: "\"", "\"");
1682: if (p == null) {
1683: print("public ");
1684: } else {
1685: print(p + " ");
1686: }
1687: }
1688:
1689: println("class " + grammar.getClassName() + " : " + sup);
1690: Token tsuffix = (Token) grammar.options
1691: .get("classHeaderSuffix");
1692: if (tsuffix != null) {
1693: String suffix = StringUtils.stripFrontBack(tsuffix
1694: .getText(), "\"", "\"");
1695: if (suffix != null) {
1696: print(" , " + suffix); // must be an interface name for CSharp
1697: }
1698: }
1699: println("{");
1700: tabs++;
1701:
1702: // Generate 'const' definitions for Token IDs
1703: genTokenDefinitions(grammar.tokenManager);
1704:
1705: // Generate user-defined parser class members
1706: print(processActionForSpecialSymbols(grammar.classMemberAction
1707: .getText(), grammar.classMemberAction.getLine(),
1708: currentRule, null));
1709:
1710: // Generate default parser class constructor
1711: println("public " + grammar.getClassName() + "()");
1712: println("{");
1713: tabs++;
1714: println("tokenNames = tokenNames_;");
1715: tabs--;
1716: println("}");
1717: println("");
1718:
1719: astTypes = new java.util.Vector();
1720: // Generate code for each rule in the grammar
1721: Enumeration ids = grammar.rules.elements();
1722: int ruleNum = 0;
1723: String ruleNameInits = "";
1724: while (ids.hasMoreElements()) {
1725: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1726: if (sym instanceof RuleSymbol) {
1727: RuleSymbol rs = (RuleSymbol) sym;
1728: genRule(rs, rs.references.size() == 0, ruleNum++,
1729: grammar.tokenManager);
1730: }
1731: exitIfError();
1732: }
1733:
1734: if (usingCustomAST) {
1735: // when we are using a custom ast override Parser.getAST to return the
1736: // custom AST type
1737: println("public new " + labeledElementASTType + " getAST()");
1738: println("{");
1739: tabs++;
1740: println("return (" + labeledElementASTType + ") returnAST;");
1741: tabs--;
1742: println("}");
1743: println("");
1744: }
1745:
1746: // Generate the ASTFactory initialization function
1747: genInitFactory(grammar);
1748:
1749: // Generate the token names
1750: genTokenStrings();
1751:
1752: // Generate the bitsets used throughout the grammar
1753: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
1754:
1755: // Close class definition
1756: tabs--;
1757: println("}");
1758: println("");
1759:
1760: tabs--;
1761: // Generate the CSharp namespace closures (if required)
1762: if (nameSpace != null)
1763: nameSpace.emitClosures(currentOutput);
1764:
1765: // Close the parser output stream
1766: currentOutput.close();
1767: currentOutput = null;
1768: }
1769:
1770: /** Generate a series of case statements that implement a BitSet test.
1771: * @param p The Bitset for which cases are to be generated
1772: */
1773: protected void genCases(BitSet p) {
1774: if (DEBUG_CODE_GENERATOR)
1775: System.out.println("genCases(" + p + ")");
1776: int[] elems;
1777:
1778: elems = p.toArray();
1779: // Wrap cases four-per-line for lexer, one-per-line for parser
1780: int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
1781: int j = 1;
1782: boolean startOfLine = true;
1783: for (int i = 0; i < elems.length; i++) {
1784: if (j == 1) {
1785: print("");
1786: } else {
1787: _print(" ");
1788: }
1789: _print("case " + getValueString(elems[i]) + ":");
1790: if (j == wrap) {
1791: _println("");
1792: startOfLine = true;
1793: j = 1;
1794: } else {
1795: j++;
1796: startOfLine = false;
1797: }
1798: }
1799: if (!startOfLine) {
1800: _println("");
1801: }
1802: }
1803:
/** Generate common code for a block of alternatives; return a
 * postscript that needs to be generated at the end of the
 * block. Other routines may append else-clauses and such for
 * error checking before the postfix is generated. If the
 * grammar is a lexer, then generate alternatives in an order
 * where alternatives requiring deeper lookahead are generated
 * first, and EOF in the lookahead set reduces the depth of
 * the lookahead.
 *
 * @param blk The block to generate
 * @param noTestForSingle If true, then it does not generate a test
 *        for a single alternative.
 * @return finishing info (postscript text, whether a switch/if was generated)
 *         that the caller emits after any extra else/error clauses.
 */
public CSharpBlockFinishingInfo genCommonBlock(
        AlternativeBlock blk, boolean noTestForSingle) {
    int nIF = 0;                        // number of if-clauses emitted so far
    boolean createdLL1Switch = false;   // set once an LA(1) switch has been opened
    int closingBracesOfIFSequence = 0;  // braces owed by nested synPred else-blocks
    CSharpBlockFinishingInfo finishingInfo = new CSharpBlockFinishingInfo();
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genCommonBlock(" + blk + ")");

    // Save the AST generation state, and set it to that of the block
    boolean savegenAST = genAST;
    genAST = genAST && blk.getAutoGen();

    boolean oldsaveTest = saveText;
    saveText = saveText && blk.getAutoGen();

    // Is this block inverted? If so, generate special-case code:
    // a single bitset match instead of per-alternative tests.
    if (blk.not
            && analyzer.subruleCanBeInverted(blk,
                    grammar instanceof LexerGrammar)) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("special case: ~(subrule)");
        Lookahead p = analyzer.look(1, blk);
        // Variable assignment for labeled elements
        if (blk.getLabel() != null && syntacticPredLevel == 0) {
            println(blk.getLabel() + " = " + lt1Value + ";");
        }

        // AST
        genElementAST(blk);

        String astArgs = "";
        if (grammar instanceof TreeWalkerGrammar) {
            if (usingCustomAST)
                astArgs = "(AST)_t,";
            else
                astArgs = "_t,";
        }

        // match the bitset for the alternative
        println("match(" + astArgs
                + getBitsetName(markBitsetForGen(p.fset)) + ");");

        // tack on tree cursor motion if doing a tree walker
        if (grammar instanceof TreeWalkerGrammar) {
            println("_t = _t.getNextSibling();");
        }
        return finishingInfo;
    }

    // Special handling for single alt
    if (blk.getAlternatives().size() == 1) {
        Alternative alt = blk.getAlternativeAt(0);
        // Generate a warning if there is a synPred for single alt.
        if (alt.synPred != null) {
            antlrTool
                    .warning(
                            "Syntactic predicate superfluous for single alternative",
                            grammar.getFilename(), blk
                                    .getAlternativeAt(0).synPred
                                    .getLine(), blk
                                    .getAlternativeAt(0).synPred
                                    .getColumn());
        }
        if (noTestForSingle) {
            if (alt.semPred != null) {
                // Generate validating predicate
                genSemPred(alt.semPred, blk.line);
            }
            genAlt(alt, blk);
            return finishingInfo;
        }
    }

    // count number of simple LL(1) cases; only do switch for
    // many LL(1) cases (no preds, no end of token refs)
    // We don't care about exit paths for (...)*, (...)+
    // because we don't explicitly have a test for them
    // as an alt in the loop.
    //
    // Also, we now count how many unicode lookahead sets
    // there are--they must be moved to DEFAULT or ELSE
    // clause.
    int nLL1 = 0;
    for (int i = 0; i < blk.getAlternatives().size(); i++) {
        Alternative a = blk.getAlternativeAt(i);
        if (suitableForCaseExpression(a)) {
            nLL1++;
        }
    }

    // do LL(1) cases
    if (nLL1 >= makeSwitchThreshold) {
        // Determine the name of the item to be compared
        String testExpr = lookaheadString(1);
        createdLL1Switch = true;
        // when parsing trees, convert null to valid tree node with NULL lookahead
        if (grammar instanceof TreeWalkerGrammar) {
            println("if (null == _t)");
            tabs++;
            println("_t = ASTNULL;");
            tabs--;
        }
        println("switch ( " + testExpr + " )");
        println("{");
        //tabs++;
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            // ignore any non-LL(1) alts, predicated alts,
            // or end-of-token alts for case expressions
            if (!suitableForCaseExpression(alt)) {
                continue;
            }
            Lookahead p = alt.cache[1];
            if (p.fset.degree() == 0 && !p.containsEpsilon()) {
                antlrTool
                        .warning(
                                "Alternate omitted due to empty prediction set",
                                grammar.getFilename(), alt.head
                                        .getLine(), alt.head
                                        .getColumn());
            } else {
                genCases(p.fset);
                println("{");
                tabs++;
                genAlt(alt, blk);
                println("break;");
                tabs--;
                println("}");
            }
        }
        // The non-LL(1) alternatives below are emitted under "default:";
        // the matching close is supplied via finishingInfo.postscript.
        println("default:");
        tabs++;
    }

    // do non-LL(1) and nondeterministic cases This is tricky in
    // the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
    // : "*="; Since nextToken is generated without a loop, then
    // the STAR will have end-of-token as it's lookahead set for
    // LA(2). So, we must generate the alternatives containing
    // trailing end-of-token in their lookahead sets *after* the
    // alternatives without end-of-token. This implements the
    // usual lexer convention that longer matches come before
    // shorter ones, e.g. "*=" matches ASSIGN_STAR not STAR
    //
    // For non-lexer grammars, this does not sort the alternates
    // by depth Note that alts whose lookahead is purely
    // end-of-token at k=1 end up as default or else clauses.
    int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk
            : 0;
    for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("checking depth " + altDepth);
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (DEBUG_CODE_GENERATOR)
                System.out.println("genAlt: " + i);
            // if we made a switch above, ignore what we already took care
            // of. Specifically, LL(1) alts with no preds
            // that do not have end-of-token in their prediction set
            // and that are not giant unicode sets.
            if (createdLL1Switch && suitableForCaseExpression(alt)) {
                if (DEBUG_CODE_GENERATOR)
                    System.out
                            .println("ignoring alt because it was in the switch");
                continue;
            }
            String e;

            boolean unpredicted = false;

            if (grammar instanceof LexerGrammar) {
                // Calculate the "effective depth" of the alt,
                // which is the max depth at which
                // cache[depth]!=end-of-token
                int effectiveDepth = alt.lookaheadDepth;
                if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
                    // use maximum lookahead
                    effectiveDepth = grammar.maxk;
                }
                while (effectiveDepth >= 1
                        && alt.cache[effectiveDepth]
                                .containsEpsilon()) {
                    effectiveDepth--;
                }
                // Ignore alts whose effective depth is other than
                // the ones we are generating for this iteration.
                if (effectiveDepth != altDepth) {
                    if (DEBUG_CODE_GENERATOR)
                        System.out
                                .println("ignoring alt because effectiveDepth!=altDepth;"
                                        + effectiveDepth
                                        + "!="
                                        + altDepth);
                    continue;
                }
                unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
                e = getLookaheadTestExpression(alt, effectiveDepth);
            } else {
                unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
                e = getLookaheadTestExpression(alt, grammar.maxk);
            }

            // Was it a big unicode range that forced unsuitability
            // for a case expression?
            if (alt.cache[1].fset.degree() > caseSizeThreshold
                    && suitableForCaseExpression(alt)) {
                if (nIF == 0) {
                    println("if " + e);
                    println("{");
                } else {
                    println("else if " + e);
                    println("{");
                }
            } else if (unpredicted && alt.semPred == null
                    && alt.synPred == null) {
                // The alt has empty prediction set and no
                // predicate to help out. if we have not
                // generated a previous if, just put {...} around
                // the end-of-token clause
                if (nIF == 0) {
                    println("{");
                } else {
                    println("else {");
                }
                finishingInfo.needAnErrorClause = false;
            } else {
                // check for sem and syn preds
                // Add any semantic predicate expression to the lookahead test
                if (alt.semPred != null) {
                    // if debugging, wrap the evaluation of the predicate in a method
                    //
                    // translate $ and # references
                    ActionTransInfo tInfo = new ActionTransInfo();
                    String actionStr = processActionForSpecialSymbols(
                            alt.semPred, blk.line, currentRule,
                            tInfo);
                    // ignore translation info...we don't need to
                    // do anything with it. call that will inform
                    // SemanticPredicateListeners of the result
                    if (((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))
                            && grammar.debuggingOutput) {
                        e = "("
                                + e
                                + "&& fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING,"
                                + //FIXME
                                addSemPred(charFormatter
                                        .escapeString(actionStr))
                                + "," + actionStr + "))";
                    } else {
                        e = "(" + e + "&&(" + actionStr + "))";
                    }
                }

                // Generate any syntactic predicates
                if (nIF > 0) {
                    if (alt.synPred != null) {
                        println("else {");
                        tabs++;
                        genSynPred(alt.synPred, e);
                        closingBracesOfIFSequence++;
                    } else {
                        println("else if " + e + " {");
                    }
                } else {
                    if (alt.synPred != null) {
                        genSynPred(alt.synPred, e);
                    } else {
                        // when parsing trees, convert null to valid tree node
                        // with NULL lookahead.
                        if (grammar instanceof TreeWalkerGrammar) {
                            println("if (_t == null)");
                            tabs++;
                            println("_t = ASTNULL;");
                            tabs--;
                        }
                        println("if " + e);
                        println("{");
                    }
                }

            }

            nIF++;
            tabs++;
            genAlt(alt, blk);
            tabs--;
            println("}");
        }
    }

    // Close any braces still owed by nested synPred else-blocks.
    String ps = "";
    for (int i = 1; i <= closingBracesOfIFSequence; i++) {
        ps += "}";
    }

    // Restore the AST generation state
    genAST = savegenAST;

    // restore save text state
    saveText = oldsaveTest;

    // Return the finishing info.
    if (createdLL1Switch) {
        tabs--;
        finishingInfo.postscript = ps + "break; }";
        finishingInfo.generatedSwitch = true;
        finishingInfo.generatedAnIf = nIF > 0;
        //return new CSharpBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement

    } else {
        finishingInfo.postscript = ps;
        finishingInfo.generatedSwitch = false;
        finishingInfo.generatedAnIf = nIF > 0;
        // return new CSharpBlockFinishingInfo(ps, false,nIF>0);
    }
    return finishingInfo;
}
2133:
2134: private static boolean suitableForCaseExpression(Alternative a) {
2135: return a.lookaheadDepth == 1 && a.semPred == null
2136: && !a.cache[1].containsEpsilon()
2137: && a.cache[1].fset.degree() <= caseSizeThreshold;
2138: }
2139:
/** Generate code to link an element reference into the AST.
 *  Declares the element's AST variable(s), initializes them from the
 *  matched input, and (when genAST is on) emits the addASTChild /
 *  makeASTRoot call per the element's auto-gen type.
 */
private void genElementAST(AlternativeElement el) {
    // handle case where you're not building trees, but are in tree walker.
    // Just need to get labels set up.
    if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
        String elementRef;
        String astName;

        // Generate names and declarations of the AST variable(s)
        if (el.getLabel() == null) {
            elementRef = lt1Value;
            // Generate AST variables for unlabeled stuff
            astName = "tmp" + astVarNumber + "_AST";
            astVarNumber++;
            // Map the generated AST variable in the alternate
            mapTreeVariable(el, astName);
            // Generate an "input" AST variable also
            println(labeledElementASTType + " " + astName
                    + "_in = " + elementRef + ";");
        }
        return;
    }

    if (grammar.buildAST && syntacticPredLevel == 0) {
        // Declare an AST variable only when the element is labeled or
        // tree construction for it is not suppressed by '!'.
        boolean needASTDecl = (genAST && (el.getLabel() != null || (el
                .getAutoGenType() != GrammarElement.AUTO_GEN_BANG)));

        // RK: if we have a grammar element always generate the decl
        // since some guy can access it from an action and we can't
        // peek ahead (well not without making a mess).
        // I'd prefer taking this out.
        if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
                && (el instanceof TokenRefElement))
            needASTDecl = true;

        boolean doNoGuessTest = (grammar.hasSyntacticPredicate && needASTDecl);

        String elementRef;
        String astNameBase;

        // Generate names and declarations of the AST variable(s)
        if (el.getLabel() != null) {
            // if the element is labeled use that name...
            elementRef = el.getLabel();
            astNameBase = el.getLabel();
        } else {
            // else generate a temporary name...
            elementRef = lt1Value;
            // Generate AST variables for unlabeled stuff
            astNameBase = "tmp" + astVarNumber;
            astVarNumber++;
        }

        // Generate the declaration if required.
        if (needASTDecl) {
            // Generate the declaration
            if (el instanceof GrammarAtom) {
                GrammarAtom ga = (GrammarAtom) el;
                if (ga.getASTNodeType() != null) {
                    // Atom carries a heterogeneous AST node type
                    genASTDeclaration(el, astNameBase, ga
                            .getASTNodeType());
                    //println(ga.getASTNodeType()+" " + astName+" = null;");
                } else {
                    genASTDeclaration(el, astNameBase,
                            labeledElementASTType);
                    //println(labeledElementASTType+" " + astName + " = null;");
                }
            } else {
                genASTDeclaration(el, astNameBase,
                        labeledElementASTType);
                //println(labeledElementASTType+" " + astName + " = null;");
            }
        }

        // for convenience..
        String astName = astNameBase + "_AST";

        // Map the generated AST variable in the alternate
        mapTreeVariable(el, astName);
        if (grammar instanceof TreeWalkerGrammar) {
            // Generate an "input" AST variable also
            println(labeledElementASTType + " " + astName
                    + "_in = null;");
        }

        // Enclose actions with !guessing
        // (guessing guard intentionally disabled here -- see commented code)
        if (doNoGuessTest) {
            //println("if (0 == inputState.guessing)");
            //println("{");
            //tabs++;
        }

        // if something has a label assume it will be used
        // so we must initialize the RefAST
        if (el.getLabel() != null) {
            if (el instanceof GrammarAtom) {
                println(astName
                        + " = "
                        + getASTCreateString((GrammarAtom) el,
                                elementRef) + ";");
            } else {
                println(astName + " = "
                        + getASTCreateString(elementRef) + ";");
            }
        }

        // if it has no label but a declaration exists initialize it.
        if (el.getLabel() == null && needASTDecl) {
            elementRef = lt1Value;
            if (el instanceof GrammarAtom) {
                println(astName
                        + " = "
                        + getASTCreateString((GrammarAtom) el,
                                elementRef) + ";");
            } else {
                println(astName + " = "
                        + getASTCreateString(elementRef) + ";");
            }
            // Map the generated AST variable in the alternate
            if (grammar instanceof TreeWalkerGrammar) {
                // set "input" AST variable also
                println(astName + "_in = " + elementRef + ";");
            }
        }

        if (genAST) {
            // Emit the tree-construction call; custom AST node types
            // need an explicit (AST) cast in the generated C#.
            switch (el.getAutoGenType()) {
            case GrammarElement.AUTO_GEN_NONE:
                if (usingCustomAST
                        || ((el instanceof GrammarAtom) && (((GrammarAtom) el)
                                .getASTNodeType() != null)))
                    println("astFactory.addASTChild(currentAST, (AST)"
                            + astName + ");");
                else
                    println("astFactory.addASTChild(currentAST, "
                            + astName + ");");
                break;
            case GrammarElement.AUTO_GEN_CARET:
                if (usingCustomAST
                        || ((el instanceof GrammarAtom) && (((GrammarAtom) el)
                                .getASTNodeType() != null)))
                    println("astFactory.makeASTRoot(currentAST, (AST)"
                            + astName + ");");
                else
                    println("astFactory.makeASTRoot(currentAST, "
                            + astName + ");");
                break;
            default:
                break;
            }
        }
        if (doNoGuessTest) {
            //tabs--;
            //println("}");
        }
    }
}
2297:
2298: /** Close the try block and generate catch phrases
2299: * if the element has a labeled handler in the rule
2300: */
2301: private void genErrorCatchForElement(AlternativeElement el) {
2302: if (el.getLabel() == null)
2303: return;
2304: String r = el.enclosingRuleName;
2305: if (grammar instanceof LexerGrammar) {
2306: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2307: }
2308: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2309: if (rs == null) {
2310: antlrTool.panic("Enclosing rule not found!");
2311: }
2312: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2313: if (ex != null) {
2314: tabs--;
2315: println("}");
2316: genErrorHandler(ex);
2317: }
2318: }
2319:
2320: /** Generate the catch phrases for a user-specified error handler */
2321: private void genErrorHandler(ExceptionSpec ex) {
2322: // Each ExceptionHandler in the ExceptionSpec is a separate catch
2323: for (int i = 0; i < ex.handlers.size(); i++) {
2324: ExceptionHandler handler = (ExceptionHandler) ex.handlers
2325: .elementAt(i);
2326: // Generate catch phrase
2327: println("catch (" + handler.exceptionTypeAndName.getText()
2328: + ")");
2329: println("{");
2330: tabs++;
2331: if (grammar.hasSyntacticPredicate) {
2332: println("if (0 == inputState.guessing)");
2333: println("{");
2334: tabs++;
2335: }
2336:
2337: // When not guessing, execute user handler action
2338: ActionTransInfo tInfo = new ActionTransInfo();
2339: printAction(processActionForSpecialSymbols(handler.action
2340: .getText(), handler.action.getLine(), currentRule,
2341: tInfo));
2342:
2343: if (grammar.hasSyntacticPredicate) {
2344: tabs--;
2345: println("}");
2346: println("else");
2347: println("{");
2348: tabs++;
2349: // When guessing, rethrow exception
2350: //println("throw " + extractIdOfAction(handler.exceptionTypeAndName) + ";");
2351: println("throw;");
2352: tabs--;
2353: println("}");
2354: }
2355: // Close catch phrase
2356: tabs--;
2357: println("}");
2358: }
2359: }
2360:
2361: /** Generate a try { opening if the element has a labeled handler in the rule */
2362: private void genErrorTryForElement(AlternativeElement el) {
2363: if (el.getLabel() == null)
2364: return;
2365: String r = el.enclosingRuleName;
2366: if (grammar instanceof LexerGrammar) {
2367: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2368: }
2369: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2370: if (rs == null) {
2371: antlrTool.panic("Enclosing rule not found!");
2372: }
2373: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2374: if (ex != null) {
2375: println("try // for error handling");
2376: println("{");
2377: tabs++;
2378: }
2379: }
2380:
/** Declare the AST variable for {@code el}, typed with the grammar's
 *  labeled-element AST type. */
protected void genASTDeclaration(AlternativeElement el) {
    genASTDeclaration(el, labeledElementASTType);
}
2384:
/** Declare the AST variable for {@code el} under its label name,
 *  using the given node type. */
protected void genASTDeclaration(AlternativeElement el,
        String node_type) {
    genASTDeclaration(el, el.getLabel(), node_type);
}
2389:
2390: protected void genASTDeclaration(AlternativeElement el,
2391: String var_name, String node_type) {
2392: // already declared?
2393: if (declaredASTVariables.contains(el))
2394: return;
2395:
2396: // emit code
2397: //String s = StringUtils.stripFrontBack(node_type, "\"", "\"");
2398: //println(s + " " + var_name + "_AST = null;");
2399: println(node_type + " " + var_name + "_AST = null;");
2400:
2401: // mark as declared
2402: declaredASTVariables.add(el);
2403: }
2404:
2405: /** Generate a header that is common to all CSharp files */
2406: protected void genHeader() {
2407: println("// $ANTLR " + Tool.version + ": " + "\""
2408: + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\""
2409: + " -> " + "\"" + grammar.getClassName() + ".cs\"$");
2410: }
2411:
/** Emit the call that re-maps _ttype through the lexer's literals table. */
private void genLiteralsTest() {
    println("_ttype = testLiteralsTable(_ttype);");
}
2415:
/** Emit a literals-table test over only the text matched so far
 *  (from _begin to the end of the text buffer). */
private void genLiteralsTestForPartialToken() {
    println("_ttype = testLiteralsTable(text.ToString(_begin, text.Length-_begin), _ttype);");
}
2419:
/** Intentionally a no-op in this generator: bitset matches are emitted
 *  directly where needed (e.g. genCommonBlock prints match(bitset)
 *  itself), so nothing is generated here. */
protected void genMatch(BitSet b) {
}
2422:
2423: protected void genMatch(GrammarAtom atom) {
2424: if (atom instanceof StringLiteralElement) {
2425: if (grammar instanceof LexerGrammar) {
2426: genMatchUsingAtomText(atom);
2427: } else {
2428: genMatchUsingAtomTokenType(atom);
2429: }
2430: } else if (atom instanceof CharLiteralElement) {
2431: if (grammar instanceof LexerGrammar) {
2432: genMatchUsingAtomText(atom);
2433: } else {
2434: antlrTool
2435: .error("cannot ref character literals in grammar: "
2436: + atom);
2437: }
2438: } else if (atom instanceof TokenRefElement) {
2439: genMatchUsingAtomText(atom);
2440: } else if (atom instanceof WildcardElement) {
2441: gen((WildcardElement) atom);
2442: }
2443: }
2444:
2445: protected void genMatchUsingAtomText(GrammarAtom atom) {
2446: // match() for trees needs the _t cursor
2447: String astArgs = "";
2448: if (grammar instanceof TreeWalkerGrammar) {
2449: if (usingCustomAST)
2450: astArgs = "(AST)_t,";
2451: else
2452: astArgs = "_t,";
2453: }
2454:
2455: // if in lexer and ! on element, save buffer index to kill later
2456: if (grammar instanceof LexerGrammar
2457: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2458: declareSaveIndexVariableIfNeeded();
2459: println("_saveIndex = text.Length;");
2460: }
2461:
2462: print(atom.not ? "matchNot(" : "match(");
2463: _print(astArgs);
2464:
2465: // print out what to match
2466: if (atom.atomText.equals("EOF")) {
2467: // horrible hack to handle EOF case
2468: _print("Token.EOF_TYPE");
2469: } else {
2470: _print(atom.atomText);
2471: }
2472: _println(");");
2473:
2474: if (grammar instanceof LexerGrammar
2475: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2476: declareSaveIndexVariableIfNeeded();
2477: println("text.Length = _saveIndex;"); // kill text atom put in buffer
2478: }
2479: }
2480:
2481: protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
2482: // match() for trees needs the _t cursor
2483: String astArgs = "";
2484: if (grammar instanceof TreeWalkerGrammar) {
2485: if (usingCustomAST)
2486: astArgs = "(AST)_t,";
2487: else
2488: astArgs = "_t,";
2489: }
2490:
2491: // If the literal can be mangled, generate the symbolic constant instead
2492: String mangledName = null;
2493: String s = astArgs + getValueString(atom.getType());
2494:
2495: // matching
2496: println((atom.not ? "matchNot(" : "match(") + s + ");");
2497: }
2498:
/** Generate the nextToken() rule. nextToken() is a synthetic
 * lexer rule that is the implicit OR of all user-defined
 * lexer rules.
 */
public void genNextToken() {
    // Are there any public rules? If not, then just generate a
    // fake nextToken().
    boolean hasPublicRules = false;
    for (int i = 0; i < grammar.rules.size(); i++) {
        RuleSymbol rs = (RuleSymbol) grammar.rules.elementAt(i);
        if (rs.isDefined() && rs.access.equals("public")) {
            hasPublicRules = true;
            break;
        }
    }
    if (!hasPublicRules) {
        // Stub nextToken(): call uponEOF() and return an EOF token.
        println("");
        println("override public new Token nextToken()\t\t\t//throws TokenStreamException");
        println("{");
        tabs++;
        println("try");
        println("{");
        tabs++;
        println("uponEOF();");
        tabs--;
        println("}");
        println("catch(CharStreamIOException csioe)");
        println("{");
        tabs++;
        println("throw new TokenStreamIOException(csioe.io);");
        tabs--;
        println("}");
        println("catch(CharStreamException cse)");
        println("{");
        tabs++;
        println("throw new TokenStreamException(cse.Message);");
        tabs--;
        println("}");
        println("return new CommonToken(Token.EOF_TYPE, \"\");");
        tabs--;
        println("}");
        println("");
        return;
    }

    // Create the synthesized nextToken() rule
    RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(
            grammar, grammar.rules, "nextToken");
    // Define the nextToken rule symbol
    RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
    nextTokenRs.setDefined();
    nextTokenRs.setBlock(nextTokenBlk);
    nextTokenRs.access = "private";
    grammar.define(nextTokenRs);
    // Analyze the nextToken rule
    boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);

    // Generate the next token rule
    String filterRule = null;
    if (((LexerGrammar) grammar).filterMode) {
        filterRule = ((LexerGrammar) grammar).filterRule;
    }

    // Open the generated method and its retry loop ("tryAgain" label).
    println("");
    println("public new Token nextToken()\t\t\t//throws TokenStreamException");
    println("{");
    tabs++;
    println("Token theRetToken = null;");
    _println("tryAgain:");
    println("for (;;)");
    println("{");
    tabs++;
    println("Token _token = null;");
    println("int _ttype = Token.INVALID_TYPE;");
    if (((LexerGrammar) grammar).filterMode) {
        println("setCommitToPath(false);");
        if (filterRule != null) {
            // Here's a good place to ensure that the filter rule actually exists
            if (!grammar.isDefined(CodeGenerator
                    .encodeLexerRuleName(filterRule))) {
                grammar.antlrTool.error("Filter rule " + filterRule
                        + " does not exist in this lexer");
            } else {
                RuleSymbol rs = (RuleSymbol) grammar
                        .getSymbol(CodeGenerator
                                .encodeLexerRuleName(filterRule));
                if (!rs.isDefined()) {
                    grammar.antlrTool.error("Filter rule "
                            + filterRule
                            + " does not exist in this lexer");
                } else if (rs.access.equals("public")) {
                    grammar.antlrTool.error("Filter rule "
                            + filterRule + " must be protected");
                }
            }
            // Mark the stream so the filter path can rewind on error.
            println("int _m;");
            println("_m = mark();");
        }
    }
    println("resetText();");

    println("try // for char stream error handling");
    println("{");
    tabs++;

    // Generate try around whole thing to trap scanner errors
    println("try // for lexical error handling");
    println("{");
    tabs++;

    // Test for public lexical rules with empty paths
    for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
        Alternative a = nextTokenBlk.getAlternativeAt(i);
        if (a.cache[1].containsEpsilon()) {
            //String r = a.head.toString();
            RuleRefElement rr = (RuleRefElement) a.head;
            String r = CodeGenerator
                    .decodeLexerRuleName(rr.targetRule);
            antlrTool.warning("public lexical rule " + r
                    + " is optional (can match \"nothing\")");
        }
    }

    // Generate the block; errFinish is the no-viable-alt clause that
    // closes the generated prediction (EOF handling + filter fallback).
    String newline = System.getProperty("line.separator");
    CSharpBlockFinishingInfo howToFinish = genCommonBlock(
            nextTokenBlk, false);
    String errFinish = "if (LA(1)==EOF_CHAR) { uponEOF(); returnToken_ = makeToken(Token.EOF_TYPE); }";
    errFinish += newline + "\t\t\t\t";
    if (((LexerGrammar) grammar).filterMode) {
        if (filterRule == null) {
            //kunle: errFinish += "else { consume(); continue tryAgain; }";
            errFinish += "\t\t\t\telse";
            errFinish += "\t\t\t\t{";
            errFinish += "\t\t\t\t\tconsume();";
            errFinish += "\t\t\t\t\tgoto tryAgain;";
            errFinish += "\t\t\t\t}";
        } else {
            errFinish += "\t\t\t\t\telse" + newline + "\t\t\t\t\t{"
                    + newline + "\t\t\t\t\tcommit();" + newline
                    + "\t\t\t\t\ttry {m" + filterRule + "(false);}"
                    + newline
                    + "\t\t\t\t\tcatch(RecognitionException e)"
                    + newline + "\t\t\t\t\t{" + newline
                    + "\t\t\t\t\t // catastrophic failure"
                    + newline + "\t\t\t\t\t reportError(e);"
                    + newline + "\t\t\t\t\t consume();" + newline
                    + "\t\t\t\t\t}" + newline
                    + "\t\t\t\t\tgoto tryAgain;" + newline
                    + "\t\t\t\t}";
        }
    } else {
        errFinish += "else {" + throwNoViable + "}";
    }
    genBlockFinish(howToFinish, errFinish);

    // at this point a valid token has been matched, undo "mark" that was done
    if (((LexerGrammar) grammar).filterMode && filterRule != null) {
        println("commit();");
    }

    // Generate literals test if desired
    // make sure _ttype is set first; note returnToken_ must be
    // non-null as the rule was required to create it.
    println("if ( null==returnToken_ ) goto tryAgain; // found SKIP token");
    println("_ttype = returnToken_.Type;");
    if (((LexerGrammar) grammar).getTestLiterals()) {
        genLiteralsTest();
    }

    // return token created by rule reference in switch
    println("returnToken_.Type = _ttype;");
    println("return returnToken_;");

    // Close try block
    tabs--;
    println("}");
    println("catch (RecognitionException e) {");
    tabs++;
    if (((LexerGrammar) grammar).filterMode) {
        if (filterRule == null) {
            // No filter rule: skip a char and retry unless committed.
            println("if (!getCommitToPath())");
            println("{");
            tabs++;
            println("consume();");
            println("goto tryAgain;");
            tabs--;
            println("}");
        } else {
            // Filter rule: rewind and hand the input to the filter.
            println("if (!getCommitToPath())");
            println("{");
            tabs++;
            println("rewind(_m);");
            println("resetText();");
            println("try {m" + filterRule + "(false);}");
            println("catch(RecognitionException ee) {");
            println("   // horrendous failure: error in filter rule");
            println("   reportError(ee);");
            println("   consume();");
            println("}");
            //println("goto tryAgain;");
            tabs--;
            println("}");
            println("else");
        }
    }
    if (nextTokenBlk.getDefaultErrorHandler()) {
        println("{");
        tabs++;
        println("reportError(e);");
        println("consume();");
        tabs--;
        println("}");
    } else {
        // pass on to invoking routine
        tabs++;
        println("throw new TokenStreamRecognitionException(e);");
        tabs--;
    }
    tabs--;
    println("}");

    // close CharStreamException try
    tabs--;
    println("}");
    println("catch (CharStreamException cse) {");
    println("   if ( cse is CharStreamIOException ) {");
    println("      throw new TokenStreamIOException(((CharStreamIOException)cse).io);");
    println("   }");
    println("   else {");
    println("      throw new TokenStreamException(cse.Message);");
    println("   }");
    println("}");

    // close for-loop
    tabs--;
    println("}");

    // close method nextToken
    tabs--;
    println("}");
    println("");
}
2742:
2743: /** Gen a named rule block.
2744: * ASTs are generated for each element of an alternative unless
2745: * the rule or the alternative have a '!' modifier.
2746: *
2747: * If an alternative defeats the default tree construction, it
2748: * must set <rule>_AST to the root of the returned AST.
2749: *
2750: * Each alternative that does automatic tree construction, builds
2751: * up root and child list pointers in an ASTPair structure.
2752: *
2753: * A rule finishes by setting the returnAST variable from the
2754: * ASTPair.
2755: *
2756: * @param rule The name of the rule to generate
2757: * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
2758: */
public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum,
        TokenManager tm) {
    // NOTE(review): startSymbol and tm are not referenced in this body;
    // they appear to be kept for framework interface compatibility -- confirm.
    tabs = 1;
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genRule(" + s.getId() + ")");
    if (!s.isDefined()) {
        antlrTool.error("undefined rule: " + s.getId());
        return;
    }

    // Generate rule return type, name, arguments
    RuleBlock rblk = s.getBlock();
    currentRule = rblk;
    currentASTResult = s.getId();

    // clear list of declared ast variables..
    declaredASTVariables.clear();

    // Save the AST generation state, and set it to that of the rule
    boolean savegenAST = genAST;
    genAST = genAST && rblk.getAutoGen();

    // boolean oldsaveTest = saveText;
    saveText = rblk.getAutoGen();

    // print javadoc comment if any
    if (s.comment != null) {
        _println(s.comment);
    }

    // Gen method access modifier (the "final" qualifier was dropped)
    //print(s.access + " final ");
    print(s.access + " ");

    // Gen method return type (note lexer return action set at rule creation)
    if (rblk.returnAction != null) {
        // Has specified return value
        _print(extractTypeOfAction(rblk.returnAction, rblk
                .getLine(), rblk.getColumn())
                + " ");
    } else {
        // No specified return value
        _print("void ");
    }

    // Gen method name
    _print(s.getId() + "(");

    // Additional rule parameters common to all rules for this grammar
    _print(commonExtraParams);
    if (commonExtraParams.length() != 0 && rblk.argAction != null) {
        _print(",");
    }

    // Gen arguments
    if (rblk.argAction != null) {
        // Has specified arguments
        _println("");
        tabs++;
        println(rblk.argAction);
        tabs--;
        print(")");
    } else {
        // No specified arguments
        _print(")");
    }

    // Gen throws clause and open curly.
    // C# has no checked exceptions, so the throws list is emitted only
    // as a comment in the generated code.
    _print(" //throws " + exceptionThrown);
    if (grammar instanceof ParserGrammar) {
        _print(", TokenStreamException");
    } else if (grammar instanceof LexerGrammar) {
        _print(", CharStreamException, TokenStreamException");
    }
    // Add user-defined exceptions unless lexer (for now)
    if (rblk.throwsSpec != null) {
        if (grammar instanceof LexerGrammar) {
            antlrTool
                    .error("user-defined throws spec not allowed (yet) for lexer rule "
                            + rblk.ruleName);
        } else {
            _print(", " + rblk.throwsSpec);
        }
    }

    _println("");
    _println("{");
    tabs++;

    // Convert return action to variable declaration
    if (rblk.returnAction != null)
        println(rblk.returnAction + ";");

    // print out definitions needed by rules for various grammar types
    println(commonLocalVars);

    // Emit traceIn() call when rule tracing is enabled; tree walkers
    // pass the current tree node as well.
    if (grammar.traceRules) {
        if (grammar instanceof TreeWalkerGrammar) {
            if (usingCustomAST)
                println("traceIn(\"" + s.getId() + "\",(AST)_t);");
            else
                println("traceIn(\"" + s.getId() + "\",_t);");
        } else {
            println("traceIn(\"" + s.getId() + "\");");
        }
    }

    if (grammar instanceof LexerGrammar) {
        // lexer rule default return value is the rule's token name
        // This is a horrible hack to support the built-in EOF lexer rule.
        if (s.getId().equals("mEOF"))
            println("_ttype = Token.EOF_TYPE;");
        else
            println("_ttype = " + s.getId().substring(1) + ";");

        // delay creation of _saveIndex until we need it OK?
        bSaveIndexCreated = false;

        /*
        println("boolean old_saveConsumedInput=saveConsumedInput;");
        if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
        println("saveConsumedInput=false;");
        }
        */
    }

    // if debugging, write code to mark entry to the rule
    if (grammar.debuggingOutput)
        if (grammar instanceof ParserGrammar)
            println("fireEnterRule(" + ruleNum + ",0);");
        else if (grammar instanceof LexerGrammar)
            println("fireEnterRule(" + ruleNum + ",_ttype);");

    // Generate trace code if desired; the matching "finally" is emitted
    // near the end of this method.
    if (grammar.debuggingOutput || grammar.traceRules) {
        println("try { // debugging");
        tabs++;
    }

    // Initialize AST variables
    if (grammar instanceof TreeWalkerGrammar) {
        // "Input" value for rule
        println(labeledElementASTType + " " + s.getId()
                + "_AST_in = (" + labeledElementASTType + ")_t;");
    }
    if (grammar.buildAST) {
        // Parser member used to pass AST returns from rule invocations
        println("returnAST = null;");
        // Tracks AST construction
        // println("ASTPair currentAST = (inputState.guessing==0) ? new ASTPair() : null;");
        println("ASTPair currentAST = new ASTPair();");
        // User-settable return value for rule.
        println(labeledElementASTType + " " + s.getId()
                + "_AST = null;");
    }

    genBlockPreamble(rblk);
    genBlockInitAction(rblk);
    println("");

    // Search for an unlabeled exception specification attached to the rule
    ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");

    // Generate try block around the entire rule for error handling
    if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
        println("try { // for error handling");
        tabs++;
    }

    // Generate the alternatives
    if (rblk.alternatives.size() == 1) {
        // One alternative -- use simple form
        Alternative alt = rblk.getAlternativeAt(0);
        String pred = alt.semPred;
        if (pred != null)
            genSemPred(pred, currentRule.line);
        if (alt.synPred != null) {
            antlrTool
                    .warning(
                            "Syntactic predicate ignored for single alternative",
                            grammar.getFilename(), alt.synPred
                                    .getLine(), alt.synPred
                                    .getColumn());
        }
        genAlt(alt, rblk);
    } else {
        // Multiple alternatives -- generate complex form.
        // NOTE(review): deterministic() is invoked for its analysis side
        // effects (e.g. nondeterminism warnings); its result is ignored.
        boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);

        CSharpBlockFinishingInfo howToFinish = genCommonBlock(rblk,
                false);
        genBlockFinish(howToFinish, throwNoViable);
    }

    // Generate catch phrase for error handling
    if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
        // Close the try block
        tabs--;
        println("}");
    }

    // Generate user-defined or default catch phrases
    if (unlabeledUserSpec != null) {
        genErrorHandler(unlabeledUserSpec);
    } else if (rblk.getDefaultErrorHandler()) {
        // Generate default catch phrase
        println("catch (" + exceptionThrown + " ex)");
        println("{");
        tabs++;
        // Generate code to handle error if not guessing
        if (grammar.hasSyntacticPredicate) {
            println("if (0 == inputState.guessing)");
            println("{");
            tabs++;
        }
        println("reportError(ex);");
        if (!(grammar instanceof TreeWalkerGrammar)) {
            // Generate code to consume until token in k==1 follow set
            Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1,
                    rblk.endNode);
            String followSetName = getBitsetName(markBitsetForGen(follow.fset));
            println("consume();");
            println("consumeUntil(" + followSetName + ");");
        } else {
            // Just consume one token
            println("if (null != _t)");
            println("{");
            tabs++;
            println("_t = _t.getNextSibling();");
            tabs--;
            println("}");
        }
        if (grammar.hasSyntacticPredicate) {
            tabs--;
            // When guessing, rethrow exception (bare C# "throw;" preserves
            // the original stack trace)
            println("}");
            println("else");
            println("{");
            tabs++;
            //println("throw ex;");
            println("throw;");
            tabs--;
            println("}");
        }
        // Close catch phrase
        tabs--;
        println("}");
    }

    // Squirrel away the AST "return" value
    if (grammar.buildAST) {
        println("returnAST = " + s.getId() + "_AST;");
    }

    // Set return tree value for tree walkers
    if (grammar instanceof TreeWalkerGrammar) {
        println("retTree_ = _t;");
    }

    // Generate literals test for lexer rules so marked
    if (rblk.getTestLiterals()) {
        if (s.access.equals("protected")) {
            genLiteralsTestForPartialToken();
        } else {
            genLiteralsTest();
        }
    }

    // if doing a lexer rule, dump code to create token if necessary
    if (grammar instanceof LexerGrammar) {
        println("if (_createToken && (null == _token) && (_ttype != Token.SKIP))");
        println("{");
        tabs++;
        println("_token = makeToken(_ttype);");
        println("_token.setText(text.ToString(_begin, text.Length-_begin));");
        tabs--;
        println("}");
        println("returnToken_ = _token;");
    }

    // Gen the return statement if there is one (lexer has hard-wired return action)
    if (rblk.returnAction != null) {
        println("return "
                + extractIdOfAction(rblk.returnAction, rblk
                        .getLine(), rblk.getColumn()) + ";");
    }

    // Close the debugging/tracing try opened above with a finally block
    // that reports rule exit.
    if (grammar.debuggingOutput || grammar.traceRules) {
        tabs--;
        println("}");
        println("finally");
        println("{ // debugging");
        tabs++;

        // If debugging, generate calls to mark exit of rule
        if (grammar.debuggingOutput)
            if (grammar instanceof ParserGrammar)
                println("fireExitRule(" + ruleNum + ",0);");
            else if (grammar instanceof LexerGrammar)
                println("fireExitRule(" + ruleNum + ",_ttype);");

        if (grammar.traceRules) {
            if (grammar instanceof TreeWalkerGrammar) {
                println("traceOut(\"" + s.getId() + "\",_t);");
            } else {
                println("traceOut(\"" + s.getId() + "\");");
            }
        }

        tabs--;
        println("}");
    }

    tabs--;
    println("}");
    println("");

    // Restore the AST generation state
    genAST = savegenAST;

    // restore char save state
    // saveText = oldsaveTest;
}
3082:
/** Emit a call to the method generated for the referenced rule,
 *  including the lexer's _createToken flag, common extra arguments,
 *  and any translated user argument action.
 */
private void GenRuleInvocation(RuleRefElement rr) {
    // dump rule name
    _print(rr.targetRule + "(");

    // lexers must tell rule if it should set returnToken_
    if (grammar instanceof LexerGrammar) {
        // if labeled, could access Token, so tell rule to create
        if (rr.getLabel() != null) {
            _print("true");
        } else {
            _print("false");
        }
        if (commonExtraArgs.length() != 0 || rr.args != null) {
            _print(",");
        }
    }

    // Extra arguments common to all rules for this grammar
    _print(commonExtraArgs);
    if (commonExtraArgs.length() != 0 && rr.args != null) {
        _print(",");
    }

    // Process arguments to method, if any
    // NOTE(review): rs is dereferenced below without a null check; this
    // presumably relies on earlier validation that targetRule exists -- confirm.
    RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
    if (rr.args != null) {
        // When not guessing, execute user arg action
        ActionTransInfo tInfo = new ActionTransInfo();
        String args = processActionForSpecialSymbols(rr.args, 0,
                currentRule, tInfo);
        // Rule-reference arguments may not manipulate the tree root (#).
        if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
            antlrTool.error("Arguments of rule reference '"
                    + rr.targetRule + "' cannot set or ref #"
                    + currentRule.getRuleName(), grammar
                    .getFilename(), rr.getLine(), rr.getColumn());
        }
        _print(args);

        // Warn if the rule accepts no arguments
        if (rs.block.argAction == null) {
            antlrTool.warning("Rule '" + rr.targetRule
                    + "' accepts no arguments", grammar
                    .getFilename(), rr.getLine(), rr.getColumn());
        }
    } else {
        // For C++, no warning if rule has parameters, because there may be default
        // values for all of the parameters
        if (rs.block.argAction != null) {
            antlrTool.warning(
                    "Missing parameters on reference to rule "
                            + rr.targetRule, grammar.getFilename(),
                    rr.getLine(), rr.getColumn());
        }
    }
    _println(");");

    // move down to the first child while parsing
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = retTree_;");
    }
}
3144:
/** Emit the validating test for a semantic predicate: an if-statement
 *  that throws SemanticException (with the escaped predicate text as
 *  the message) when the predicate evaluates false.
 *  @param pred the predicate action text (may contain $/# references)
 *  @param line source line of the predicate, for action translation
 */
protected void genSemPred(String pred, int line) {
    // translate $ and # references
    ActionTransInfo tInfo = new ActionTransInfo();
    pred = processActionForSpecialSymbols(pred, line, currentRule,
            tInfo);
    // ignore translation info...we don't need to do anything with it.
    // Escaped copy is used both for the debug-event registry and for the
    // exception message text.
    String escapedPred = charFormatter.escapeString(pred);

    // if debugging, wrap the semantic predicate evaluation in a method
    // that can tell SemanticPredicateListeners the result
    if (grammar.debuggingOutput
            && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
        pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING,"
                + addSemPred(escapedPred) + "," + pred + ")";
    println("if (!(" + pred + "))");
    println(" throw new SemanticException(\"" + escapedPred
            + "\");");
}
3163:
3164: /** Write an array of Strings which are the semantic predicate
3165: * expressions. The debugger will reference them by number only
3166: */
3167: protected void genSemPredMap() {
3168: Enumeration e = semPreds.elements();
3169: println("private string[] _semPredNames = {");
3170: tabs++;
3171: while (e.hasMoreElements())
3172: println("\"" + e.nextElement() + "\",");
3173: tabs--;
3174: println("};");
3175: }
3176:
/** Emit the guess-mode machinery for a syntactic predicate: save input
 *  state, speculatively match the predicate block inside a try/catch
 *  while inputState.guessing is incremented, restore input state, and
 *  leave an open "if (synPredMatchedN) {" for the caller to fill in.
 */
protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("gen=>(" + blk + ")");

    // Dump synpred result variable
    println("bool synPredMatched" + blk.ID + " = false;");
    // Gen normal lookahead test
    println("if (" + lookaheadExpr + ")");
    println("{");
    tabs++;

    // Save input state: tree walkers remember the current node, other
    // grammars mark() the input stream.
    if (grammar instanceof TreeWalkerGrammar) {
        println("AST __t" + blk.ID + " = _t;");
    } else {
        println("int _m" + blk.ID + " = mark();");
    }

    // Once inside the try, assume synpred works unless exception caught
    println("synPredMatched" + blk.ID + " = true;");
    println("inputState.guessing++;");

    // if debugging, tell listeners that a synpred has started
    if (grammar.debuggingOutput
            && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
        println("fireSyntacticPredicateStarted();");
    }

    syntacticPredLevel++;
    println("try {");
    tabs++;
    gen((AlternativeBlock) blk); // gen code to test predicate
    tabs--;
    //println("System.out.println(\"pred "+blk+" succeeded\");");
    println("}");
    //kunle: lose a few warnings cheaply
    // (the caught exception variable is deliberately unnamed in the
    // generated C# to avoid an "unused variable" warning)
    // println("catch (" + exceptionThrown + " pe)");
    println("catch (" + exceptionThrown + ")");
    println("{");
    tabs++;
    println("synPredMatched" + blk.ID + " = false;");
    //println("System.out.println(\"pred "+blk+" failed\");");
    tabs--;
    println("}");

    // Restore input state
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = __t" + blk.ID + ";");
    } else {
        println("rewind(_m" + blk.ID + ");");
    }

    println("inputState.guessing--;");

    // if debugging, tell listeners how the synpred turned out
    if (grammar.debuggingOutput
            && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
        println("if (synPredMatched" + blk.ID + ")");
        println(" fireSyntacticPredicateSucceeded();");
        println("else");
        println(" fireSyntacticPredicateFailed();");
    }

    syntacticPredLevel--;
    tabs--;

    // Close lookahead test
    println("}");

    // Test synred result -- NOTE: the opening brace emitted here is
    // closed later by the caller, not by this method.
    println("if ( synPredMatched" + blk.ID + " )");
    println("{");
}
3250:
3251: /** Generate a static array containing the names of the tokens,
3252: * indexed by the token type values. This static array is used
3253: * to format error messages so that the token identifers or literal
3254: * strings are displayed instead of the token numbers.
3255: *
3256: * If a lexical rule has a paraphrase, use it rather than the
3257: * token label.
3258: */
3259: public void genTokenStrings() {
3260: // Generate a string for each token. This creates a static
3261: // array of Strings indexed by token type.
3262: println("");
3263: println("public static readonly string[] tokenNames_ = new string[] {");
3264: tabs++;
3265:
3266: // Walk the token vocabulary and generate a Vector of strings
3267: // from the tokens.
3268: Vector v = grammar.tokenManager.getVocabulary();
3269: for (int i = 0; i < v.size(); i++) {
3270: String s = (String) v.elementAt(i);
3271: if (s == null) {
3272: s = "<" + String.valueOf(i) + ">";
3273: }
3274: if (!s.startsWith("\"") && !s.startsWith("<")) {
3275: TokenSymbol ts = (TokenSymbol) grammar.tokenManager
3276: .getTokenSymbol(s);
3277: if (ts != null && ts.getParaphrase() != null) {
3278: s = StringUtils.stripFrontBack(ts.getParaphrase(),
3279: "\"", "\"");
3280: }
3281: } else if (s.startsWith("\"")) {
3282: s = StringUtils.stripFrontBack(s, "\"", "\"");
3283: }
3284: print(charFormatter.literalString(s));
3285: if (i != v.size() - 1) {
3286: _print(",");
3287: }
3288: _println("");
3289: }
3290:
3291: // Close the string array initailizer
3292: tabs--;
3293: println("};");
3294: }
3295:
3296: /** Generate the token types CSharp file */
3297: protected void genTokenTypes(TokenManager tm) throws IOException {
3298: // Open the token output CSharp file and set the currentOutput stream
3299: // SAS: file open was moved to a method so a subclass can override
3300: // This was mainly for the VAJ interface
3301: setupOutput(tm.getName() + TokenTypesFileSuffix);
3302:
3303: tabs = 0;
3304:
3305: // Generate the header common to all CSharp files
3306: genHeader();
3307: // Do not use printAction because we assume tabs==0
3308: println(behavior.getHeaderAction(""));
3309:
3310: // Generate the CSharp namespace declaration (if specified)
3311: if (nameSpace != null)
3312: nameSpace.emitDeclarations(currentOutput);
3313: tabs++;
3314:
3315: // Encapsulate the definitions in a class. This has to be done as a class because
3316: // they are all constants and CSharp inteface types cannot contain constants.
3317: println("public class " + tm.getName() + TokenTypesFileSuffix);
3318: //println("public class " + getTokenTypesClassName());
3319: println("{");
3320: tabs++;
3321:
3322: genTokenDefinitions(tm);
3323:
3324: // Close the interface
3325: tabs--;
3326: println("}");
3327:
3328: tabs--;
3329: // Generate the CSharp namespace closures (if required)
3330: if (nameSpace != null)
3331: nameSpace.emitClosures(currentOutput);
3332:
3333: // Close the tokens output file
3334: currentOutput.close();
3335: currentOutput = null;
3336: exitIfError();
3337: }
3338:
3339: protected void genTokenDefinitions(TokenManager tm)
3340: throws IOException {
3341: // Generate a definition for each token type
3342: Vector v = tm.getVocabulary();
3343:
3344: // Do special tokens manually
3345: println("public const int EOF = " + Token.EOF_TYPE + ";");
3346: println("public const int NULL_TREE_LOOKAHEAD = "
3347: + Token.NULL_TREE_LOOKAHEAD + ";");
3348:
3349: for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
3350: String s = (String) v.elementAt(i);
3351: if (s != null) {
3352: if (s.startsWith("\"")) {
3353: // a string literal
3354: StringLiteralSymbol sl = (StringLiteralSymbol) tm
3355: .getTokenSymbol(s);
3356: if (sl == null) {
3357: antlrTool.panic("String literal " + s
3358: + " not in symbol table");
3359: } else if (sl.label != null) {
3360: println("public const int " + sl.label + " = "
3361: + i + ";");
3362: } else {
3363: String mangledName = mangleLiteral(s);
3364: if (mangledName != null) {
3365: // We were able to create a meaningful mangled token name
3366: println("public const int " + mangledName
3367: + " = " + i + ";");
3368: // if no label specified, make the label equal to the mangled name
3369: sl.label = mangledName;
3370: } else {
3371: println("// " + s + " = " + i);
3372: }
3373: }
3374: } else if (!s.startsWith("<")) {
3375: println("public const int " + s + " = " + i + ";");
3376: }
3377: }
3378: }
3379: println("");
3380: }
3381:
3382: /** Process a string for an simple expression for use in xx/action.g
3383: * it is used to cast simple tokens/references to the right type for
3384: * the generated language. Basically called for every element in
3385: * the vector to getASTCreateString(vector V)
3386: * @param str A String.
3387: */
3388: public String processStringForASTConstructor(String str) {
3389: /*
3390: System.out.println("processStringForASTConstructor: str = "+str+
3391: ", custom = "+(new Boolean(usingCustomAST)).toString()+
3392: ", tree = "+(new Boolean((grammar instanceof TreeWalkerGrammar))).toString()+
3393: ", parser = "+(new Boolean((grammar instanceof ParserGrammar))).toString()+
3394: ", notDefined = "+(new Boolean((!(grammar.tokenManager.tokenDefined(str))))).toString()
3395: );
3396: */
3397: if (usingCustomAST
3398: && ((grammar instanceof TreeWalkerGrammar) || (grammar instanceof ParserGrammar))
3399: && !(grammar.tokenManager.tokenDefined(str))) {
3400: //System.out.println("processStringForASTConstructor: "+str+" with cast");
3401: return "(AST)" + str;
3402: } else {
3403: //System.out.println("processStringForASTConstructor: "+str);
3404: return str;
3405: }
3406: }
3407:
3408: /** Get a string for an expression to generate creation of an AST subtree.
3409: * @param v A Vector of String, where each element is an expression
3410: * in the target language yielding an AST node.
3411: */
3412: public String getASTCreateString(Vector v) {
3413: if (v.size() == 0) {
3414: return "";
3415: }
3416: StringBuffer buf = new StringBuffer();
3417: buf.append("(" + labeledElementASTType
3418: + ")astFactory.make( (new ASTArray(" + v.size() + "))");
3419: for (int i = 0; i < v.size(); i++) {
3420: buf.append(".add(" + v.elementAt(i) + ")");
3421: }
3422: buf.append(")");
3423: return buf.toString();
3424: }
3425:
3426: /** Get a string for an expression to generate creating of an AST node
3427: * @param atom The grammar node for which you are creating the node
3428: * @param str The arguments to the AST constructor
3429: */
3430: public String getASTCreateString(GrammarAtom atom,
3431: String astCtorArgs) {
3432: String astCreateString = "astFactory.create(" + astCtorArgs
3433: + ")";
3434:
3435: if (atom == null)
3436: return getASTCreateString(astCtorArgs);
3437: else {
3438: if (atom.getASTNodeType() != null) {
3439: // this Atom was instantiated from a Token that had an "AST" option - associating
3440: // it with a specific heterogeneous AST type - applied to either:
3441: // 1) it's underlying TokenSymbol (in the "tokens {} section" or,
3442: // 2) a particular token reference in the grammar
3443: //
3444: // For option (1), we simply generate a cast to hetero-AST type
3445: // For option (2), we generate a call to factory.create(Token, ASTNodeType) and cast it too
3446: TokenSymbol ts = grammar.tokenManager
3447: .getTokenSymbol(atom.getText());
3448: if ((ts == null)
3449: || (ts.getASTNodeType() != atom
3450: .getASTNodeType()))
3451: astCreateString = "(" + atom.getASTNodeType()
3452: + ") astFactory.create(" + astCtorArgs
3453: + ", \"" + atom.getASTNodeType() + "\")";
3454: else if ((ts != null) && (ts.getASTNodeType() != null))
3455: astCreateString = "(" + ts.getASTNodeType() + ") "
3456: + astCreateString;
3457: } else if (usingCustomAST)
3458: astCreateString = "(" + labeledElementASTType + ") "
3459: + astCreateString;
3460: }
3461: return astCreateString;
3462: }
3463:
3464: /** Returns a string expression that creates an AST node using the specified
3465: * AST constructor argument string.
3466: * Parses the first (possibly only) argument in the supplied AST ctor argument
3467: * string to obtain the token type -- ctorID.
3468: *
3469: * IF the token type is a valid token symbol AND
3470: * it has an associated AST node type AND
3471: * this is not a #[ID, "T", "ASTType"] constructor
3472: * THEN
3473: * generate a call to factory.create(ID, Text, token.ASTNodeType())
3474: *
3475: * #[ID, "T", "ASTType"] constructors are mapped to astFactory.create(ID, "T", "ASTType")
3476: *
3477: * The supported AST constructor forms are:
3478: * #[ID]
3479: * #[ID, "text"]
3480: * #[ID, "text", ASTclassname] -- introduced in 2.7.2
3481: *
3482: * @param astCtorArgs The arguments to the AST constructor
3483: */
3484: public String getASTCreateString(String astCtorArgs) {
3485: // kunle: 19-Aug-2002
3486: // This AST creation string is almost certainly[*1] a manual tree construction request.
3487: // From the manual [I couldn't read ALL of the code ;-)], this can only be one of:
3488: // 1) #[ID] -- 'astCtorArgs' contains: 'ID' (without quotes) or,
3489: // 2) #[ID, "T"] -- 'astCtorArgs' contains: 'ID, "Text"' (without single quotes) or,
3490: // kunle: 08-Dec-2002 - 2.7.2a6
3491: // 3) #[ID, "T", "ASTTypeName"] -- 'astCtorArgs' contains: 'ID, "T", "ASTTypeName"' (without single quotes)
3492: //
3493: // [*1] In my tests, 'atom' was '== null' only for manual tree construction requests
3494:
3495: if (astCtorArgs == null) {
3496: astCtorArgs = "";
3497: }
3498: String astCreateString = "astFactory.create(" + astCtorArgs
3499: + ")";
3500: String ctorID = astCtorArgs;
3501: String ctorText = null;
3502: int commaIndex;
3503: boolean ctorIncludesCustomType = false; // Is this a #[ID, "t", "ASTType"] constructor?
3504:
3505: commaIndex = astCtorArgs.indexOf(',');
3506: if (commaIndex != -1) {
3507: ctorID = astCtorArgs.substring(0, commaIndex); // the 'ID' portion of #[ID, "Text"]
3508: ctorText = astCtorArgs.substring(commaIndex + 1,
3509: astCtorArgs.length()); // the 'Text' portion of #[ID, "Text"]
3510: commaIndex = ctorText.indexOf(',');
3511: if (commaIndex != -1) {
3512: // This is an AST creation of the form: #[ID, "Text", "ASTTypename"]
3513: // Support for this was introduced with 2.7.2a6
3514: // create default type or (since 2.7.2) 3rd arg is classname
3515: ctorIncludesCustomType = true;
3516: }
3517: }
3518: TokenSymbol ts = grammar.tokenManager.getTokenSymbol(ctorID);
3519: if ((null != ts) && (null != ts.getASTNodeType()))
3520: astCreateString = "(" + ts.getASTNodeType() + ") "
3521: + astCreateString;
3522: else if (usingCustomAST)
3523: astCreateString = "(" + labeledElementASTType + ") "
3524: + astCreateString;
3525:
3526: return astCreateString;
3527: }
3528:
3529: protected String getLookaheadTestExpression(Lookahead[] look, int k) {
3530: StringBuffer e = new StringBuffer(100);
3531: boolean first = true;
3532:
3533: e.append("(");
3534: for (int i = 1; i <= k; i++) {
3535: BitSet p = look[i].fset;
3536: if (!first) {
3537: e.append(") && (");
3538: }
3539: first = false;
3540:
3541: // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
3542: // There is no way to predict what that token would be. Just
3543: // allow anything instead.
3544: if (look[i].containsEpsilon()) {
3545: e.append("true");
3546: } else {
3547: e.append(getLookaheadTestTerm(i, p));
3548: }
3549: }
3550: e.append(")");
3551:
3552: return e.toString();
3553: }
3554:
3555: /**Generate a lookahead test expression for an alternate. This
3556: * will be a series of tests joined by '&&' and enclosed by '()',
3557: * the number of such tests being determined by the depth of the lookahead.
3558: */
3559: protected String getLookaheadTestExpression(Alternative alt,
3560: int maxDepth) {
3561: int depth = alt.lookaheadDepth;
3562: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
3563: // if the decision is nondeterministic, do the best we can: LL(k)
3564: // any predicates that are around will be generated later.
3565: depth = grammar.maxk;
3566: }
3567:
3568: if (maxDepth == 0) {
3569: // empty lookahead can result from alt with sem pred
3570: // that can see end of token. E.g., A : {pred}? ('a')? ;
3571: return "( true )";
3572: }
3573: return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
3574: }
3575:
3576: /**Generate a depth==1 lookahead test expression given the BitSet.
3577: * This may be one of:
3578: * 1) a series of 'x==X||' tests
3579: * 2) a range test using >= && <= where possible,
3580: * 3) a bitset membership test for complex comparisons
3581: * @param k The lookahead level
3582: * @param p The lookahead set for level k
3583: */
3584: protected String getLookaheadTestTerm(int k, BitSet p) {
3585: // Determine the name of the item to be compared
3586: String ts = lookaheadString(k);
3587:
3588: // Generate a range expression if possible
3589: int[] elems = p.toArray();
3590: if (elementsAreRange(elems)) {
3591: return getRangeExpression(k, elems);
3592: }
3593:
3594: // Generate a bitset membership test if possible
3595: StringBuffer e;
3596: int degree = p.degree();
3597: if (degree == 0) {
3598: return "true";
3599: }
3600:
3601: if (degree >= bitsetTestThreshold) {
3602: int bitsetIdx = markBitsetForGen(p);
3603: return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
3604: }
3605:
3606: // Otherwise, generate the long-winded series of "x==X||" tests
3607: e = new StringBuffer();
3608: for (int i = 0; i < elems.length; i++) {
3609: // Get the compared-to item (token or character value)
3610: String cs = getValueString(elems[i]);
3611:
3612: // Generate the element comparison
3613: if (i > 0)
3614: e.append("||");
3615: e.append(ts);
3616: e.append("==");
3617: e.append(cs);
3618: }
3619: return e.toString();
3620: }
3621:
3622: /** Return an expression for testing a contiguous renage of elements
3623: * @param k The lookahead level
3624: * @param elems The elements representing the set, usually from BitSet.toArray().
3625: * @return String containing test expression.
3626: */
3627: public String getRangeExpression(int k, int[] elems) {
3628: if (!elementsAreRange(elems)) {
3629: antlrTool.panic("getRangeExpression called with non-range");
3630: }
3631: int begin = elems[0];
3632: int end = elems[elems.length - 1];
3633:
3634: return "(" + lookaheadString(k) + " >= "
3635: + getValueString(begin) + " && " + lookaheadString(k)
3636: + " <= " + getValueString(end) + ")";
3637: }
3638:
3639: /** getValueString: get a string representation of a token or char value
3640: * @param value The token or char value
3641: */
3642: private String getValueString(int value) {
3643: String cs;
3644: if (grammar instanceof LexerGrammar) {
3645: cs = charFormatter.literalChar(value);
3646: } else {
3647: TokenSymbol ts = grammar.tokenManager
3648: .getTokenSymbolAt(value);
3649: if (ts == null) {
3650: return "" + value; // return token type as string
3651: // antlrTool.panic("vocabulary for token type " + value + " is null");
3652: }
3653: String tId = ts.getId();
3654: if (ts instanceof StringLiteralSymbol) {
3655: // if string literal, use predefined label if any
3656: // if no predefined, try to mangle into LITERAL_xxx.
3657: // if can't mangle, use int value as last resort
3658: StringLiteralSymbol sl = (StringLiteralSymbol) ts;
3659: String label = sl.getLabel();
3660: if (label != null) {
3661: cs = label;
3662: } else {
3663: cs = mangleLiteral(tId);
3664: if (cs == null) {
3665: cs = String.valueOf(value);
3666: }
3667: }
3668: } else {
3669: cs = tId;
3670: }
3671: }
3672: return cs;
3673: }
3674:
3675: /**Is the lookahead for this alt empty? */
3676: protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
3677: int depth = alt.lookaheadDepth;
3678: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
3679: depth = grammar.maxk;
3680: }
3681: for (int i = 1; i <= depth && i <= maxDepth; i++) {
3682: BitSet p = alt.cache[i].fset;
3683: if (p.degree() != 0) {
3684: return false;
3685: }
3686: }
3687: return true;
3688: }
3689:
3690: private String lookaheadString(int k) {
3691: if (grammar instanceof TreeWalkerGrammar) {
3692: return "_t.Type";
3693: }
3694: return "LA(" + k + ")";
3695: }
3696:
3697: /** Mangle a string literal into a meaningful token name. This is
3698: * only possible for literals that are all characters. The resulting
3699: * mangled literal name is literalsPrefix with the text of the literal
3700: * appended.
3701: * @return A string representing the mangled literal, or null if not possible.
3702: */
3703: private String mangleLiteral(String s) {
3704: String mangled = antlrTool.literalsPrefix;
3705: for (int i = 1; i < s.length() - 1; i++) {
3706: if (!Character.isLetter(s.charAt(i)) && s.charAt(i) != '_') {
3707: return null;
3708: }
3709: mangled += s.charAt(i);
3710: }
3711: if (antlrTool.upperCaseMangledLiterals) {
3712: mangled = mangled.toUpperCase();
3713: }
3714: return mangled;
3715: }
3716:
3717: /** Map an identifier to it's corresponding tree-node variable.
3718: * This is context-sensitive, depending on the rule and alternative
3719: * being generated
3720: * @param idParam The identifier name to map
3721: * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
3722: */
/** Map an identifier to it's corresponding tree-node variable.
 * This is context-sensitive, depending on the rule and alternative
 * being generated
 * @param idParam The identifier name to map
 * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
 */
public String mapTreeId(String idParam, ActionTransInfo transInfo) {
    // if not in an action of a rule, nothing to map.
    if (currentRule == null)
        return idParam;

    // in_var == true means the caller wants the *input* tree variable
    // rather than the constructed output (_AST) variable.
    boolean in_var = false;
    String id = idParam;
    if (grammar instanceof TreeWalkerGrammar) {
        if (!grammar.buildAST) {
            in_var = true;
        }
        // If the id ends with "_in", then map it to the input variable
        else if (id.length() > 3
                && id.lastIndexOf("_in") == id.length() - 3) {
            // Strip off the "_in"
            id = id.substring(0, id.length() - 3);
            in_var = true;
        }
    }

    // Check the rule labels. If id is a label, then the output
    // variable is label_AST, and the input variable is plain label.
    for (int i = 0; i < currentRule.labeledElements.size(); i++) {
        AlternativeElement elt = (AlternativeElement) currentRule.labeledElements
                .elementAt(i);
        if (elt.getLabel().equals(id)) {
            return in_var ? id : id + "_AST";
        }
    }

    // Failing that, check the id-to-variable map for the alternative.
    // If the id is in the map, then output variable is the name in the
    // map, and input variable is name_in
    String s = (String) treeVariableMap.get(id);
    if (s != null) {
        // NOTE: '==' here is an intentional identity check against the
        // NONUNIQUE sentinel object, not a string-content comparison.
        if (s == NONUNIQUE) {
            // There is more than one element with this id
            antlrTool.error("Ambiguous reference to AST element "
                    + id + " in rule " + currentRule.getRuleName());
            return null;
        } else if (s.equals(currentRule.getRuleName())) {
            // a recursive call to the enclosing rule is
            // ambiguous with the rule itself.
            // if( in_var )
            // System.out.println("returning null (rulename)");
            antlrTool.error("Ambiguous reference to AST element "
                    + id + " in rule " + currentRule.getRuleName());
            return null;
        } else {
            return in_var ? s + "_in" : s;
        }
    }

    // Failing that, check the rule name itself. Output variable
    // is rule_AST; input variable is rule_AST_in (treeparsers).
    if (id.equals(currentRule.getRuleName())) {
        String r = in_var ? id + "_AST_in" : id + "_AST";
        if (transInfo != null) {
            if (!in_var) {
                // Record that this action references the rule's AST root.
                transInfo.refRuleRoot = r;
            }
        }
        return r;
    } else {
        // id does not map to anything -- return itself.
        return id;
    }
}
3791:
3792: /** Given an element and the name of an associated AST variable,
3793: * create a mapping between the element "name" and the variable name.
3794: */
3795: private void mapTreeVariable(AlternativeElement e, String name) {
3796: // For tree elements, defer to the root
3797: if (e instanceof TreeElement) {
3798: mapTreeVariable(((TreeElement) e).root, name);
3799: return;
3800: }
3801:
3802: // Determine the name of the element, if any, for mapping purposes
3803: String elName = null;
3804:
3805: // Don't map labeled items
3806: if (e.getLabel() == null) {
3807: if (e instanceof TokenRefElement) {
3808: // use the token id
3809: elName = ((TokenRefElement) e).atomText;
3810: } else if (e instanceof RuleRefElement) {
3811: // use the rule name
3812: elName = ((RuleRefElement) e).targetRule;
3813: }
3814: }
3815: // Add the element to the tree variable map if it has a name
3816: if (elName != null) {
3817: if (treeVariableMap.get(elName) != null) {
3818: // Name is already in the map -- mark it as duplicate
3819: treeVariableMap.remove(elName);
3820: treeVariableMap.put(elName, NONUNIQUE);
3821: } else {
3822: treeVariableMap.put(elName, name);
3823: }
3824: }
3825: }
3826:
3827: /** Lexically process tree-specifiers in the action.
3828: * This will replace #id and #(...) with the appropriate
3829: * function calls and/or variables.
3830: */
3831: protected String processActionForSpecialSymbols(String actionStr,
3832: int line, RuleBlock currentRule, ActionTransInfo tInfo) {
3833: if (actionStr == null || actionStr.length() == 0)
3834: return null;
3835:
3836: // The action trans info tells us (at the moment) whether an
3837: // assignment was done to the rule's tree root.
3838: if (grammar == null)
3839: return actionStr;
3840:
3841: // see if we have anything to do...
3842: if ((grammar.buildAST && actionStr.indexOf('#') != -1)
3843: || grammar instanceof TreeWalkerGrammar
3844: || ((grammar instanceof LexerGrammar || grammar instanceof ParserGrammar) && actionStr
3845: .indexOf('$') != -1)) {
3846: // Create a lexer to read an action and return the translated version
3847: antlr.actions.csharp.ActionLexer lexer = new antlr.actions.csharp.ActionLexer(
3848: actionStr, currentRule, this , tInfo);
3849:
3850: lexer.setLineOffset(line);
3851: lexer.setFilename(grammar.getFilename());
3852: lexer.setTool(antlrTool);
3853:
3854: try {
3855: lexer.mACTION(true);
3856: actionStr = lexer.getTokenObject().getText();
3857: // System.out.println("action translated: "+actionStr);
3858: // System.out.println("trans info is "+tInfo);
3859: } catch (RecognitionException ex) {
3860: lexer.reportError(ex);
3861: return actionStr;
3862: } catch (TokenStreamException tex) {
3863: antlrTool.panic("Error reading action:" + actionStr);
3864: return actionStr;
3865: } catch (CharStreamException io) {
3866: antlrTool.panic("Error reading action:" + actionStr);
3867: return actionStr;
3868: }
3869: }
3870: return actionStr;
3871: }
3872:
/** Initialize the code-generation state that depends on the kind of
 * grammar (parser, lexer, or tree-walker): namespace, label types,
 * common method parameters, lookahead expressions, and the exception
 * thrown on no-viable-alt. Called once per grammar before generation.
 * @param g the grammar whose options drive the setup
 */
private void setupGrammarParameters(Grammar g) {
    if (g instanceof ParserGrammar || g instanceof LexerGrammar
            || g instanceof TreeWalkerGrammar) {
        /* RK: options also have to be added to Grammar.java and for options
         * on the file level entries have to be defined in
         * DefineGrammarSymbols.java and passed around via 'globals' in antlrTool.java
         */
        // File-level namespace from the command line / tool settings.
        if (antlrTool.nameSpace != null)
            nameSpace = new CSharpNameSpace(antlrTool.nameSpace
                    .getName());
        //genHashLines = antlrTool.genHashLines;

        /* let grammar level options override filelevel ones...
         */
        if (g.hasOption("namespace")) {
            Token t = g.getOption("namespace");
            if (t != null) {
                nameSpace = new CSharpNameSpace(t.getText());
            }
        }
        /*
        if( g.hasOption("genHashLines") ) {
        Token t = g.getOption("genHashLines");
        if( t != null ) {
        String val = StringUtils.stripFrontBack(t.getText(),"\"","\"");
        genHashLines = val.equals("true");
        }
        }
        */
    }

    if (g instanceof ParserGrammar) {
        // Parser: labels are Tokens, lookahead is LT(1).
        labeledElementASTType = "AST";
        if (g.hasOption("ASTLabelType")) {
            Token tsuffix = g.getOption("ASTLabelType");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix
                        .getText(), "\"", "\"");
                if (suffix != null) {
                    // User supplied a custom AST node type.
                    usingCustomAST = true;
                    labeledElementASTType = suffix;
                }
            }
        }
        labeledElementType = "Token ";
        labeledElementInit = "null";
        commonExtraArgs = "";
        commonExtraParams = "";
        commonLocalVars = "";
        lt1Value = "LT(1)";
        exceptionThrown = "RecognitionException";
        throwNoViable = "throw new NoViableAltException(LT(1), getFilename());";
    } else if (g instanceof LexerGrammar) {
        // Lexer: labels are chars; every rule method takes the
        // _createToken flag and declares token-building locals.
        labeledElementType = "char ";
        labeledElementInit = "'\\0'";
        commonExtraArgs = "";
        commonExtraParams = "bool _createToken";
        commonLocalVars = "int _ttype; Token _token=null; int _begin=text.Length;";
        lt1Value = "LA(1)";
        exceptionThrown = "RecognitionException";
        throwNoViable = "throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());";
    } else if (g instanceof TreeWalkerGrammar) {
        // Tree walker: labels are AST nodes and every rule method
        // threads the current tree node _t through as a parameter.
        labeledElementASTType = "AST";
        labeledElementType = "AST";
        if (g.hasOption("ASTLabelType")) {
            Token tsuffix = g.getOption("ASTLabelType");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix
                        .getText(), "\"", "\"");
                if (suffix != null) {
                    // User supplied a custom AST node type; labels use it too.
                    usingCustomAST = true;
                    labeledElementASTType = suffix;
                    labeledElementType = suffix;
                }
            }
        }
        // Ensure the option is always present so later generation can
        // rely on it; default to plain "AST".
        if (!g.hasOption("ASTLabelType")) {
            g.setOption("ASTLabelType", new Token(
                    ANTLRTokenTypes.STRING_LITERAL, "AST"));
        }
        labeledElementInit = "null";
        commonExtraArgs = "_t";
        commonExtraParams = "AST _t";
        commonLocalVars = "";
        lt1Value = "(" + labeledElementASTType + ")_t";
        exceptionThrown = "RecognitionException";
        throwNoViable = "throw new NoViableAltException(_t);";
    } else {
        // Unknown Grammar subclass: cannot generate code for it.
        antlrTool.panic("Unknown grammar type");
    }
}
3964:
3965: /** This method exists so a subclass, namely VAJCodeGenerator,
3966: * can open the file in its own evil way. JavaCodeGenerator
3967: * simply opens a text file...
3968: */
3969: public void setupOutput(String className) throws IOException {
3970: currentOutput = antlrTool.openOutputFile(className + ".cs");
3971: }
3972:
3973: /** Helper method from Eric Smith's version of CSharpCodeGenerator.*/
3974: private static String OctalToUnicode(String str) {
3975: // only do any conversion if the string looks like "'\003'"
3976: if ((4 <= str.length()) && ('\'' == str.charAt(0))
3977: && ('\\' == str.charAt(1))
3978: && (('0' <= str.charAt(2)) && ('7' >= str.charAt(2)))
3979: && ('\'' == str.charAt(str.length() - 1))) {
3980: // convert octal representation to decimal, then to hex
3981: Integer x = Integer.valueOf(str.substring(2,
3982: str.length() - 1), 8);
3983:
3984: return "'\\x" + Integer.toHexString(x.intValue()) + "'";
3985: } else {
3986: return str;
3987: }
3988: }
3989:
3990: /** Helper method that returns the name of the interface/class/enum type for
3991: token type constants.
3992: */
3993: public String getTokenTypesClassName() {
3994: TokenManager tm = grammar.tokenManager;
3995: return new String(tm.getName() + TokenTypesFileSuffix);
3996: }
3997:
3998: private void declareSaveIndexVariableIfNeeded() {
3999: if (!bSaveIndexCreated) {
4000: println("int _saveIndex = 0;");
4001: bSaveIndexCreated = true;
4002: }
4003: }
4004: }
|