0001: package persistence.antlr;
0002:
0003: /* ANTLR Translator Generator
0004: * Project led by Terence Parr at http://www.jGuru.com
0005: * Software rights: http://www.antlr.org/license.html
0006: *
0007: */
0008:
0009: import java.util.Enumeration;
0010: import java.util.Hashtable;
0011:
0012: import persistence.antlr.collections.impl.BitSet;
0013: import persistence.antlr.collections.impl.Vector;
0014:
0015: import java.io.PrintWriter; //SAS: changed for proper text file io
0016: import java.io.IOException;
0017: import java.io.FileWriter;
0018:
/** Generates MyParser.java, MyLexer.java and MyParserTokenTypes.java. */
public class JavaCodeGenerator extends CodeGenerator {
    /** Non-zero while generating code inside a syntactic predicate. */
    protected int syntacticPredLevel = 0;

    /** Are we generating ASTs (for parsers and tree parsers) right now? */
    protected boolean genAST = false;

    /** Are we saving the text consumed (for lexers) right now? */
    protected boolean saveText = false;

    // Grammar parameters set up to handle different grammar classes.
    // These are used to get instanceof tests out of code generation:
    // setupGrammarParameters() fills them in per grammar type.
    String labeledElementType;
    String labeledElementASTType;
    String labeledElementInit;
    String commonExtraArgs;
    String commonExtraParams;
    String commonLocalVars;
    String lt1Value;
    String exceptionThrown;
    String throwNoViable;

    /** Tracks the rule being generated. Used for mapTreeId. */
    RuleBlock currentRule;

    /** Tracks the rule or labeled subrule being generated. Used for
     *  AST generation. */
    String currentASTResult;

    /** Mapping between the ids used in the current alt, and the
     *  names of variables used to represent their AST values. */
    Hashtable treeVariableMap = new Hashtable();

    /** Used to keep track of which AST variables have been defined in a rule
     *  (except for the #rule_name and #rule_name_in vars). */
    Hashtable declaredASTVariables = new Hashtable();

    /** Count of unnamed generated variables. */
    int astVarNumber = 1;

    /** Special value used to mark duplicates in treeVariableMap.
     *  Deliberately allocated with 'new String()' so it is an identity
     *  sentinel: no other String can ever be == to it. */
    protected static final String NONUNIQUE = new String();

    /** Max number of case labels in a generated switch. */
    public static final int caseSizeThreshold = 127; // ascii is max

    /** Semantic predicate strings, collected only when debugging output is on. */
    private Vector semPreds;
0068:
/**
 * Create a Java code-generator using the given Grammar.
 * The caller must still call setTool, setBehavior, and setAnalyzer
 * before generating code.
 */
public JavaCodeGenerator() {
    super();
    // Install the Java-specific formatter for character/string literals.
    charFormatter = new JavaCharFormatter();
}
0077:
0078: /** Adds a semantic predicate string to the sem pred vector
0079: These strings will be used to build an array of sem pred names
0080: when building a debugging parser. This method should only be
0081: called when the debug option is specified
0082: */
0083: protected int addSemPred(String predicate) {
0084: semPreds.appendElement(predicate);
0085: return semPreds.size() - 1;
0086: }
0087:
0088: public void exitIfError() {
0089: if (antlrTool.hasError()) {
0090: antlrTool.fatalError("Exiting due to errors.");
0091: }
0092: }
0093:
0094: /**Generate the parser, lexer, treeparser, and token types in Java */
0095: public void gen() {
0096: // Do the code generation
0097: try {
0098: // Loop over all grammars
0099: Enumeration grammarIter = behavior.grammars.elements();
0100: while (grammarIter.hasMoreElements()) {
0101: Grammar g = (Grammar) grammarIter.nextElement();
0102: // Connect all the components to each other
0103: g.setGrammarAnalyzer(analyzer);
0104: g.setCodeGenerator(this );
0105: analyzer.setGrammar(g);
0106: // To get right overloading behavior across hetrogeneous grammars
0107: setupGrammarParameters(g);
0108: g.generate();
0109: // print out the grammar with lookahead sets (and FOLLOWs)
0110: // System.out.print(g.toString());
0111: exitIfError();
0112: }
0113:
0114: // Loop over all token managers (some of which are lexers)
0115: Enumeration tmIter = behavior.tokenManagers.elements();
0116: while (tmIter.hasMoreElements()) {
0117: TokenManager tm = (TokenManager) tmIter.nextElement();
0118: if (!tm.isReadOnly()) {
0119: // Write the token manager tokens as Java
0120: // this must appear before genTokenInterchange so that
0121: // labels are set on string literals
0122: genTokenTypes(tm);
0123: // Write the token manager tokens as plain text
0124: genTokenInterchange(tm);
0125: }
0126: exitIfError();
0127: }
0128: } catch (IOException e) {
0129: antlrTool.reportException(e, null);
0130: }
0131: }
0132:
/** Generate code for the given grammar element.
 * Emits a semantic predicate check, or translates and dumps a plain
 * {...} action (wrapped in a guessing==0 guard when synpreds exist).
 * @param action The {...} action to generate
 */
public void gen(ActionElement action) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genAction(" + action + ")");
    if (action.isSemPred) {
        genSemPred(action.actionText, action.line);
    } else {
        // User actions must not run while speculatively matching a
        // syntactic predicate, so guard them with inputState.guessing.
        if (grammar.hasSyntacticPredicate) {
            println("if ( inputState.guessing==0 ) {");
            tabs++;
        }

        // get the name of the followSet for the current rule so that we
        // can replace $FOLLOW in the .g file.
        ActionTransInfo tInfo = new ActionTransInfo();
        String actionStr = processActionForSpecialSymbols(
                action.actionText, action.getLine(), currentRule,
                tInfo);

        if (tInfo.refRuleRoot != null) {
            // Somebody referenced "#rule", make sure translated var is valid
            // assignment to #rule is left as a ref also, meaning that assignments
            // with no other refs like "#rule = foo();" still forces this code to be
            // generated (unnecessarily).
            println(tInfo.refRuleRoot + " = ("
                    + labeledElementASTType + ")currentAST.root;");
        }

        // dump the translated action
        printAction(actionStr);

        if (tInfo.assignToRoot) {
            // Somebody did a "#rule=", reset internal currentAST.root
            println("currentAST.root = " + tInfo.refRuleRoot + ";");
            // reset the child pointer too to be last sibling in sibling list
            println("currentAST.child = " + tInfo.refRuleRoot
                    + "!=null &&" + tInfo.refRuleRoot
                    + ".getFirstChild()!=null ?");
            tabs++;
            println(tInfo.refRuleRoot + ".getFirstChild() : "
                    + tInfo.refRuleRoot + ";");
            tabs--;
            println("currentAST.advanceChildToEnd();");
        }

        // Close the guessing guard opened above.
        if (grammar.hasSyntacticPredicate) {
            tabs--;
            println("}");
        }
    }
}
0186:
0187: /** Generate code for the given grammar element.
0188: * @param blk The "x|y|z|..." block to generate
0189: */
0190: public void gen(AlternativeBlock blk) {
0191: if (DEBUG_CODE_GENERATOR)
0192: System.out.println("gen(" + blk + ")");
0193: println("{");
0194: genBlockPreamble(blk);
0195: genBlockInitAction(blk);
0196:
0197: // Tell AST generation to build subrule result
0198: String saveCurrentASTResult = currentASTResult;
0199: if (blk.getLabel() != null) {
0200: currentASTResult = blk.getLabel();
0201: }
0202:
0203: boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
0204:
0205: JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
0206: genBlockFinish(howToFinish, throwNoViable);
0207:
0208: println("}");
0209:
0210: // Restore previous AST generation
0211: currentASTResult = saveCurrentASTResult;
0212: }
0213:
0214: /** Generate code for the given grammar element.
0215: * @param blk The block-end element to generate. Block-end
0216: * elements are synthesized by the grammar parser to represent
0217: * the end of a block.
0218: */
0219: public void gen(BlockEndElement end) {
0220: if (DEBUG_CODE_GENERATOR)
0221: System.out.println("genRuleEnd(" + end + ")");
0222: }
0223:
0224: /** Generate code for the given grammar element.
0225: * @param blk The character literal reference to generate
0226: */
0227: public void gen(CharLiteralElement atom) {
0228: if (DEBUG_CODE_GENERATOR)
0229: System.out.println("genChar(" + atom + ")");
0230:
0231: if (atom.getLabel() != null) {
0232: println(atom.getLabel() + " = " + lt1Value + ";");
0233: }
0234:
0235: boolean oldsaveText = saveText;
0236: saveText = saveText
0237: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0238: genMatch(atom);
0239: saveText = oldsaveText;
0240: }
0241:
0242: /** Generate code for the given grammar element.
0243: * @param blk The character-range reference to generate
0244: */
0245: public void gen(CharRangeElement r) {
0246: if (r.getLabel() != null && syntacticPredLevel == 0) {
0247: println(r.getLabel() + " = " + lt1Value + ";");
0248: }
0249: boolean flag = (grammar instanceof LexerGrammar && (!saveText || r
0250: .getAutoGenType() == GrammarElement.AUTO_GEN_BANG));
0251: if (flag) {
0252: println("_saveIndex=text.length();");
0253: }
0254:
0255: println("matchRange(" + r.beginText + "," + r.endText + ");");
0256:
0257: if (flag) {
0258: println("text.setLength(_saveIndex);");
0259: }
0260: }
0261:
0262: /** Generate the lexer Java file */
0263: public void gen(LexerGrammar g) throws IOException {
0264: // If debugging, create a new sempred vector for this grammar
0265: if (g.debuggingOutput)
0266: semPreds = new Vector();
0267:
0268: setGrammar(g);
0269: if (!(grammar instanceof LexerGrammar)) {
0270: antlrTool.panic("Internal error generating lexer");
0271: }
0272:
0273: // SAS: moved output creation to method so a subclass can change
0274: // how the output is generated (for VAJ interface)
0275: setupOutput(grammar.getClassName());
0276:
0277: genAST = false; // no way to gen trees.
0278: saveText = true; // save consumed characters.
0279:
0280: tabs = 0;
0281:
0282: // Generate header common to all Java output files
0283: genHeader();
0284: // Do not use printAction because we assume tabs==0
0285: println(behavior.getHeaderAction(""));
0286:
0287: // Generate header specific to lexer Java file
0288: // println("import java.io.FileInputStream;");
0289: println("import java.io.InputStream;");
0290: println("import persistence.antlr.TokenStreamException;");
0291: println("import persistence.antlr.TokenStreamIOException;");
0292: println("import persistence.antlr.TokenStreamRecognitionException;");
0293: println("import persistence.antlr.CharStreamException;");
0294: println("import persistence.antlr.CharStreamIOException;");
0295: println("import persistence.antlr.ANTLRException;");
0296: println("import java.io.Reader;");
0297: println("import java.util.Hashtable;");
0298: println("import persistence.antlr." + grammar.getSuperClass()
0299: + ";");
0300: println("import persistence.antlr.InputBuffer;");
0301: println("import persistence.antlr.ByteBuffer;");
0302: println("import persistence.antlr.CharBuffer;");
0303: println("import persistence.antlr.Token;");
0304: println("import persistence.antlr.CommonToken;");
0305: println("import persistence.antlr.RecognitionException;");
0306: println("import persistence.antlr.NoViableAltForCharException;");
0307: println("import persistence.antlr.MismatchedCharException;");
0308: println("import persistence.antlr.TokenStream;");
0309: println("import persistence.antlr.ANTLRHashString;");
0310: println("import persistence.antlr.LexerSharedInputState;");
0311: println("import persistence.antlr.collections.impl.BitSet;");
0312: println("import persistence.antlr.SemanticException;");
0313:
0314: // Generate user-defined lexer file preamble
0315: println(grammar.preambleAction.getText());
0316:
0317: // Generate lexer class definition
0318: String sup = null;
0319: if (grammar.super Class != null) {
0320: sup = grammar.super Class;
0321: } else {
0322: sup = "persistence.antlr." + grammar.getSuperClass();
0323: }
0324:
0325: // print javadoc comment if any
0326: if (grammar.comment != null) {
0327: _println(grammar.comment);
0328: }
0329:
0330: // get prefix (replaces "public" and lets user specify)
0331: String prefix = "public";
0332: Token tprefix = (Token) grammar.options
0333: .get("classHeaderPrefix");
0334: if (tprefix != null) {
0335: String p = StringUtils.stripFrontBack(tprefix.getText(),
0336: "\"", "\"");
0337: if (p != null) {
0338: prefix = p;
0339: }
0340: }
0341:
0342: print(prefix + " ");
0343: print("class " + grammar.getClassName() + " extends " + sup);
0344: println(" implements " + grammar.tokenManager.getName()
0345: + TokenTypesFileSuffix + ", TokenStream");
0346: Token tsuffix = (Token) grammar.options
0347: .get("classHeaderSuffix");
0348: if (tsuffix != null) {
0349: String suffix = StringUtils.stripFrontBack(tsuffix
0350: .getText(), "\"", "\"");
0351: if (suffix != null) {
0352: print(", " + suffix); // must be an interface name for Java
0353: }
0354: }
0355: println(" {");
0356:
0357: // Generate user-defined lexer class members
0358: print(processActionForSpecialSymbols(grammar.classMemberAction
0359: .getText(), grammar.classMemberAction.getLine(),
0360: currentRule, null));
0361:
0362: //
0363: // Generate the constructor from InputStream, which in turn
0364: // calls the ByteBuffer constructor
0365: //
0366: println("public " + grammar.getClassName()
0367: + "(InputStream in) {");
0368: tabs++;
0369: println("this(new ByteBuffer(in));");
0370: tabs--;
0371: println("}");
0372:
0373: //
0374: // Generate the constructor from Reader, which in turn
0375: // calls the CharBuffer constructor
0376: //
0377: println("public " + grammar.getClassName() + "(Reader in) {");
0378: tabs++;
0379: println("this(new CharBuffer(in));");
0380: tabs--;
0381: println("}");
0382:
0383: println("public " + grammar.getClassName()
0384: + "(InputBuffer ib) {");
0385: tabs++;
0386: // if debugging, wrap the input buffer in a debugger
0387: if (grammar.debuggingOutput)
0388: println("this(new LexerSharedInputState(new persistence.antlr.debug.DebuggingInputBuffer(ib)));");
0389: else
0390: println("this(new LexerSharedInputState(ib));");
0391: tabs--;
0392: println("}");
0393:
0394: //
0395: // Generate the constructor from InputBuffer (char or byte)
0396: //
0397: println("public " + grammar.getClassName()
0398: + "(LexerSharedInputState state) {");
0399: tabs++;
0400:
0401: println("super(state);");
0402: // if debugging, set up array variables and call user-overridable
0403: // debugging setup method
0404: if (grammar.debuggingOutput) {
0405: println(" ruleNames = _ruleNames;");
0406: println(" semPredNames = _semPredNames;");
0407: println(" setupDebugging();");
0408: }
0409:
0410: // Generate the setting of various generated options.
0411: // These need to be before the literals since ANTLRHashString depends on
0412: // the casesensitive stuff.
0413: println("caseSensitiveLiterals = " + g.caseSensitiveLiterals
0414: + ";");
0415: println("setCaseSensitive(" + g.caseSensitive + ");");
0416:
0417: // Generate the initialization of a hashtable
0418: // containing the string literals used in the lexer
0419: // The literals variable itself is in CharScanner
0420: println("literals = new Hashtable();");
0421: Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
0422: while (keys.hasMoreElements()) {
0423: String key = (String) keys.nextElement();
0424: if (key.charAt(0) != '"') {
0425: continue;
0426: }
0427: TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
0428: if (sym instanceof StringLiteralSymbol) {
0429: StringLiteralSymbol s = (StringLiteralSymbol) sym;
0430: println("literals.put(new ANTLRHashString(" + s.getId()
0431: + ", this), new Integer(" + s.getTokenType()
0432: + "));");
0433: }
0434: }
0435: tabs--;
0436:
0437: Enumeration ids;
0438: println("}");
0439:
0440: // generate the rule name array for debugging
0441: if (grammar.debuggingOutput) {
0442: println("private static final String _ruleNames[] = {");
0443:
0444: ids = grammar.rules.elements();
0445: int ruleNum = 0;
0446: while (ids.hasMoreElements()) {
0447: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
0448: if (sym instanceof RuleSymbol)
0449: println(" \"" + ((RuleSymbol) sym).getId() + "\",");
0450: }
0451: println("};");
0452: }
0453:
0454: // Generate nextToken() rule.
0455: // nextToken() is a synthetic lexer rule that is the implicit OR of all
0456: // user-defined lexer rules.
0457: genNextToken();
0458:
0459: // Generate code for each rule in the lexer
0460: ids = grammar.rules.elements();
0461: int ruleNum = 0;
0462: while (ids.hasMoreElements()) {
0463: RuleSymbol sym = (RuleSymbol) ids.nextElement();
0464: // Don't generate the synthetic rules
0465: if (!sym.getId().equals("mnextToken")) {
0466: genRule(sym, false, ruleNum++);
0467: }
0468: exitIfError();
0469: }
0470:
0471: // Generate the semantic predicate map for debugging
0472: if (grammar.debuggingOutput)
0473: genSemPredMap();
0474:
0475: // Generate the bitsets used throughout the lexer
0476: genBitsets(bitsetsUsed, ((LexerGrammar) grammar).charVocabulary
0477: .size());
0478:
0479: println("");
0480: println("}");
0481:
0482: // Close the lexer output stream
0483: currentOutput.close();
0484: currentOutput = null;
0485: }
0486:
/** Generate code for the given grammar element.
 * Emits a labeled do/while loop with an iteration counter; a (...)+
 * must match at least once, so the exit path checks the counter.
 * @param blk The (...)+ block to generate
 */
public void gen(OneOrMoreBlock blk) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("gen+(" + blk + ")");
    String label;
    String cnt;
    println("{");
    genBlockPreamble(blk);
    // Counter and loop label derive from the subrule's label when present,
    // else from its unique ID, so generated names never collide.
    if (blk.getLabel() != null) {
        cnt = "_cnt_" + blk.getLabel();
    } else {
        cnt = "_cnt" + blk.ID;
    }
    println("int " + cnt + "=0;");
    if (blk.getLabel() != null) {
        label = blk.getLabel();
    } else {
        label = "_loop" + blk.ID;
    }
    println(label + ":");
    println("do {");
    tabs++;
    // generate the init action for ()+ ()* inside the loop
    // this allows us to do useful EOF checking...
    genBlockInitAction(blk);

    // Tell AST generation to build subrule result
    String saveCurrentASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }

    // Called for its side effects (fills lookahead caches used below);
    // the boolean result itself is unused here.
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    // or when lookahead derived purely from end-of-file
    // Lookahead analysis stops when end-of-file is hit,
    // returning set {epsilon}. Since {epsilon} is not
    // ambig with any real tokens, no error is reported
    // by deterministic() routines and we have to check
    // for the case where the lookahead depth didn't get
    // set to NONDETERMINISTIC (this only happens when the
    // FOLLOW contains real atoms + epsilon).
    boolean generateNonGreedyExitPath = false;
    int nonGreedyExitDepth = grammar.maxk;

    if (!blk.greedy
            && blk.exitLookaheadDepth <= grammar.maxk
            && blk.exitCache[blk.exitLookaheadDepth]
                    .containsEpsilon()) {
        generateNonGreedyExitPath = true;
        nonGreedyExitDepth = blk.exitLookaheadDepth;
    } else if (!blk.greedy
            && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
        generateNonGreedyExitPath = true;
    }

    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    if (generateNonGreedyExitPath) {
        if (DEBUG_CODE_GENERATOR) {
            System.out
                    .println("nongreedy (...)+ loop; exit depth is "
                            + blk.exitLookaheadDepth);
        }
        String predictExit = getLookaheadTestExpression(
                blk.exitCache, nonGreedyExitDepth);
        println("// nongreedy exit test");
        // Exit only after at least one iteration, since this is (...)+.
        println("if ( " + cnt + ">=1 && " + predictExit
                + ") break " + label + ";");
    }

    JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
    // On no-viable-alt: break out if we already matched once, otherwise
    // it's an error (a (...)+ requires at least one match).
    genBlockFinish(howToFinish, "if ( " + cnt + ">=1 ) { break "
            + label + "; } else {" + throwNoViable + "}");

    println(cnt + "++;");
    tabs--;
    println("} while (true);");
    println("}");

    // Restore previous AST generation
    currentASTResult = saveCurrentASTResult;
}
0574:
0575: /** Generate the parser Java file */
0576: public void gen(ParserGrammar g) throws IOException {
0577:
0578: // if debugging, set up a new vector to keep track of sempred
0579: // strings for this grammar
0580: if (g.debuggingOutput)
0581: semPreds = new Vector();
0582:
0583: setGrammar(g);
0584: if (!(grammar instanceof ParserGrammar)) {
0585: antlrTool.panic("Internal error generating parser");
0586: }
0587:
0588: // Open the output stream for the parser and set the currentOutput
0589: // SAS: moved file setup so subclass could do it (for VAJ interface)
0590: setupOutput(grammar.getClassName());
0591:
0592: genAST = grammar.buildAST;
0593:
0594: tabs = 0;
0595:
0596: // Generate the header common to all output files.
0597: genHeader();
0598: // Do not use printAction because we assume tabs==0
0599: println(behavior.getHeaderAction(""));
0600:
0601: // Generate header for the parser
0602: println("import persistence.antlr.TokenBuffer;");
0603: println("import persistence.antlr.TokenStreamException;");
0604: println("import persistence.antlr.TokenStreamIOException;");
0605: println("import persistence.antlr.ANTLRException;");
0606: println("import persistence.antlr." + grammar.getSuperClass()
0607: + ";");
0608: println("import persistence.antlr.Token;");
0609: println("import persistence.antlr.TokenStream;");
0610: println("import persistence.antlr.RecognitionException;");
0611: println("import persistence.antlr.NoViableAltException;");
0612: println("import persistence.antlr.MismatchedTokenException;");
0613: println("import persistence.antlr.SemanticException;");
0614: println("import persistence.antlr.ParserSharedInputState;");
0615: println("import persistence.antlr.collections.impl.BitSet;");
0616: if (genAST) {
0617: println("import persistence.antlr.collections.AST;");
0618: println("import java.util.Hashtable;");
0619: println("import persistence.antlr.ASTFactory;");
0620: println("import persistence.antlr.ASTPair;");
0621: println("import persistence.antlr.collections.impl.ASTArray;");
0622: }
0623:
0624: // Output the user-defined parser preamble
0625: println(grammar.preambleAction.getText());
0626:
0627: // Generate parser class definition
0628: String sup = null;
0629: if (grammar.super Class != null)
0630: sup = grammar.super Class;
0631: else
0632: sup = "persistence.antlr." + grammar.getSuperClass();
0633:
0634: // print javadoc comment if any
0635: if (grammar.comment != null) {
0636: _println(grammar.comment);
0637: }
0638:
0639: // get prefix (replaces "public" and lets user specify)
0640: String prefix = "public";
0641: Token tprefix = (Token) grammar.options
0642: .get("classHeaderPrefix");
0643: if (tprefix != null) {
0644: String p = StringUtils.stripFrontBack(tprefix.getText(),
0645: "\"", "\"");
0646: if (p != null) {
0647: prefix = p;
0648: }
0649: }
0650:
0651: print(prefix + " ");
0652: print("class " + grammar.getClassName() + " extends " + sup);
0653: println(" implements " + grammar.tokenManager.getName()
0654: + TokenTypesFileSuffix);
0655:
0656: Token tsuffix = (Token) grammar.options
0657: .get("classHeaderSuffix");
0658: if (tsuffix != null) {
0659: String suffix = StringUtils.stripFrontBack(tsuffix
0660: .getText(), "\"", "\"");
0661: if (suffix != null)
0662: print(", " + suffix); // must be an interface name for Java
0663: }
0664: println(" {");
0665:
0666: // set up an array of all the rule names so the debugger can
0667: // keep track of them only by number -- less to store in tree...
0668: if (grammar.debuggingOutput) {
0669: println("private static final String _ruleNames[] = {");
0670:
0671: Enumeration ids = grammar.rules.elements();
0672: int ruleNum = 0;
0673: while (ids.hasMoreElements()) {
0674: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
0675: if (sym instanceof RuleSymbol)
0676: println(" \"" + ((RuleSymbol) sym).getId() + "\",");
0677: }
0678: println("};");
0679: }
0680:
0681: // Generate user-defined parser class members
0682: print(processActionForSpecialSymbols(grammar.classMemberAction
0683: .getText(), grammar.classMemberAction.getLine(),
0684: currentRule, null));
0685:
0686: // Generate parser class constructor from TokenBuffer
0687: println("");
0688: println("protected " + grammar.getClassName()
0689: + "(TokenBuffer tokenBuf, int k) {");
0690: println(" super(tokenBuf,k);");
0691: println(" tokenNames = _tokenNames;");
0692: // if debugging, set up arrays and call the user-overridable
0693: // debugging setup method
0694: if (grammar.debuggingOutput) {
0695: println(" ruleNames = _ruleNames;");
0696: println(" semPredNames = _semPredNames;");
0697: println(" setupDebugging(tokenBuf);");
0698: }
0699: if (grammar.buildAST) {
0700: println(" buildTokenTypeASTClassMap();");
0701: println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
0702: }
0703: println("}");
0704: println("");
0705:
0706: println("public " + grammar.getClassName()
0707: + "(TokenBuffer tokenBuf) {");
0708: println(" this(tokenBuf," + grammar.maxk + ");");
0709: println("}");
0710: println("");
0711:
0712: // Generate parser class constructor from TokenStream
0713: println("protected " + grammar.getClassName()
0714: + "(TokenStream lexer, int k) {");
0715: println(" super(lexer,k);");
0716: println(" tokenNames = _tokenNames;");
0717:
0718: // if debugging, set up arrays and call the user-overridable
0719: // debugging setup method
0720: if (grammar.debuggingOutput) {
0721: println(" ruleNames = _ruleNames;");
0722: println(" semPredNames = _semPredNames;");
0723: println(" setupDebugging(lexer);");
0724: }
0725: if (grammar.buildAST) {
0726: println(" buildTokenTypeASTClassMap();");
0727: println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
0728: }
0729: println("}");
0730: println("");
0731:
0732: println("public " + grammar.getClassName()
0733: + "(TokenStream lexer) {");
0734: println(" this(lexer," + grammar.maxk + ");");
0735: println("}");
0736: println("");
0737:
0738: println("public " + grammar.getClassName()
0739: + "(ParserSharedInputState state) {");
0740: println(" super(state," + grammar.maxk + ");");
0741: println(" tokenNames = _tokenNames;");
0742: if (grammar.buildAST) {
0743: println(" buildTokenTypeASTClassMap();");
0744: println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
0745: }
0746: println("}");
0747: println("");
0748:
0749: // Generate code for each rule in the grammar
0750: Enumeration ids = grammar.rules.elements();
0751: int ruleNum = 0;
0752: while (ids.hasMoreElements()) {
0753: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
0754: if (sym instanceof RuleSymbol) {
0755: RuleSymbol rs = (RuleSymbol) sym;
0756: genRule(rs, rs.references.size() == 0, ruleNum++);
0757: }
0758: exitIfError();
0759: }
0760:
0761: // Generate the token names
0762: genTokenStrings();
0763:
0764: if (grammar.buildAST) {
0765: genTokenASTNodeMap();
0766: }
0767:
0768: // Generate the bitsets used throughout the grammar
0769: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
0770:
0771: // Generate the semantic predicate map for debugging
0772: if (grammar.debuggingOutput)
0773: genSemPredMap();
0774:
0775: // Close class definition
0776: println("");
0777: println("}");
0778:
0779: // Close the parser output stream
0780: currentOutput.close();
0781: currentOutput = null;
0782: }
0783:
/** Generate code for the given grammar element.
 * Emits the invocation of another rule, including optional label
 * assignment, return-value assignment, AST hookup, and lexer text
 * buffer management around the call.
 * @param rr The rule-reference to generate
 */
public void gen(RuleRefElement rr) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genRR(" + rr + ")");
    RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
    if (rs == null || !rs.isDefined()) {
        // Is this redundant???
        antlrTool.error("Rule '" + rr.targetRule
                + "' is not defined", grammar.getFilename(), rr
                .getLine(), rr.getColumn());
        return;
    }
    if (!(rs instanceof RuleSymbol)) {
        // Is this redundant???
        antlrTool.error("'" + rr.targetRule
                + "' does not name a grammar rule", grammar
                .getFilename(), rr.getLine(), rr.getColumn());
        return;
    }

    genErrorTryForElement(rr);

    // AST value for labeled rule refs in tree walker.
    // This is not AST construction; it is just the input tree node value.
    if (grammar instanceof TreeWalkerGrammar
            && rr.getLabel() != null && syntacticPredLevel == 0) {
        println(rr.getLabel() + " = _t==ASTNULL ? null : "
                + lt1Value + ";");
    }

    // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
    if (grammar instanceof LexerGrammar
            && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
        println("_saveIndex=text.length();");
    }

    // Process return value assignment if any
    printTabs();
    if (rr.idAssign != null) {
        // Warn if the rule has no return type
        if (rs.block.returnAction == null) {
            antlrTool.warning("Rule '" + rr.targetRule
                    + "' has no return type",
                    grammar.getFilename(), rr.getLine(), rr
                            .getColumn());
        }
        _print(rr.idAssign + "=");
    } else {
        // Warn about return value if any, but not inside syntactic predicate
        if (!(grammar instanceof LexerGrammar)
                && syntacticPredLevel == 0
                && rs.block.returnAction != null) {
            antlrTool.warning("Rule '" + rr.targetRule
                    + "' returns a value", grammar.getFilename(),
                    rr.getLine(), rr.getColumn());
        }
    }

    // Call the rule
    GenRuleInvocation(rr);

    // if in lexer and ! on element or alt or rule, save buffer index to kill later
    if (grammar instanceof LexerGrammar
            && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
        println("text.setLength(_saveIndex);");
    }

    // if not in a syntactic predicate
    if (syntacticPredLevel == 0) {
        boolean doNoGuessTest = (grammar.hasSyntacticPredicate && (grammar.buildAST
                && rr.getLabel() != null || (genAST && rr
                .getAutoGenType() == GrammarElement.AUTO_GEN_NONE)));
        // NOTE: the guess-test wrapping below is intentionally disabled
        // (commented out); the empty if-blocks preserve the structure.
        if (doNoGuessTest) {
            // println("if (inputState.guessing==0) {");
            // tabs++;
        }

        if (grammar.buildAST && rr.getLabel() != null) {
            // always gen variable for rule return on labeled rules
            println(rr.getLabel() + "_AST = ("
                    + labeledElementASTType + ")returnAST;");
        }
        if (genAST) {
            switch (rr.getAutoGenType()) {
            case GrammarElement.AUTO_GEN_NONE:
                // println("theASTFactory.addASTChild(currentAST, returnAST);");
                println("astFactory.addASTChild(currentAST, returnAST);");
                break;
            case GrammarElement.AUTO_GEN_CARET:
                // ^ on a rule reference is meaningless; should have been
                // rejected earlier, so reaching here is an internal error.
                antlrTool
                        .error("Internal: encountered ^ after rule reference");
                break;
            default:
                break;
            }
        }

        // if a lexer and labeled, Token label defined at rule level, just set it here
        if (grammar instanceof LexerGrammar
                && rr.getLabel() != null) {
            println(rr.getLabel() + "=_returnToken;");
        }

        if (doNoGuessTest) {
            // tabs--;
            // println("}");
        }
    }
    genErrorCatchForElement(rr);
}
0896:
0897: /** Generate code for the given grammar element.
0898: * @param blk The string-literal reference to generate
0899: */
0900: public void gen(StringLiteralElement atom) {
0901: if (DEBUG_CODE_GENERATOR)
0902: System.out.println("genString(" + atom + ")");
0903:
0904: // Variable declarations for labeled elements
0905: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0906: println(atom.getLabel() + " = " + lt1Value + ";");
0907: }
0908:
0909: // AST
0910: genElementAST(atom);
0911:
0912: // is there a bang on the literal?
0913: boolean oldsaveText = saveText;
0914: saveText = saveText
0915: && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
0916:
0917: // matching
0918: genMatch(atom);
0919:
0920: saveText = oldsaveText;
0921:
0922: // tack on tree cursor motion if doing a tree walker
0923: if (grammar instanceof TreeWalkerGrammar) {
0924: println("_t = _t.getNextSibling();");
0925: }
0926: }
0927:
0928: /** Generate code for the given grammar element.
0929: * @param blk The token-range reference to generate
0930: */
0931: public void gen(TokenRangeElement r) {
0932: genErrorTryForElement(r);
0933: if (r.getLabel() != null && syntacticPredLevel == 0) {
0934: println(r.getLabel() + " = " + lt1Value + ";");
0935: }
0936:
0937: // AST
0938: genElementAST(r);
0939:
0940: // match
0941: println("matchRange(" + r.beginText + "," + r.endText + ");");
0942: genErrorCatchForElement(r);
0943: }
0944:
0945: /** Generate code for the given grammar element.
0946: * @param blk The token-reference to generate
0947: */
0948: public void gen(TokenRefElement atom) {
0949: if (DEBUG_CODE_GENERATOR)
0950: System.out.println("genTokenRef(" + atom + ")");
0951: if (grammar instanceof LexerGrammar) {
0952: antlrTool.panic("Token reference found in lexer");
0953: }
0954: genErrorTryForElement(atom);
0955: // Assign Token value to token label variable
0956: if (atom.getLabel() != null && syntacticPredLevel == 0) {
0957: println(atom.getLabel() + " = " + lt1Value + ";");
0958: }
0959:
0960: // AST
0961: genElementAST(atom);
0962: // matching
0963: genMatch(atom);
0964: genErrorCatchForElement(atom);
0965:
0966: // tack on tree cursor motion if doing a tree walker
0967: if (grammar instanceof TreeWalkerGrammar) {
0968: println("_t = _t.getNextSibling();");
0969: }
0970: }
0971:
/** Generate code that matches a tree pattern: #( root child1 child2 ... ).
 *  Saves and restores the tree cursor and (when building ASTs) the
 *  AST-construction state around the pattern.
 * @param t The tree element to generate
 */
public void gen(TreeElement t) {
    // save AST cursor
    println("AST __t" + t.ID + " = _t;");

    // If there is a label on the root, then assign that to the variable
    if (t.root.getLabel() != null) {
        println(t.root.getLabel() + " = _t==ASTNULL ? null :("
                + labeledElementASTType + ")_t;");
    }

    // check for invalid modifiers ! and ^ on tree element roots;
    // both are reported and then cleared so generation can continue
    if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG) {
        antlrTool
                .error(
                        "Suffixing a root node with '!' is not implemented",
                        grammar.getFilename(), t.getLine(), t
                                .getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }
    if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET) {
        antlrTool
                .warning(
                        "Suffixing a root node with '^' is redundant; already a root",
                        grammar.getFilename(), t.getLine(), t
                                .getColumn());
        t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
    }

    // Generate AST variables
    genElementAST(t.root);
    if (grammar.buildAST) {
        // Save the AST construction state
        println("ASTPair __currentAST" + t.ID
                + " = currentAST.copy();");
        // Make the next item added a child of the TreeElement root
        println("currentAST.root = currentAST.child;");
        println("currentAST.child = null;");
    }

    // match root
    if (t.root instanceof WildcardElement) {
        // wildcard root: anything non-null matches
        println("if ( _t==null ) throw new MismatchedTokenException();");
    } else {
        genMatch(t.root);
    }
    // move to list of children
    println("_t = _t.getFirstChild();");

    // walk list of children, generating code for each
    for (int i = 0; i < t.getAlternatives().size(); i++) {
        Alternative a = t.getAlternativeAt(i);
        AlternativeElement e = a.head;
        while (e != null) {
            e.generate();
            e = e.next;
        }
    }

    if (grammar.buildAST) {
        // restore the AST construction state to that just after the
        // tree root was added
        println("currentAST = __currentAST" + t.ID + ";");
    }
    // restore AST cursor
    println("_t = __t" + t.ID + ";");
    // move cursor to sibling of tree just parsed
    println("_t = _t.getNextSibling();");
}
1040:
1041: /** Generate the tree-parser Java file */
1042: public void gen(TreeWalkerGrammar g) throws IOException {
1043: // SAS: debugging stuff removed for now...
1044: setGrammar(g);
1045: if (!(grammar instanceof TreeWalkerGrammar)) {
1046: antlrTool.panic("Internal error generating tree-walker");
1047: }
1048: // Open the output stream for the parser and set the currentOutput
1049: // SAS: move file open to method so subclass can override it
1050: // (mainly for VAJ interface)
1051: setupOutput(grammar.getClassName());
1052:
1053: genAST = grammar.buildAST;
1054: tabs = 0;
1055:
1056: // Generate the header common to all output files.
1057: genHeader();
1058: // Do not use printAction because we assume tabs==0
1059: println(behavior.getHeaderAction(""));
1060:
1061: // Generate header for the parser
1062: println("import persistence.antlr." + grammar.getSuperClass()
1063: + ";");
1064: println("import persistence.antlr.Token;");
1065: println("import persistence.antlr.collections.AST;");
1066: println("import persistence.antlr.RecognitionException;");
1067: println("import persistence.antlr.ANTLRException;");
1068: println("import persistence.antlr.NoViableAltException;");
1069: println("import persistence.antlr.MismatchedTokenException;");
1070: println("import persistence.antlr.SemanticException;");
1071: println("import persistence.antlr.collections.impl.BitSet;");
1072: println("import persistence.antlr.ASTPair;");
1073: println("import persistence.antlr.collections.impl.ASTArray;");
1074:
1075: // Output the user-defined parser premamble
1076: println(grammar.preambleAction.getText());
1077:
1078: // Generate parser class definition
1079: String sup = null;
1080: if (grammar.super Class != null) {
1081: sup = grammar.super Class;
1082: } else {
1083: sup = "persistence.antlr." + grammar.getSuperClass();
1084: }
1085: println("");
1086:
1087: // print javadoc comment if any
1088: if (grammar.comment != null) {
1089: _println(grammar.comment);
1090: }
1091:
1092: // get prefix (replaces "public" and lets user specify)
1093: String prefix = "public";
1094: Token tprefix = (Token) grammar.options
1095: .get("classHeaderPrefix");
1096: if (tprefix != null) {
1097: String p = StringUtils.stripFrontBack(tprefix.getText(),
1098: "\"", "\"");
1099: if (p != null) {
1100: prefix = p;
1101: }
1102: }
1103:
1104: print(prefix + " ");
1105: print("class " + grammar.getClassName() + " extends " + sup);
1106: println(" implements " + grammar.tokenManager.getName()
1107: + TokenTypesFileSuffix);
1108: Token tsuffix = (Token) grammar.options
1109: .get("classHeaderSuffix");
1110: if (tsuffix != null) {
1111: String suffix = StringUtils.stripFrontBack(tsuffix
1112: .getText(), "\"", "\"");
1113: if (suffix != null) {
1114: print(", " + suffix); // must be an interface name for Java
1115: }
1116: }
1117: println(" {");
1118:
1119: // Generate user-defined parser class members
1120: print(processActionForSpecialSymbols(grammar.classMemberAction
1121: .getText(), grammar.classMemberAction.getLine(),
1122: currentRule, null));
1123:
1124: // Generate default parser class constructor
1125: println("public " + grammar.getClassName() + "() {");
1126: tabs++;
1127: println("tokenNames = _tokenNames;");
1128: tabs--;
1129: println("}");
1130: println("");
1131:
1132: // Generate code for each rule in the grammar
1133: Enumeration ids = grammar.rules.elements();
1134: int ruleNum = 0;
1135: String ruleNameInits = "";
1136: while (ids.hasMoreElements()) {
1137: GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
1138: if (sym instanceof RuleSymbol) {
1139: RuleSymbol rs = (RuleSymbol) sym;
1140: genRule(rs, rs.references.size() == 0, ruleNum++);
1141: }
1142: exitIfError();
1143: }
1144:
1145: // Generate the token names
1146: genTokenStrings();
1147:
1148: // Generate the bitsets used throughout the grammar
1149: genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());
1150:
1151: // Close class definition
1152: println("}");
1153: println("");
1154:
1155: // Close the parser output stream
1156: currentOutput.close();
1157: currentOutput = null;
1158: }
1159:
1160: /** Generate code for the given grammar element.
1161: * @param wc The wildcard element to generate
1162: */
1163: public void gen(WildcardElement wc) {
1164: // Variable assignment for labeled elements
1165: if (wc.getLabel() != null && syntacticPredLevel == 0) {
1166: println(wc.getLabel() + " = " + lt1Value + ";");
1167: }
1168:
1169: // AST
1170: genElementAST(wc);
1171: // Match anything but EOF
1172: if (grammar instanceof TreeWalkerGrammar) {
1173: println("if ( _t==null ) throw new MismatchedTokenException();");
1174: } else if (grammar instanceof LexerGrammar) {
1175: if (grammar instanceof LexerGrammar
1176: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
1177: println("_saveIndex=text.length();");
1178: }
1179: println("matchNot(EOF_CHAR);");
1180: if (grammar instanceof LexerGrammar
1181: && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
1182: println("text.setLength(_saveIndex);"); // kill text atom put in buffer
1183: }
1184: } else {
1185: println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
1186: }
1187:
1188: // tack on tree cursor motion if doing a tree walker
1189: if (grammar instanceof TreeWalkerGrammar) {
1190: println("_t = _t.getNextSibling();");
1191: }
1192: }
1193:
/** Generate code for the given grammar element.
 *  Emits a labeled do/while(true) loop; alternatives break out of it
 *  when nothing predicts another iteration.
 * @param blk The (...)* block to generate
 */
public void gen(ZeroOrMoreBlock blk) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("gen*(" + blk + ")");
    println("{");
    genBlockPreamble(blk);
    // Use the user's label for the loop if present, else synthesize
    // one from the block id so the generated breaks can target it.
    String label;
    if (blk.getLabel() != null) {
        label = blk.getLabel();
    } else {
        label = "_loop" + blk.ID;
    }
    println(label + ":");
    println("do {");
    tabs++;
    // generate the init action for ()* inside the loop
    // this allows us to do useful EOF checking...
    genBlockInitAction(blk);

    // Tell AST generation to build subrule result
    String saveCurrentASTResult = currentASTResult;
    if (blk.getLabel() != null) {
        currentASTResult = blk.getLabel();
    }

    // Run lookahead analysis (also fills in blk.exitCache and
    // blk.exitLookaheadDepth used below).
    boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

    // generate exit test if greedy set to false
    // and an alt is ambiguous with exit branch
    // or when lookahead derived purely from end-of-file
    // Lookahead analysis stops when end-of-file is hit,
    // returning set {epsilon}. Since {epsilon} is not
    // ambig with any real tokens, no error is reported
    // by deterministic() routines and we have to check
    // for the case where the lookahead depth didn't get
    // set to NONDETERMINISTIC (this only happens when the
    // FOLLOW contains real atoms + epsilon).
    boolean generateNonGreedyExitPath = false;
    int nonGreedyExitDepth = grammar.maxk;

    if (!blk.greedy
            && blk.exitLookaheadDepth <= grammar.maxk
            && blk.exitCache[blk.exitLookaheadDepth]
                    .containsEpsilon()) {
        generateNonGreedyExitPath = true;
        nonGreedyExitDepth = blk.exitLookaheadDepth;
    } else if (!blk.greedy
            && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
        generateNonGreedyExitPath = true;
    }
    if (generateNonGreedyExitPath) {
        if (DEBUG_CODE_GENERATOR) {
            System.out
                    .println("nongreedy (...)* loop; exit depth is "
                            + blk.exitLookaheadDepth);
        }
        String predictExit = getLookaheadTestExpression(
                blk.exitCache, nonGreedyExitDepth);
        println("// nongreedy exit test");
        println("if (" + predictExit + ") break " + label + ";");
    }

    // Generate the alternatives; when no alternative predicts, the
    // block finishes by breaking out of the loop.
    JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false);
    genBlockFinish(howToFinish, "break " + label + ";");

    tabs--;
    println("} while (true);");
    println("}");

    // Restore previous AST generation
    currentASTResult = saveCurrentASTResult;
}
1268:
1269: /** Generate an alternative.
1270: * @param alt The alternative to generate
1271: * @param blk The block to which the alternative belongs
1272: */
1273: protected void genAlt(Alternative alt, AlternativeBlock blk) {
1274: // Save the AST generation state, and set it to that of the alt
1275: boolean savegenAST = genAST;
1276: genAST = genAST && alt.getAutoGen();
1277:
1278: boolean oldsaveTest = saveText;
1279: saveText = saveText && alt.getAutoGen();
1280:
1281: // Reset the variable name map for the alternative
1282: Hashtable saveMap = treeVariableMap;
1283: treeVariableMap = new Hashtable();
1284:
1285: // Generate try block around the alt for error handling
1286: if (alt.exceptionSpec != null) {
1287: println("try { // for error handling");
1288: tabs++;
1289: }
1290:
1291: AlternativeElement elem = alt.head;
1292: while (!(elem instanceof BlockEndElement)) {
1293: elem.generate(); // alt can begin with anything. Ask target to gen.
1294: elem = elem.next;
1295: }
1296:
1297: if (genAST) {
1298: if (blk instanceof RuleBlock) {
1299: // Set the AST return value for the rule
1300: RuleBlock rblk = (RuleBlock) blk;
1301: if (grammar.hasSyntacticPredicate) {
1302: // println("if ( inputState.guessing==0 ) {");
1303: // tabs++;
1304: }
1305: println(rblk.getRuleName() + "_AST = ("
1306: + labeledElementASTType + ")currentAST.root;");
1307: if (grammar.hasSyntacticPredicate) {
1308: // --tabs;
1309: // println("}");
1310: }
1311: } else if (blk.getLabel() != null) {
1312: // ### future: also set AST value for labeled subrules.
1313: // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
1314: antlrTool.warning("Labeled subrules not yet supported",
1315: grammar.getFilename(), blk.getLine(), blk
1316: .getColumn());
1317: }
1318: }
1319:
1320: if (alt.exceptionSpec != null) {
1321: // close try block
1322: tabs--;
1323: println("}");
1324: genErrorHandler(alt.exceptionSpec);
1325: }
1326:
1327: genAST = savegenAST;
1328: saveText = oldsaveTest;
1329:
1330: treeVariableMap = saveMap;
1331: }
1332:
1333: /** Generate all the bitsets to be used in the parser or lexer
1334: * Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
1335: * and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
1336: * Note that most languages do not support object initialization inside a
1337: * class definition, so other code-generators may have to separate the
1338: * bitset declarations from the initializations (e.g., put the initializations
1339: * in the generated constructor instead).
1340: * @param bitsetList The list of bitsets to generate.
1341: * @param maxVocabulary Ensure that each generated bitset can contain at least this value.
1342: */
1343: protected void genBitsets(Vector bitsetList, int maxVocabulary) {
1344: println("");
1345: for (int i = 0; i < bitsetList.size(); i++) {
1346: BitSet p = (BitSet) bitsetList.elementAt(i);
1347: // Ensure that generated BitSet is large enough for vocabulary
1348: p.growToInclude(maxVocabulary);
1349: genBitSet(p, i);
1350: }
1351: }
1352:
1353: /** Do something simple like:
1354: * private static final long[] mk_tokenSet_0() {
1355: * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
1356: * return data;
1357: * }
1358: * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
1359: *
1360: * Or, for large bitsets, optimize init so ranges are collapsed into loops.
1361: * This is most useful for lexers using unicode.
1362: */
1363: private void genBitSet(BitSet p, int id) {
1364: // initialization data
1365: println("private static final long[] mk" + getBitsetName(id)
1366: + "() {");
1367: int n = p.lengthInLongWords();
1368: if (n < BITSET_OPTIMIZE_INIT_THRESHOLD) {
1369: println("\tlong[] data = { " + p.toStringOfWords() + "};");
1370: } else {
1371: // will init manually, allocate space then set values
1372: println("\tlong[] data = new long[" + n + "];");
1373: long[] elems = p.toPackedArray();
1374: for (int i = 0; i < elems.length;) {
1375: if (elems[i] == 0) {
1376: // done automatically by Java, don't waste time/code
1377: i++;
1378: continue;
1379: }
1380: if ((i + 1) == elems.length || elems[i] != elems[i + 1]) {
1381: // last number or no run of numbers, just dump assignment
1382: println("\tdata[" + i + "]=" + elems[i] + "L;");
1383: i++;
1384: } else {
1385: // scan to find end of run
1386: int j;
1387: for (j = i + 1; j < elems.length
1388: && elems[j] == elems[i]; j++) {
1389: }
1390: // j-1 is last member of run
1391: println("\tfor (int i = " + i + "; i<=" + (j - 1)
1392: + "; i++) { data[i]=" + elems[i] + "L; }");
1393: i = j;
1394: }
1395: }
1396: }
1397:
1398: println("\treturn data;");
1399: println("}");
1400: // BitSet object
1401: println("public static final BitSet " + getBitsetName(id)
1402: + " = new BitSet(" + "mk" + getBitsetName(id) + "()"
1403: + ");");
1404: }
1405:
1406: /** Generate the finish of a block, using a combination of the info
1407: * returned from genCommonBlock() and the action to perform when
1408: * no alts were taken
1409: * @param howToFinish The return of genCommonBlock()
1410: * @param noViableAction What to generate when no alt is taken
1411: */
1412: private void genBlockFinish(JavaBlockFinishingInfo howToFinish,
1413: String noViableAction) {
1414: if (howToFinish.needAnErrorClause
1415: && (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
1416: if (howToFinish.generatedAnIf) {
1417: println("else {");
1418: } else {
1419: println("{");
1420: }
1421: tabs++;
1422: println(noViableAction);
1423: tabs--;
1424: println("}");
1425: }
1426:
1427: if (howToFinish.postscript != null) {
1428: println(howToFinish.postscript);
1429: }
1430: }
1431:
1432: /** Generate the init action for a block, which may be a RuleBlock or a
1433: * plain AlternativeBLock.
1434: * @blk The block for which the preamble is to be generated.
1435: */
1436: protected void genBlockInitAction(AlternativeBlock blk) {
1437: // dump out init action
1438: if (blk.initAction != null) {
1439: printAction(processActionForSpecialSymbols(blk.initAction,
1440: blk.getLine(), currentRule, null));
1441: }
1442: }
1443:
/** Generate the header for a block, which may be a RuleBlock or a
 * plain AlternativeBLock. This generates any variable declarations
 * and syntactic-predicate-testing variables.
 * Only rule blocks declare label variables; plain blocks emit nothing.
 * @blk The block for which the preamble is to be generated.
 */
protected void genBlockPreamble(AlternativeBlock blk) {
    // define labels for rule blocks.
    if (blk instanceof RuleBlock) {
        RuleBlock rblk = (RuleBlock) blk;
        if (rblk.labeledElements != null) {
            for (int i = 0; i < rblk.labeledElements.size(); i++) {
                AlternativeElement a = (AlternativeElement) rblk.labeledElements
                        .elementAt(i);
                // System.out.println("looking at labeled element: "+a);
                // Variables for labeled rule refs and
                // subrules are different than variables for
                // grammar atoms. This test is a little tricky
                // because we want to get all rule refs and ebnf,
                // but not rule blocks or syntactic predicates
                if (a instanceof RuleRefElement
                        || a instanceof AlternativeBlock
                        && !(a instanceof RuleBlock)
                        && !(a instanceof SynPredBlock)) {

                    if (!(a instanceof RuleRefElement)
                            && ((AlternativeBlock) a).not
                            && analyzer
                                    .subruleCanBeInverted(
                                            ((AlternativeBlock) a),
                                            grammar instanceof LexerGrammar)) {
                        // Special case for inverted subrules that
                        // will be inlined. Treat these like
                        // token or char literal references
                        println(labeledElementType + " "
                                + a.getLabel() + " = "
                                + labeledElementInit + ";");
                        if (grammar.buildAST) {
                            genASTDeclaration(a);
                        }
                    } else {
                        if (grammar.buildAST) {
                            // Always gen AST variables for
                            // labeled elements, even if the
                            // element itself is marked with !
                            genASTDeclaration(a);
                        }
                        if (grammar instanceof LexerGrammar) {
                            // labeled lexer rule refs yield a Token
                            println("Token " + a.getLabel()
                                    + "=null;");
                        }
                        if (grammar instanceof TreeWalkerGrammar) {
                            // always generate rule-ref variables
                            // for tree walker
                            println(labeledElementType + " "
                                    + a.getLabel() + " = "
                                    + labeledElementInit + ";");
                        }
                    }
                } else {
                    // It is a token or literal reference. Generate the
                    // correct variable type for this grammar
                    println(labeledElementType + " " + a.getLabel()
                            + " = " + labeledElementInit + ";");

                    // In addition, generate *_AST variables if
                    // building ASTs
                    if (grammar.buildAST) {
                        // honor a per-atom AST node type override
                        // when one was specified in the grammar
                        if (a instanceof GrammarAtom
                                && ((GrammarAtom) a)
                                        .getASTNodeType() != null) {
                            GrammarAtom ga = (GrammarAtom) a;
                            genASTDeclaration(a, ga
                                    .getASTNodeType());
                        } else {
                            genASTDeclaration(a);
                        }
                    }
                }
            }
        }
    }
}
1526:
1527: /** Generate a series of case statements that implement a BitSet test.
1528: * @param p The Bitset for which cases are to be generated
1529: */
1530: protected void genCases(BitSet p) {
1531: if (DEBUG_CODE_GENERATOR)
1532: System.out.println("genCases(" + p + ")");
1533: int[] elems;
1534:
1535: elems = p.toArray();
1536: // Wrap cases four-per-line for lexer, one-per-line for parser
1537: int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
1538: int j = 1;
1539: boolean startOfLine = true;
1540: for (int i = 0; i < elems.length; i++) {
1541: if (j == 1) {
1542: print("");
1543: } else {
1544: _print(" ");
1545: }
1546: _print("case " + getValueString(elems[i]) + ":");
1547:
1548: if (j == wrap) {
1549: _println("");
1550: startOfLine = true;
1551: j = 1;
1552: } else {
1553: j++;
1554: startOfLine = false;
1555: }
1556: }
1557: if (!startOfLine) {
1558: _println("");
1559: }
1560: }
1561:
/**Generate common code for a block of alternatives; return a
 * postscript that needs to be generated at the end of the
 * block. Other routines may append else-clauses and such for
 * error checking before the postfix is generated. If the
 * grammar is a lexer, then generate alternatives in an order
 * where alternatives requiring deeper lookahead are generated
 * first, and EOF in the lookahead set reduces the depth of
 * the lookahead.
 * @param blk The block to generate
 * @param noTestForSingle If true, then it does not generate a test
 * for a single alternative.
 */
public JavaBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
        boolean noTestForSingle) {
    int nIF = 0;                       // number of if-clauses emitted so far
    boolean createdLL1Switch = false;  // did we open an LL(1) switch?
    int closingBracesOfIFSequence = 0; // braces owed by nested synpred else's
    JavaBlockFinishingInfo finishingInfo = new JavaBlockFinishingInfo();
    if (DEBUG_CODE_GENERATOR)
        System.out.println("genCommonBlock(" + blk + ")");

    // Save the AST generation state, and set it to that of the block
    boolean savegenAST = genAST;
    genAST = genAST && blk.getAutoGen();

    boolean oldsaveTest = saveText;
    saveText = saveText && blk.getAutoGen();

    // Is this block inverted? If so, generate special-case code
    if (blk.not
            && analyzer.subruleCanBeInverted(blk,
                    grammar instanceof LexerGrammar)) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("special case: ~(subrule)");
        Lookahead p = analyzer.look(1, blk);
        // Variable assignment for labeled elements
        if (blk.getLabel() != null && syntacticPredLevel == 0) {
            println(blk.getLabel() + " = " + lt1Value + ";");
        }

        // AST
        genElementAST(blk);

        String astArgs = "";
        if (grammar instanceof TreeWalkerGrammar) {
            astArgs = "_t,";
        }

        // match the bitset for the alternative
        println("match(" + astArgs
                + getBitsetName(markBitsetForGen(p.fset)) + ");");

        // tack on tree cursor motion if doing a tree walker
        if (grammar instanceof TreeWalkerGrammar) {
            println("_t = _t.getNextSibling();");
        }
        return finishingInfo;
    }

    // Special handling for single alt
    if (blk.getAlternatives().size() == 1) {
        Alternative alt = blk.getAlternativeAt(0);
        // Generate a warning if there is a synPred for single alt.
        if (alt.synPred != null) {
            antlrTool
                    .warning(
                            "Syntactic predicate superfluous for single alternative",
                            grammar.getFilename(), blk
                                    .getAlternativeAt(0).synPred
                                    .getLine(), blk
                                    .getAlternativeAt(0).synPred
                                    .getColumn());
        }
        if (noTestForSingle) {
            if (alt.semPred != null) {
                // Generate validating predicate
                genSemPred(alt.semPred, blk.line);
            }
            genAlt(alt, blk);
            return finishingInfo;
        }
    }

    // count number of simple LL(1) cases; only do switch for
    // many LL(1) cases (no preds, no end of token refs)
    // We don't care about exit paths for (...)*, (...)+
    // because we don't explicitly have a test for them
    // as an alt in the loop.
    //
    // Also, we now count how many unicode lookahead sets
    // there are--they must be moved to DEFAULT or ELSE
    // clause.
    int nLL1 = 0;
    for (int i = 0; i < blk.getAlternatives().size(); i++) {
        Alternative a = blk.getAlternativeAt(i);
        if (suitableForCaseExpression(a)) {
            nLL1++;
        }
    }

    // do LL(1) cases
    if (nLL1 >= makeSwitchThreshold) {
        // Determine the name of the item to be compared
        String testExpr = lookaheadString(1);
        createdLL1Switch = true;
        // when parsing trees, convert null to valid tree node with NULL lookahead
        if (grammar instanceof TreeWalkerGrammar) {
            println("if (_t==null) _t=ASTNULL;");
        }
        println("switch ( " + testExpr + ") {");
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            // ignore any non-LL(1) alts, predicated alts,
            // or end-of-token alts for case expressions
            if (!suitableForCaseExpression(alt)) {
                continue;
            }
            Lookahead p = alt.cache[1];
            if (p.fset.degree() == 0 && !p.containsEpsilon()) {
                antlrTool
                        .warning(
                                "Alternate omitted due to empty prediction set",
                                grammar.getFilename(), alt.head
                                        .getLine(), alt.head
                                        .getColumn());
            } else {
                genCases(p.fset);
                println("{");
                tabs++;
                genAlt(alt, blk);
                println("break;");
                tabs--;
                println("}");
            }
        }
        // all remaining alts fall into the default clause
        println("default:");
        tabs++;
    }

    // do non-LL(1) and nondeterministic cases This is tricky in
    // the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
    // : "*="; Since nextToken is generated without a loop, then
    // the STAR will have end-of-token as it's lookahead set for
    // LA(2). So, we must generate the alternatives containing
    // trailing end-of-token in their lookahead sets *after* the
    // alternatives without end-of-token. This implements the
    // usual lexer convention that longer matches come before
    // shorter ones, e.g. "*=" matches ASSIGN_STAR not STAR
    //
    // For non-lexer grammars, this does not sort the alternates
    // by depth Note that alts whose lookahead is purely
    // end-of-token at k=1 end up as default or else clauses.
    int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk
            : 0;
    for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("checking depth " + altDepth);
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            if (DEBUG_CODE_GENERATOR)
                System.out.println("genAlt: " + i);
            // if we made a switch above, ignore what we already took care
            // of. Specifically, LL(1) alts with no preds
            // that do not have end-of-token in their prediction set
            // and that are not giant unicode sets.
            if (createdLL1Switch && suitableForCaseExpression(alt)) {
                if (DEBUG_CODE_GENERATOR)
                    System.out
                            .println("ignoring alt because it was in the switch");
                continue;
            }
            String e;

            boolean unpredicted = false;

            if (grammar instanceof LexerGrammar) {
                // Calculate the "effective depth" of the alt,
                // which is the max depth at which
                // cache[depth]!=end-of-token
                int effectiveDepth = alt.lookaheadDepth;
                if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
                    // use maximum lookahead
                    effectiveDepth = grammar.maxk;
                }
                while (effectiveDepth >= 1
                        && alt.cache[effectiveDepth]
                                .containsEpsilon()) {
                    effectiveDepth--;
                }
                // Ignore alts whose effective depth is other than
                // the ones we are generating for this iteration.
                if (effectiveDepth != altDepth) {
                    if (DEBUG_CODE_GENERATOR)
                        System.out
                                .println("ignoring alt because effectiveDepth!=altDepth;"
                                        + effectiveDepth
                                        + "!="
                                        + altDepth);
                    continue;
                }
                unpredicted = lookaheadIsEmpty(alt, effectiveDepth);
                e = getLookaheadTestExpression(alt, effectiveDepth);
            } else {
                unpredicted = lookaheadIsEmpty(alt, grammar.maxk);
                e = getLookaheadTestExpression(alt, grammar.maxk);
            }

            // Was it a big unicode range that forced unsuitability
            // for a case expression?
            if (alt.cache[1].fset.degree() > caseSizeThreshold
                    && suitableForCaseExpression(alt)) {
                if (nIF == 0) {
                    println("if " + e + " {");
                } else {
                    println("else if " + e + " {");
                }
            } else if (unpredicted && alt.semPred == null
                    && alt.synPred == null) {
                // The alt has empty prediction set and no
                // predicate to help out. if we have not
                // generated a previous if, just put {...} around
                // the end-of-token clause
                if (nIF == 0) {
                    println("{");
                } else {
                    println("else {");
                }
                finishingInfo.needAnErrorClause = false;
            } else { // check for sem and syn preds

                // Add any semantic predicate expression to the
                // lookahead test
                if (alt.semPred != null) {
                    // if debugging, wrap the evaluation of the
                    // predicate in a method translate $ and #
                    // references
                    ActionTransInfo tInfo = new ActionTransInfo();
                    String actionStr = processActionForSpecialSymbols(
                            alt.semPred, blk.line, currentRule,
                            tInfo);
                    // ignore translation info...we don't need to
                    // do anything with it. call that will inform
                    // SemanticPredicateListeners of the result
                    if (((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))
                            && grammar.debuggingOutput) {
                        e = "("
                                + e
                                + "&& fireSemanticPredicateEvaluated(persistence.antlr.debug.SemanticPredicateEvent.PREDICTING,"
                                + addSemPred(charFormatter
                                        .escapeString(actionStr))
                                + "," + actionStr + "))";
                    } else {
                        e = "(" + e + "&&(" + actionStr + "))";
                    }
                }

                // Generate any syntactic predicates
                if (nIF > 0) {
                    if (alt.synPred != null) {
                        println("else {");
                        tabs++;
                        genSynPred(alt.synPred, e);
                        closingBracesOfIFSequence++;
                    } else {
                        println("else if " + e + " {");
                    }
                } else {
                    if (alt.synPred != null) {
                        genSynPred(alt.synPred, e);
                    } else {
                        // when parsing trees, convert null to
                        // valid tree node with NULL lookahead.
                        if (grammar instanceof TreeWalkerGrammar) {
                            println("if (_t==null) _t=ASTNULL;");
                        }
                        println("if " + e + " {");
                    }
                }

            }

            nIF++;
            tabs++;
            genAlt(alt, blk);
            tabs--;
            println("}");
        }
    }
    // Collect the closing braces owed by nested synpred else-blocks.
    String ps = "";
    for (int i = 1; i <= closingBracesOfIFSequence; i++) {
        ps += "}";
    }

    // Restore the AST generation state
    genAST = savegenAST;

    // restore save text state
    saveText = oldsaveTest;

    // Return the finishing info.
    if (createdLL1Switch) {
        tabs--;
        finishingInfo.postscript = ps + "}";
        finishingInfo.generatedSwitch = true;
        finishingInfo.generatedAnIf = nIF > 0;
        //return new JavaBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement

    } else {
        finishingInfo.postscript = ps;
        finishingInfo.generatedSwitch = false;
        finishingInfo.generatedAnIf = nIF > 0;
        // return new JavaBlockFinishingInfo(ps, false,nIF>0);
    }
    return finishingInfo;
}
1876:
1877: private static boolean suitableForCaseExpression(Alternative a) {
1878: return a.lookaheadDepth == 1 && a.semPred == null
1879: && !a.cache[1].containsEpsilon()
1880: && a.cache[1].fset.degree() <= caseSizeThreshold;
1881: }
1882:
1883: /** Generate code to link an element reference into the AST */
1884: private void genElementAST(AlternativeElement el) {
1885: // handle case where you're not building trees, but are in tree walker.
1886: // Just need to get labels set up.
1887: if (grammar instanceof TreeWalkerGrammar && !grammar.buildAST) {
1888: String elementRef;
1889: String astName;
1890:
1891: // Generate names and declarations of the AST variable(s)
1892: if (el.getLabel() == null) {
1893: elementRef = lt1Value;
1894: // Generate AST variables for unlabeled stuff
1895: astName = "tmp" + astVarNumber + "_AST";
1896: astVarNumber++;
1897: // Map the generated AST variable in the alternate
1898: mapTreeVariable(el, astName);
1899: // Generate an "input" AST variable also
1900: println(labeledElementASTType + " " + astName
1901: + "_in = " + elementRef + ";");
1902: }
1903: return;
1904: }
1905:
1906: if (grammar.buildAST && syntacticPredLevel == 0) {
1907: boolean needASTDecl = (genAST && (el.getLabel() != null || el
1908: .getAutoGenType() != GrammarElement.AUTO_GEN_BANG));
1909:
1910: // RK: if we have a grammar element always generate the decl
1911: // since some guy can access it from an action and we can't
1912: // peek ahead (well not without making a mess).
1913: // I'd prefer taking this out.
1914: if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
1915: && (el instanceof TokenRefElement)) {
1916: needASTDecl = true;
1917: }
1918:
1919: boolean doNoGuessTest = (grammar.hasSyntacticPredicate && needASTDecl);
1920:
1921: String elementRef;
1922: String astNameBase;
1923:
1924: // Generate names and declarations of the AST variable(s)
1925: if (el.getLabel() != null) {
1926: elementRef = el.getLabel();
1927: astNameBase = el.getLabel();
1928: } else {
1929: elementRef = lt1Value;
1930: // Generate AST variables for unlabeled stuff
1931: astNameBase = "tmp" + astVarNumber;
1932: ;
1933: astVarNumber++;
1934: }
1935:
1936: // Generate the declaration if required.
1937: if (needASTDecl) {
1938: // Generate the declaration
1939: if (el instanceof GrammarAtom) {
1940: GrammarAtom ga = (GrammarAtom) el;
1941: if (ga.getASTNodeType() != null) {
1942: genASTDeclaration(el, astNameBase, ga
1943: .getASTNodeType());
1944: // println(ga.getASTNodeType()+" " + astName+" = null;");
1945: } else {
1946: genASTDeclaration(el, astNameBase,
1947: labeledElementASTType);
1948: // println(labeledElementASTType+" " + astName + " = null;");
1949: }
1950: } else {
1951: genASTDeclaration(el, astNameBase,
1952: labeledElementASTType);
1953: // println(labeledElementASTType+" " + astName + " = null;");
1954: }
1955: }
1956:
1957: // for convenience..
1958: String astName = astNameBase + "_AST";
1959:
1960: // Map the generated AST variable in the alternate
1961: mapTreeVariable(el, astName);
1962: if (grammar instanceof TreeWalkerGrammar) {
1963: // Generate an "input" AST variable also
1964: println(labeledElementASTType + " " + astName
1965: + "_in = null;");
1966: }
1967:
1968: // Enclose actions with !guessing
1969: if (doNoGuessTest) {
1970: // println("if (inputState.guessing==0) {");
1971: // tabs++;
1972: }
1973:
1974: // if something has a label assume it will be used
1975: // so we must initialize the RefAST
1976: if (el.getLabel() != null) {
1977: if (el instanceof GrammarAtom) {
1978: println(astName
1979: + " = "
1980: + getASTCreateString((GrammarAtom) el,
1981: elementRef) + ";");
1982: } else {
1983: println(astName + " = "
1984: + getASTCreateString(elementRef) + ";");
1985: }
1986: }
1987:
1988: // if it has no label but a declaration exists initialize it.
1989: if (el.getLabel() == null && needASTDecl) {
1990: elementRef = lt1Value;
1991: if (el instanceof GrammarAtom) {
1992: println(astName
1993: + " = "
1994: + getASTCreateString((GrammarAtom) el,
1995: elementRef) + ";");
1996: } else {
1997: println(astName + " = "
1998: + getASTCreateString(elementRef) + ";");
1999: }
2000: // Map the generated AST variable in the alternate
2001: if (grammar instanceof TreeWalkerGrammar) {
2002: // set "input" AST variable also
2003: println(astName + "_in = " + elementRef + ";");
2004: }
2005: }
2006:
2007: if (genAST) {
2008: switch (el.getAutoGenType()) {
2009: case GrammarElement.AUTO_GEN_NONE:
2010: println("astFactory.addASTChild(currentAST, "
2011: + astName + ");");
2012: break;
2013: case GrammarElement.AUTO_GEN_CARET:
2014: println("astFactory.makeASTRoot(currentAST, "
2015: + astName + ");");
2016: break;
2017: default:
2018: break;
2019: }
2020: }
2021: if (doNoGuessTest) {
2022: // tabs--;
2023: // println("}");
2024: }
2025: }
2026: }
2027:
2028: /** Close the try block and generate catch phrases
2029: * if the element has a labeled handler in the rule
2030: */
2031: private void genErrorCatchForElement(AlternativeElement el) {
2032: if (el.getLabel() == null)
2033: return;
2034: String r = el.enclosingRuleName;
2035: if (grammar instanceof LexerGrammar) {
2036: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2037: }
2038: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2039: if (rs == null) {
2040: antlrTool.panic("Enclosing rule not found!");
2041: }
2042: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2043: if (ex != null) {
2044: tabs--;
2045: println("}");
2046: genErrorHandler(ex);
2047: }
2048: }
2049:
	/** Generate the catch phrases for a user-specified error handler.
	 *  One catch block is emitted per handler in the spec.  When the
	 *  grammar uses syntactic predicates, the user action runs only when
	 *  inputState.guessing==0; while guessing the exception is rethrown.
	 */
	private void genErrorHandler(ExceptionSpec ex) {
		// Each ExceptionHandler in the ExceptionSpec is a separate catch
		for (int i = 0; i < ex.handlers.size(); i++) {
			ExceptionHandler handler = (ExceptionHandler) ex.handlers
					.elementAt(i);
			// Generate catch phrase
			println("catch (" + handler.exceptionTypeAndName.getText()
					+ ") {");
			tabs++;
			// guard the user action so it doesn't run during guessing
			if (grammar.hasSyntacticPredicate) {
				println("if (inputState.guessing==0) {");
				tabs++;
			}

			// When not guessing, execute user handler action
			// ($ and # references are translated first)
			ActionTransInfo tInfo = new ActionTransInfo();
			printAction(processActionForSpecialSymbols(handler.action
					.getText(), handler.action.getLine(), currentRule,
					tInfo));

			if (grammar.hasSyntacticPredicate) {
				tabs--;
				println("} else {");
				tabs++;
				// When guessing, rethrow exception
				println("throw "
						+ extractIdOfAction(handler.exceptionTypeAndName)
						+ ";");
				tabs--;
				println("}");
			}
			// Close catch phrase
			tabs--;
			println("}");
		}
	}
2087:
2088: /** Generate a try { opening if the element has a labeled handler in the rule */
2089: private void genErrorTryForElement(AlternativeElement el) {
2090: if (el.getLabel() == null)
2091: return;
2092: String r = el.enclosingRuleName;
2093: if (grammar instanceof LexerGrammar) {
2094: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2095: }
2096: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2097: if (rs == null) {
2098: antlrTool.panic("Enclosing rule not found!");
2099: }
2100: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2101: if (ex != null) {
2102: println("try { // for error handling");
2103: tabs++;
2104: }
2105: }
2106:
	/** Declare the element's _AST variable using the grammar's default
	 *  AST node type.
	 */
	protected void genASTDeclaration(AlternativeElement el) {
		genASTDeclaration(el, labeledElementASTType);
	}
2110:
	/** Declare the element's _AST variable with the given node type,
	 *  using the element's label as the variable name base.
	 */
	protected void genASTDeclaration(AlternativeElement el,
			String node_type) {
		genASTDeclaration(el, el.getLabel(), node_type);
	}
2115:
2116: protected void genASTDeclaration(AlternativeElement el,
2117: String var_name, String node_type) {
2118: // already declared?
2119: if (declaredASTVariables.contains(el))
2120: return;
2121:
2122: // emit code
2123: println(node_type + " " + var_name + "_AST = null;");
2124:
2125: // mark as declared
2126: declaredASTVariables.put(el, el);
2127: }
2128:
	/** Generate a header that is common to all Java files.
	 *  Emits the "// $ANTLR version: "grammar" -> "Class.java"$" stamp line.
	 */
	protected void genHeader() {
		println("// $ANTLR " + Tool.version + ": " + "\""
				+ antlrTool.fileMinusPath(antlrTool.grammarFile) + "\""
				+ " -> " + "\"" + grammar.getClassName() + ".java\"$");
	}
2135:
	/** Emit the statement that remaps _ttype through the literals table
	 *  (whole-token form).
	 */
	private void genLiteralsTest() {
		println("_ttype = testLiteralsTable(_ttype);");
	}
2139:
	/** Emit the literals-table test for a partial token: only the text
	 *  matched since _begin is looked up (used for protected rules).
	 */
	private void genLiteralsTestForPartialToken() {
		println("_ttype = testLiteralsTable(new String(text.getBuffer(),_begin,text.length()-_begin),_ttype);");
	}
2143:
	// NOTE(review): intentionally empty — no code is emitted for a bare
	// BitSet match in this generator; presumably set matches are handled
	// by other gen methods. Confirm before adding behavior here.
	protected void genMatch(BitSet b) {
	}
2146:
2147: protected void genMatch(GrammarAtom atom) {
2148: if (atom instanceof StringLiteralElement) {
2149: if (grammar instanceof LexerGrammar) {
2150: genMatchUsingAtomText(atom);
2151: } else {
2152: genMatchUsingAtomTokenType(atom);
2153: }
2154: } else if (atom instanceof CharLiteralElement) {
2155: if (grammar instanceof LexerGrammar) {
2156: genMatchUsingAtomText(atom);
2157: } else {
2158: antlrTool
2159: .error("cannot ref character literals in grammar: "
2160: + atom);
2161: }
2162: } else if (atom instanceof TokenRefElement) {
2163: genMatchUsingAtomText(atom);
2164: } else if (atom instanceof WildcardElement) {
2165: gen((WildcardElement) atom);
2166: }
2167: }
2168:
2169: protected void genMatchUsingAtomText(GrammarAtom atom) {
2170: // match() for trees needs the _t cursor
2171: String astArgs = "";
2172: if (grammar instanceof TreeWalkerGrammar) {
2173: astArgs = "_t,";
2174: }
2175:
2176: // if in lexer and ! on element, save buffer index to kill later
2177: if (grammar instanceof LexerGrammar
2178: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2179: println("_saveIndex=text.length();");
2180: }
2181:
2182: print(atom.not ? "matchNot(" : "match(");
2183: _print(astArgs);
2184:
2185: // print out what to match
2186: if (atom.atomText.equals("EOF")) {
2187: // horrible hack to handle EOF case
2188: _print("Token.EOF_TYPE");
2189: } else {
2190: _print(atom.atomText);
2191: }
2192: _println(");");
2193:
2194: if (grammar instanceof LexerGrammar
2195: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2196: println("text.setLength(_saveIndex);"); // kill text atom put in buffer
2197: }
2198: }
2199:
2200: protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
2201: // match() for trees needs the _t cursor
2202: String astArgs = "";
2203: if (grammar instanceof TreeWalkerGrammar) {
2204: astArgs = "_t,";
2205: }
2206:
2207: // If the literal can be mangled, generate the symbolic constant instead
2208: String mangledName = null;
2209: String s = astArgs + getValueString(atom.getType());
2210:
2211: // matching
2212: println((atom.not ? "matchNot(" : "match(") + s + ");");
2213: }
2214:
	/** Generate the nextToken() rule. nextToken() is a synthetic
	 * lexer rule that is the implicit OR of all user-defined
	 * lexer rules.
	 */
	public void genNextToken() {
		// Are there any public rules? If not, then just generate a
		// fake nextToken().
		boolean hasPublicRules = false;
		for (int i = 0; i < grammar.rules.size(); i++) {
			RuleSymbol rs = (RuleSymbol) grammar.rules.elementAt(i);
			if (rs.isDefined() && rs.access.equals("public")) {
				hasPublicRules = true;
				break;
			}
		}
		if (!hasPublicRules) {
			// stub nextToken(): run EOF hook, then report EOF forever
			println("");
			println("public Token nextToken() throws TokenStreamException {");
			println("\ttry {uponEOF();}");
			println("\tcatch(CharStreamIOException csioe) {");
			println("\t\tthrow new TokenStreamIOException(csioe.io);");
			println("\t}");
			println("\tcatch(CharStreamException cse) {");
			println("\t\tthrow new TokenStreamException(cse.getMessage());");
			println("\t}");
			println("\treturn new CommonToken(Token.EOF_TYPE, \"\");");
			println("}");
			println("");
			return;
		}

		// Create the synthesized nextToken() rule
		RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(
				grammar, grammar.rules, "nextToken");
		// Define the nextToken rule symbol
		RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
		nextTokenRs.setDefined();
		nextTokenRs.setBlock(nextTokenBlk);
		nextTokenRs.access = "private";
		grammar.define(nextTokenRs);
		// Analyze the nextToken rule
		boolean ok = grammar.theLLkAnalyzer.deterministic(nextTokenBlk);

		// Generate the next token rule
		// (filterRule stays null when filter mode is off or has no rule)
		String filterRule = null;
		if (((LexerGrammar) grammar).filterMode) {
			filterRule = ((LexerGrammar) grammar).filterRule;
		}

		println("");
		println("public Token nextToken() throws TokenStreamException {");
		tabs++;
		println("Token theRetToken=null;");
		_println("tryAgain:");
		println("for (;;) {");
		tabs++;
		println("Token _token = null;");
		println("int _ttype = Token.INVALID_TYPE;");
		if (((LexerGrammar) grammar).filterMode) {
			println("setCommitToPath(false);");
			if (filterRule != null) {
				// Here's a good place to ensure that the filter rule actually exists
				if (!grammar.isDefined(CodeGenerator
						.encodeLexerRuleName(filterRule))) {
					grammar.antlrTool.error("Filter rule " + filterRule
							+ " does not exist in this lexer");
				} else {
					RuleSymbol rs = (RuleSymbol) grammar
							.getSymbol(CodeGenerator
									.encodeLexerRuleName(filterRule));
					if (!rs.isDefined()) {
						grammar.antlrTool.error("Filter rule "
								+ filterRule
								+ " does not exist in this lexer");
					} else if (rs.access.equals("public")) {
						grammar.antlrTool.error("Filter rule "
								+ filterRule + " must be protected");
					}
				}
				// mark the stream so the filter path can rewind on error
				println("int _m;");
				println("_m = mark();");
			}
		}
		println("resetText();");

		println("try { // for char stream error handling");
		tabs++;

		// Generate try around whole thing to trap scanner errors
		println("try { // for lexical error handling");
		tabs++;

		// Test for public lexical rules with empty paths
		for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
			Alternative a = nextTokenBlk.getAlternativeAt(i);
			if (a.cache[1].containsEpsilon()) {
				//String r = a.head.toString();
				RuleRefElement rr = (RuleRefElement) a.head;
				String r = CodeGenerator
						.decodeLexerRuleName(rr.targetRule);
				antlrTool.warning("public lexical rule " + r
						+ " is optional (can match \"nothing\")");
			}
		}

		// Generate the block
		String newline = System.getProperty("line.separator");
		JavaBlockFinishingInfo howToFinish = genCommonBlock(
				nextTokenBlk, false);
		String errFinish = "if (LA(1)==EOF_CHAR) {uponEOF(); _returnToken = makeToken(Token.EOF_TYPE);}";
		errFinish += newline + "\t\t\t\t";
		if (((LexerGrammar) grammar).filterMode) {
			if (filterRule == null) {
				// no filter rule: skip one char and restart matching
				errFinish += "else {consume(); continue tryAgain;}";
			} else {
				// filter rule: commit, run the filter, then restart
				errFinish += "else {" + newline + "\t\t\t\t\tcommit();"
						+ newline + "\t\t\t\t\ttry {m" + filterRule
						+ "(false);}" + newline
						+ "\t\t\t\t\tcatch(RecognitionException e) {"
						+ newline
						+ "\t\t\t\t\t // catastrophic failure"
						+ newline + "\t\t\t\t\t reportError(e);"
						+ newline + "\t\t\t\t\t consume();" + newline
						+ "\t\t\t\t\t}" + newline
						+ "\t\t\t\t\tcontinue tryAgain;" + newline
						+ "\t\t\t\t}";
			}
		} else {
			errFinish += "else {" + throwNoViable + "}";
		}
		genBlockFinish(howToFinish, errFinish);

		// at this point a valid token has been matched, undo "mark" that was done
		if (((LexerGrammar) grammar).filterMode && filterRule != null) {
			println("commit();");
		}

		// Generate literals test if desired
		// make sure _ttype is set first; note _returnToken must be
		// non-null as the rule was required to create it.
		println("if ( _returnToken==null ) continue tryAgain; // found SKIP token");
		println("_ttype = _returnToken.getType();");
		if (((LexerGrammar) grammar).getTestLiterals()) {
			genLiteralsTest();
		}

		// return token created by rule reference in switch
		println("_returnToken.setType(_ttype);");
		println("return _returnToken;");

		// Close try block
		tabs--;
		println("}");
		println("catch (RecognitionException e) {");
		tabs++;
		if (((LexerGrammar) grammar).filterMode) {
			if (filterRule == null) {
				println("if ( !getCommitToPath() ) {consume(); continue tryAgain;}");
			} else {
				// uncommitted failure: rewind and hand input to the filter rule
				println("if ( !getCommitToPath() ) {");
				tabs++;
				println("rewind(_m);");
				println("resetText();");
				println("try {m" + filterRule + "(false);}");
				println("catch(RecognitionException ee) {");
				println("	// horrendous failure: error in filter rule");
				println("	reportError(ee);");
				println("	consume();");
				println("}");
				println("continue tryAgain;");
				tabs--;
				println("}");
			}
		}
		if (nextTokenBlk.getDefaultErrorHandler()) {
			println("reportError(e);");
			println("consume();");
		} else {
			// pass on to invoking routine
			println("throw new TokenStreamRecognitionException(e);");
		}
		tabs--;
		println("}");

		// close CharStreamException try
		tabs--;
		println("}");
		println("catch (CharStreamException cse) {");
		println("	if ( cse instanceof CharStreamIOException ) {");
		println("		throw new TokenStreamIOException(((CharStreamIOException)cse).io);");
		println("	}");
		println("	else {");
		println("		throw new TokenStreamException(cse.getMessage());");
		println("	}");
		println("}");

		// close for-loop
		tabs--;
		println("}");

		// close method nextToken
		tabs--;
		println("}");
		println("");
	}
2420:
2421: /** Gen a named rule block.
2422: * ASTs are generated for each element of an alternative unless
2423: * the rule or the alternative have a '!' modifier.
2424: *
2425: * If an alternative defeats the default tree construction, it
2426: * must set <rule>_AST to the root of the returned AST.
2427: *
2428: * Each alternative that does automatic tree construction, builds
2429: * up root and child list pointers in an ASTPair structure.
2430: *
2431: * A rule finishes by setting the returnAST variable from the
2432: * ASTPair.
2433: *
2434: * @param rule The name of the rule to generate
2435: * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
2436: */
	// NOTE(review): parameter 'startSymbol' is not referenced anywhere in
	// this method body; kept for interface compatibility.
	public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum) {
		// rule methods are emitted one indent level inside the class body
		tabs = 1;

		if (DEBUG_CODE_GENERATOR)
			System.out.println("genRule(" + s.getId() + ")");
		if (!s.isDefined()) {
			antlrTool.error("undefined rule: " + s.getId());
			return;
		}

		// Generate rule return type, name, arguments
		RuleBlock rblk = s.getBlock();

		currentRule = rblk;
		currentASTResult = s.getId();

		// clear list of declared ast variables..
		declaredASTVariables.clear();

		// Save the AST generation state, and set it to that of the rule
		boolean savegenAST = genAST;
		genAST = genAST && rblk.getAutoGen();

		// boolean oldsaveTest = saveText;
		saveText = rblk.getAutoGen();

		// print javadoc comment if any
		if (s.comment != null) {
			_println(s.comment);
		}

		// Gen method access and final qualifier
		print(s.access + " final ");

		// Gen method return type (note lexer return action set at rule creation)
		if (rblk.returnAction != null) {
			// Has specified return value
			_print(extractTypeOfAction(rblk.returnAction, rblk
					.getLine(), rblk.getColumn())
					+ " ");
		} else {
			// No specified return value
			_print("void ");
		}

		// Gen method name
		_print(s.getId() + "(");

		// Additional rule parameters common to all rules for this grammar
		_print(commonExtraParams);
		if (commonExtraParams.length() != 0 && rblk.argAction != null) {
			_print(",");
		}

		// Gen arguments
		if (rblk.argAction != null) {
			// Has specified arguments
			_println("");
			tabs++;
			println(rblk.argAction);
			tabs--;
			print(")");
		} else {
			// No specified arguments
			_print(")");
		}

		// Gen throws clause and open curly
		_print(" throws " + exceptionThrown);
		if (grammar instanceof ParserGrammar) {
			_print(", TokenStreamException");
		} else if (grammar instanceof LexerGrammar) {
			_print(", CharStreamException, TokenStreamException");
		}
		// Add user-defined exceptions unless lexer (for now)
		if (rblk.throwsSpec != null) {
			if (grammar instanceof LexerGrammar) {
				antlrTool
						.error("user-defined throws spec not allowed (yet) for lexer rule "
								+ rblk.ruleName);
			} else {
				_print(", " + rblk.throwsSpec);
			}
		}

		_println(" {");
		tabs++;

		// Convert return action to variable declaration
		if (rblk.returnAction != null)
			println(rblk.returnAction + ";");

		// print out definitions needed by rules for various grammar types
		println(commonLocalVars);

		if (grammar.traceRules) {
			if (grammar instanceof TreeWalkerGrammar) {
				println("traceIn(\"" + s.getId() + "\",_t);");
			} else {
				println("traceIn(\"" + s.getId() + "\");");
			}
		}

		if (grammar instanceof LexerGrammar) {
			// lexer rule default return value is the rule's token name
			// This is a horrible hack to support the built-in EOF lexer rule.
			if (s.getId().equals("mEOF"))
				println("_ttype = Token.EOF_TYPE;");
			else
				println("_ttype = " + s.getId().substring(1) + ";");
			println("int _saveIndex;"); // used for element! (so we can kill text matched for element)
			/*
			println("boolean old_saveConsumedInput=saveConsumedInput;");
			if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
			println("saveConsumedInput=false;");
			}
			*/
		}

		// if debugging, write code to mark entry to the rule
		if (grammar.debuggingOutput)
			if (grammar instanceof ParserGrammar)
				println("fireEnterRule(" + ruleNum + ",0);");
			else if (grammar instanceof LexerGrammar)
				println("fireEnterRule(" + ruleNum + ",_ttype);");

		// Generate trace code if desired
		if (grammar.debuggingOutput || grammar.traceRules) {
			println("try { // debugging");
			tabs++;
		}

		// Initialize AST variables
		if (grammar instanceof TreeWalkerGrammar) {
			// "Input" value for rule
			println(labeledElementASTType + " " + s.getId()
					+ "_AST_in = (_t == ASTNULL) ? null : ("
					+ labeledElementASTType + ")_t;");
		}
		if (grammar.buildAST) {
			// Parser member used to pass AST returns from rule invocations
			println("returnAST = null;");
			// Tracks AST construction
			// println("ASTPair currentAST = (inputState.guessing==0) ? new ASTPair() : null;");
			println("ASTPair currentAST = new ASTPair();");
			// User-settable return value for rule.
			println(labeledElementASTType + " " + s.getId()
					+ "_AST = null;");
		}

		genBlockPreamble(rblk);
		genBlockInitAction(rblk);
		println("");

		// Search for an unlabeled exception specification attached to the rule
		ExceptionSpec unlabeledUserSpec = rblk.findExceptionSpec("");

		// Generate try block around the entire rule for error handling
		if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
			println("try { // for error handling");
			tabs++;
		}

		// Generate the alternatives
		if (rblk.alternatives.size() == 1) {
			// One alternative -- use simple form
			Alternative alt = rblk.getAlternativeAt(0);
			String pred = alt.semPred;
			if (pred != null)
				genSemPred(pred, currentRule.line);
			if (alt.synPred != null) {
				antlrTool
						.warning(
								"Syntactic predicate ignored for single alternative",
								grammar.getFilename(), alt.synPred
										.getLine(), alt.synPred
										.getColumn());
			}
			genAlt(alt, rblk);
		} else {
			// Multiple alternatives -- generate complex form
			boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);

			JavaBlockFinishingInfo howToFinish = genCommonBlock(rblk,
					false);
			genBlockFinish(howToFinish, throwNoViable);
		}

		// Generate catch phrase for error handling
		if (unlabeledUserSpec != null || rblk.getDefaultErrorHandler()) {
			// Close the try block
			tabs--;
			println("}");
		}

		// Generate user-defined or default catch phrases
		if (unlabeledUserSpec != null) {
			genErrorHandler(unlabeledUserSpec);
		} else if (rblk.getDefaultErrorHandler()) {
			// Generate default catch phrase
			println("catch (" + exceptionThrown + " ex) {");
			tabs++;
			// Generate code to handle error if not guessing
			if (grammar.hasSyntacticPredicate) {
				println("if (inputState.guessing==0) {");
				tabs++;
			}
			println("reportError(ex);");
			if (!(grammar instanceof TreeWalkerGrammar)) {
				// Generate code to consume until token in k==1 follow set
				Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1,
						rblk.endNode);
				String followSetName = getBitsetName(markBitsetForGen(follow.fset));
				println("consume();");
				println("consumeUntil(" + followSetName + ");");
			} else {
				// Just consume one token
				println("if (_t!=null) {_t = _t.getNextSibling();}");
			}
			if (grammar.hasSyntacticPredicate) {
				tabs--;
				// When guessing, rethrow exception
				println("} else {");
				println("	throw ex;");
				println("}");
			}
			// Close catch phrase
			tabs--;
			println("}");
		}

		// Squirrel away the AST "return" value
		if (grammar.buildAST) {
			println("returnAST = " + s.getId() + "_AST;");
		}

		// Set return tree value for tree walkers
		if (grammar instanceof TreeWalkerGrammar) {
			println("_retTree = _t;");
		}

		// Generate literals test for lexer rules so marked
		if (rblk.getTestLiterals()) {
			if (s.access.equals("protected")) {
				genLiteralsTestForPartialToken();
			} else {
				genLiteralsTest();
			}
		}

		// if doing a lexer rule, dump code to create token if necessary
		if (grammar instanceof LexerGrammar) {
			println("if ( _createToken && _token==null && _ttype!=Token.SKIP ) {");
			println("	_token = makeToken(_ttype);");
			println("	_token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));");
			println("}");
			println("_returnToken = _token;");
		}

		// Gen the return statement if there is one (lexer has hard-wired return action)
		if (rblk.returnAction != null) {
			println("return "
					+ extractIdOfAction(rblk.returnAction, rblk
							.getLine(), rblk.getColumn()) + ";");
		}

		if (grammar.debuggingOutput || grammar.traceRules) {
			tabs--;
			println("} finally { // debugging");
			tabs++;

			// If debugging, generate calls to mark exit of rule
			if (grammar.debuggingOutput)
				if (grammar instanceof ParserGrammar)
					println("fireExitRule(" + ruleNum + ",0);");
				else if (grammar instanceof LexerGrammar)
					println("fireExitRule(" + ruleNum + ",_ttype);");

			if (grammar.traceRules) {
				if (grammar instanceof TreeWalkerGrammar) {
					println("traceOut(\"" + s.getId() + "\",_t);");
				} else {
					println("traceOut(\"" + s.getId() + "\");");
				}
			}

			tabs--;
			println("}");
		}

		tabs--;
		println("}");
		println("");

		// Restore the AST generation state
		genAST = savegenAST;

		// restore char save state
		// saveText = oldsaveTest;
	}
2737:
	/** Emit the call to a referenced rule: target name, lexer
	 *  _createToken flag, common extra args, and translated user
	 *  arguments; for tree walkers, also advance _t afterwards.
	 *  (Method name violates Java naming convention but is kept for
	 *  compatibility with existing callers.)
	 */
	private void GenRuleInvocation(RuleRefElement rr) {
		// dump rule name
		_print(rr.targetRule + "(");

		// lexers must tell rule if it should set _returnToken
		if (grammar instanceof LexerGrammar) {
			// if labeled, could access Token, so tell rule to create
			if (rr.getLabel() != null) {
				_print("true");
			} else {
				_print("false");
			}
			if (commonExtraArgs.length() != 0 || rr.args != null) {
				_print(",");
			}
		}

		// Extra arguments common to all rules for this grammar
		_print(commonExtraArgs);
		if (commonExtraArgs.length() != 0 && rr.args != null) {
			_print(",");
		}

		// Process arguments to method, if any
		RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
		if (rr.args != null) {
			// When not guessing, execute user arg action
			// ($ and # references are translated first)
			ActionTransInfo tInfo = new ActionTransInfo();
			String args = processActionForSpecialSymbols(rr.args, 0,
					currentRule, tInfo);
			// argument actions may not touch the rule's result tree
			if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
				antlrTool.error("Arguments of rule reference '"
						+ rr.targetRule + "' cannot set or ref #"
						+ currentRule.getRuleName(), grammar
						.getFilename(), rr.getLine(), rr.getColumn());
			}
			_print(args);

			// Warn if the rule accepts no arguments
			if (rs.block.argAction == null) {
				antlrTool.warning("Rule '" + rr.targetRule
						+ "' accepts no arguments", grammar
						.getFilename(), rr.getLine(), rr.getColumn());
			}
		} else {
			// For C++, no warning if rule has parameters, because there may be default
			// values for all of the parameters
			if (rs.block.argAction != null) {
				antlrTool.warning(
						"Missing parameters on reference to rule "
								+ rr.targetRule, grammar.getFilename(),
						rr.getLine(), rr.getColumn());
			}
		}
		_println(");");

		// move down to the first child while parsing
		if (grammar instanceof TreeWalkerGrammar) {
			println("_t = _retTree;");
		}
	}
2799:
/** Generate a validating semantic predicate check in the output parser/lexer.
 *  The predicate text is first translated for $/# special symbols, then an
 *  {@code if (!(pred)) throw new SemanticException(...)} guard is emitted.
 *  When debugging output is enabled for a parser or lexer grammar, the
 *  predicate evaluation is additionally wrapped so SemanticPredicateListeners
 *  can observe the result.
 *  @param pred the predicate expression text from the grammar
 *  @param line the grammar line the predicate appears on (for translation/error reporting)
 */
protected void genSemPred(String pred, int line) {
    // translate $ and # references
    ActionTransInfo tInfo = new ActionTransInfo();
    pred = processActionForSpecialSymbols(pred, line, currentRule,
            tInfo);
    // ignore translation info...we don't need to do anything with it.
    // Escaped form is embedded in the generated exception message and,
    // when debugging, registered with addSemPred for the debugger's map.
    String escapedPred = charFormatter.escapeString(pred);

    // if debugging, wrap the semantic predicate evaluation in a method
    // that can tell SemanticPredicateListeners the result
    if (grammar.debuggingOutput
            && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
        pred = "fireSemanticPredicateEvaluated(persistence.antlr.debug.SemanticPredicateEvent.VALIDATING,"
                + addSemPred(escapedPred) + "," + pred + ")";
    println("if (!(" + pred + "))");
    println(" throw new SemanticException(\"" + escapedPred
            + "\");");
}
2818:
2819: /** Write an array of Strings which are the semantic predicate
2820: * expressions. The debugger will reference them by number only
2821: */
2822: protected void genSemPredMap() {
2823: Enumeration e = semPreds.elements();
2824: println("private String _semPredNames[] = {");
2825: while (e.hasMoreElements())
2826: println("\"" + e.nextElement() + "\",");
2827: println("};");
2828: }
2829:
/** Generate the code that evaluates a syntactic predicate (guess-mode
 *  parse of the predicate block), leaving the generated {@code if} that
 *  tests the result OPEN; the caller is responsible for emitting the
 *  matching close brace.
 *  @param blk           the syntactic predicate block to generate
 *  @param lookaheadExpr the normal lookahead test guarding the guess
 */
protected void genSynPred(SynPredBlock blk, String lookaheadExpr) {
    if (DEBUG_CODE_GENERATOR)
        System.out.println("gen=>(" + blk + ")");

    // Dump synpred result variable
    println("boolean synPredMatched" + blk.ID + " = false;");
    // Gen normal lookahead test
    println("if (" + lookaheadExpr + ") {");
    tabs++;

    // Save input state: tree parsers save the tree cursor, token/char
    // parsers mark the input stream so it can be rewound after guessing.
    if (grammar instanceof TreeWalkerGrammar) {
        println("AST __t" + blk.ID + " = _t;");
    } else {
        println("int _m" + blk.ID + " = mark();");
    }

    // Once inside the try, assume synpred works unless exception caught
    println("synPredMatched" + blk.ID + " = true;");
    println("inputState.guessing++;");

    // if debugging, tell listeners that a synpred has started
    if (grammar.debuggingOutput
            && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
        println("fireSyntacticPredicateStarted();");
    }

    // Track nesting so other generation code knows it is inside a synpred.
    syntacticPredLevel++;
    println("try {");
    tabs++;
    gen((AlternativeBlock) blk); // gen code to test predicate
    tabs--;
    //println("System.out.println(\"pred "+blk+" succeeded\");");
    println("}");
    // A recognition failure inside the guess means the predicate failed.
    println("catch (" + exceptionThrown + " pe) {");
    tabs++;
    println("synPredMatched" + blk.ID + " = false;");
    //println("System.out.println(\"pred "+blk+" failed\");");
    tabs--;
    println("}");

    // Restore input state
    if (grammar instanceof TreeWalkerGrammar) {
        println("_t = __t" + blk.ID + ";");
    } else {
        println("rewind(_m" + blk.ID + ");");
    }

    println("inputState.guessing--;");

    // if debugging, tell listeners how the synpred turned out
    if (grammar.debuggingOutput
            && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
        println("if (synPredMatched" + blk.ID + ")");
        println(" fireSyntacticPredicateSucceeded();");
        println("else");
        println(" fireSyntacticPredicateFailed();");
    }

    syntacticPredLevel--;
    tabs--;

    // Close lookahead test
    println("}");

    // Test synred result -- NOTE: this "if" is intentionally left open;
    // the caller closes it.
    println("if ( synPredMatched" + blk.ID + " ) {");
}
2898:
/** Generate a static array containing the names of the tokens,
 * indexed by the token type values. This static array is used
 * to format error messages so that the token identifers or literal
 * strings are displayed instead of the token numbers.
 *
 * If a lexical rule has a paraphrase, use it rather than the
 * token label.
 */
public void genTokenStrings() {
    // Generate a string for each token. This creates a static
    // array of Strings indexed by token type.
    println("");
    println("public static final String[] _tokenNames = {");
    tabs++;

    // Walk the token vocabulary and generate a Vector of strings
    // from the tokens.
    Vector v = grammar.tokenManager.getVocabulary();
    for (int i = 0; i < v.size(); i++) {
        String s = (String) v.elementAt(i);
        if (s == null) {
            // gap in the vocabulary: emit a placeholder so indexes line up
            s = "<" + String.valueOf(i) + ">";
        }
        // Plain token ids (not string literals, not placeholders) may
        // carry a paraphrase; prefer it for friendlier error messages.
        if (!s.startsWith("\"") && !s.startsWith("<")) {
            TokenSymbol ts = (TokenSymbol) grammar.tokenManager
                    .getTokenSymbol(s);
            if (ts != null && ts.getParaphrase() != null) {
                s = StringUtils.stripFrontBack(ts.getParaphrase(),
                        "\"", "\"");
            }
        }
        print(charFormatter.literalString(s));
        // comma after every element except the last
        if (i != v.size() - 1) {
            _print(",");
        }
        _println("");
    }

    // Close the string array initializer
    tabs--;
    println("};");
}
2941:
/** Create and set Integer token type objects that map
 * to Java Class objects (which AST node to create).
 */
protected void genTokenASTNodeMap() {
    println("");
    println("protected void buildTokenTypeASTClassMap() {");
    // Generate a map.put("T","TNode") for each token
    // if heterogeneous node known for that token T.
    tabs++;
    boolean generatedNewHashtable = false;
    int n = 0; // number of token->AST-class mappings emitted
    // Walk the token vocabulary and generate puts.
    Vector v = grammar.tokenManager.getVocabulary();
    for (int i = 0; i < v.size(); i++) {
        String s = (String) v.elementAt(i);
        if (s != null) {
            TokenSymbol ts = grammar.tokenManager.getTokenSymbol(s);
            if (ts != null && ts.getASTNodeType() != null) {
                n++;
                if (!generatedNewHashtable) {
                    // only generate if we are going to add a mapping
                    println("tokenTypeToASTClassMap = new Hashtable();");
                    generatedNewHashtable = true;
                }
                println("tokenTypeToASTClassMap.put(new Integer("
                        + ts.getTokenType() + "), "
                        + ts.getASTNodeType() + ".class);");
            }
        }
    }

    if (n == 0) {
        // no heterogeneous node types anywhere: generated method
        // just nulls the map
        println("tokenTypeToASTClassMap=null;");
    }
    tabs--;
    // NOTE(review): this emits "};", leaving a stray (though legal)
    // semicolon after the generated method body.
    println("};");
}
2979:
/** Generate the token types Java file: an interface named
 *  {@code <grammar>TokenTypesFileSuffix} containing one int constant per
 *  token type, so parser/lexer/treeparser share a single vocabulary.
 *  @param tm the token manager holding the vocabulary to emit
 *  @throws IOException if the output file cannot be written
 */
protected void genTokenTypes(TokenManager tm) throws IOException {
    // Open the token output Java file and set the currentOutput stream
    // SAS: file open was moved to a method so a subclass can override
    // This was mainly for the VAJ interface
    setupOutput(tm.getName() + TokenTypesFileSuffix);

    tabs = 0;

    // Generate the header common to all Java files
    genHeader();
    // Do not use printAction because we assume tabs==0
    println(behavior.getHeaderAction(""));

    // Encapsulate the definitions in an interface. This can be done
    // because they are all constants.
    println("public interface " + tm.getName()
            + TokenTypesFileSuffix + " {");
    tabs++;

    // Generate a definition for each token type
    Vector v = tm.getVocabulary();

    // Do special tokens manually
    println("int EOF = " + Token.EOF_TYPE + ";");
    println("int NULL_TREE_LOOKAHEAD = "
            + Token.NULL_TREE_LOOKAHEAD + ";");

    for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
        String s = (String) v.elementAt(i);
        if (s != null) {
            if (s.startsWith("\"")) {
                // a string literal
                StringLiteralSymbol sl = (StringLiteralSymbol) tm
                        .getTokenSymbol(s);
                if (sl == null) {
                    antlrTool.panic("String literal " + s
                            + " not in symbol table");
                } else if (sl.label != null) {
                    // literal was given an explicit label in the grammar
                    println("int " + sl.label + " = " + i + ";");
                } else {
                    String mangledName = mangleLiteral(s);
                    if (mangledName != null) {
                        // We were able to create a meaningful mangled token name
                        println("int " + mangledName + " = " + i
                                + ";");
                        // if no label specified, make the label equal to the mangled name
                        sl.label = mangledName;
                    } else {
                        // unmangleable literal: emit as a comment only
                        println("// " + s + " = " + i);
                    }
                }
            } else if (!s.startsWith("<")) {
                // ordinary token id ("<...>" entries are placeholders)
                println("int " + s + " = " + i + ";");
            }
        }
    }

    // Close the interface
    tabs--;
    println("}");

    // Close the tokens output file
    currentOutput.close();
    currentOutput = null;
    exitIfError();
}
3047:
3048: /** Get a string for an expression to generate creation of an AST subtree.
3049: * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
3050: */
3051: public String getASTCreateString(Vector v) {
3052: if (v.size() == 0) {
3053: return "";
3054: }
3055: StringBuffer buf = new StringBuffer();
3056: buf.append("(" + labeledElementASTType
3057: + ")astFactory.make( (new ASTArray(" + v.size() + "))");
3058: for (int i = 0; i < v.size(); i++) {
3059: buf.append(".add(" + v.elementAt(i) + ")");
3060: }
3061: buf.append(")");
3062: return buf.toString();
3063: }
3064:
3065: /** Get a string for an expression to generate creating of an AST node
3066: * @param atom The grammar node for which you are creating the node
3067: * @param str The arguments to the AST constructor
3068: */
3069: public String getASTCreateString(GrammarAtom atom,
3070: String astCtorArgs) {
3071: //System.out.println("getASTCreateString("+atom+","+astCtorArgs+")");
3072: if (atom != null && atom.getASTNodeType() != null) {
3073: // they specified a type either on the reference or in tokens{} section
3074: return "(" + atom.getASTNodeType() + ")"
3075: + "astFactory.create(" + astCtorArgs + ",\""
3076: + atom.getASTNodeType() + "\")";
3077: } else {
3078: // must be an action or something since not referencing an atom
3079: return getASTCreateString(astCtorArgs);
3080: }
3081: }
3082:
/** Get a string for an expression to generate creating of an AST node.
 * Parse the first (possibly only) argument looking for the token type.
 * If the token type is a valid token symbol, ask for it's AST node type
 * and add to the end if only 2 arguments. The forms are #[T], #[T,"t"],
 * and as of 2.7.2 #[T,"t",ASTclassname].
 *
 * @param astCtorArgs The arguments to the AST constructor
 */
public String getASTCreateString(String astCtorArgs) {
    //System.out.println("AST CTOR: "+astCtorArgs);
    if (astCtorArgs == null) {
        astCtorArgs = "";
    }
    // Count commas to decide which #[...] form we were given.
    // NOTE(review): a comma inside a quoted text argument would be
    // miscounted here -- appears to assume simple arguments.
    int nCommas = 0;
    for (int i = 0; i < astCtorArgs.length(); i++) {
        if (astCtorArgs.charAt(i) == ',') {
            nCommas++;
        }
    }
    //System.out.println("num commas="+nCommas);
    if (nCommas < 2) { // if 1 or 2 args
        int firstComma = astCtorArgs.indexOf(',');
        int lastComma = astCtorArgs.lastIndexOf(',');
        // First argument (or the whole string) is the token name.
        String tokenName = astCtorArgs;
        if (nCommas > 0) {
            tokenName = astCtorArgs.substring(0, firstComma);
        }
        //System.out.println("Checking for ast node type of "+tokenName);
        TokenSymbol ts = grammar.tokenManager
                .getTokenSymbol(tokenName);
        if (ts != null) {
            String astNodeType = ts.getASTNodeType();
            //System.out.println("node type of "+tokenName+" is "+astNodeType);
            String emptyText = "";
            if (nCommas == 0) {
                // need to add 2nd arg of blank text for token text
                emptyText = ",\"\"";
            }
            if (astNodeType != null) {
                // heterogeneous node type known for this token:
                // pass the class name to the factory and cast the result
                return "(" + astNodeType + ")"
                        + "astFactory.create(" + astCtorArgs
                        + emptyText + ",\"" + astNodeType + "\")";
            }
            // fall through and just do a regular create with cast on front
            // if necessary (it differs from default "AST").
        }
        if (labeledElementASTType.equals("AST")) {
            return "astFactory.create(" + astCtorArgs + ")";
        }
        return "(" + labeledElementASTType + ")"
                + "astFactory.create(" + astCtorArgs + ")";
    }
    // create default type or (since 2.7.2) 3rd arg is classname
    return "(" + labeledElementASTType + ")astFactory.create("
            + astCtorArgs + ")";
}
3139:
3140: protected String getLookaheadTestExpression(Lookahead[] look, int k) {
3141: StringBuffer e = new StringBuffer(100);
3142: boolean first = true;
3143:
3144: e.append("(");
3145: for (int i = 1; i <= k; i++) {
3146: BitSet p = look[i].fset;
3147: if (!first) {
3148: e.append(") && (");
3149: }
3150: first = false;
3151:
3152: // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
3153: // There is no way to predict what that token would be. Just
3154: // allow anything instead.
3155: if (look[i].containsEpsilon()) {
3156: e.append("true");
3157: } else {
3158: e.append(getLookaheadTestTerm(i, p));
3159: }
3160: }
3161: e.append(")");
3162:
3163: return e.toString();
3164: }
3165:
3166: /**Generate a lookahead test expression for an alternate. This
3167: * will be a series of tests joined by '&&' and enclosed by '()',
3168: * the number of such tests being determined by the depth of the lookahead.
3169: */
3170: protected String getLookaheadTestExpression(Alternative alt,
3171: int maxDepth) {
3172: int depth = alt.lookaheadDepth;
3173: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
3174: // if the decision is nondeterministic, do the best we can: LL(k)
3175: // any predicates that are around will be generated later.
3176: depth = grammar.maxk;
3177: }
3178:
3179: if (maxDepth == 0) {
3180: // empty lookahead can result from alt with sem pred
3181: // that can see end of token. E.g., A : {pred}? ('a')? ;
3182: return "( true )";
3183: }
3184:
3185: return "(" + getLookaheadTestExpression(alt.cache, depth) + ")";
3186: }
3187:
3188: /**Generate a depth==1 lookahead test expression given the BitSet.
3189: * This may be one of:
3190: * 1) a series of 'x==X||' tests
3191: * 2) a range test using >= && <= where possible,
3192: * 3) a bitset membership test for complex comparisons
3193: * @param k The lookahead level
3194: * @param p The lookahead set for level k
3195: */
3196: protected String getLookaheadTestTerm(int k, BitSet p) {
3197: // Determine the name of the item to be compared
3198: String ts = lookaheadString(k);
3199:
3200: // Generate a range expression if possible
3201: int[] elems = p.toArray();
3202: if (elementsAreRange(elems)) {
3203: return getRangeExpression(k, elems);
3204: }
3205:
3206: // Generate a bitset membership test if possible
3207: StringBuffer e;
3208: int degree = p.degree();
3209: if (degree == 0) {
3210: return "true";
3211: }
3212:
3213: if (degree >= bitsetTestThreshold) {
3214: int bitsetIdx = markBitsetForGen(p);
3215: return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
3216: }
3217:
3218: // Otherwise, generate the long-winded series of "x==X||" tests
3219: e = new StringBuffer();
3220: for (int i = 0; i < elems.length; i++) {
3221: // Get the compared-to item (token or character value)
3222: String cs = getValueString(elems[i]);
3223:
3224: // Generate the element comparison
3225: if (i > 0)
3226: e.append("||");
3227: e.append(ts);
3228: e.append("==");
3229: e.append(cs);
3230: }
3231: return e.toString();
3232: }
3233:
3234: /** Return an expression for testing a contiguous renage of elements
3235: * @param k The lookahead level
3236: * @param elems The elements representing the set, usually from BitSet.toArray().
3237: * @return String containing test expression.
3238: */
3239: public String getRangeExpression(int k, int[] elems) {
3240: if (!elementsAreRange(elems)) {
3241: antlrTool.panic("getRangeExpression called with non-range");
3242: }
3243: int begin = elems[0];
3244: int end = elems[elems.length - 1];
3245: return "(" + lookaheadString(k) + " >= "
3246: + getValueString(begin) + " && " + lookaheadString(k)
3247: + " <= " + getValueString(end) + ")";
3248: }
3249:
3250: /** getValueString: get a string representation of a token or char value
3251: * @param value The token or char value
3252: */
3253: private String getValueString(int value) {
3254: String cs;
3255: if (grammar instanceof LexerGrammar) {
3256: cs = charFormatter.literalChar(value);
3257: } else {
3258: TokenSymbol ts = grammar.tokenManager
3259: .getTokenSymbolAt(value);
3260: if (ts == null) {
3261: return "" + value; // return token type as string
3262: // tool.panic("vocabulary for token type " + value + " is null");
3263: }
3264: String tId = ts.getId();
3265: if (ts instanceof StringLiteralSymbol) {
3266: // if string literal, use predefined label if any
3267: // if no predefined, try to mangle into LITERAL_xxx.
3268: // if can't mangle, use int value as last resort
3269: StringLiteralSymbol sl = (StringLiteralSymbol) ts;
3270: String label = sl.getLabel();
3271: if (label != null) {
3272: cs = label;
3273: } else {
3274: cs = mangleLiteral(tId);
3275: if (cs == null) {
3276: cs = String.valueOf(value);
3277: }
3278: }
3279: } else {
3280: cs = tId;
3281: }
3282: }
3283: return cs;
3284: }
3285:
3286: /**Is the lookahead for this alt empty? */
3287: protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
3288: int depth = alt.lookaheadDepth;
3289: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
3290: depth = grammar.maxk;
3291: }
3292: for (int i = 1; i <= depth && i <= maxDepth; i++) {
3293: BitSet p = alt.cache[i].fset;
3294: if (p.degree() != 0) {
3295: return false;
3296: }
3297: }
3298: return true;
3299: }
3300:
3301: private String lookaheadString(int k) {
3302: if (grammar instanceof TreeWalkerGrammar) {
3303: return "_t.getType()";
3304: }
3305: return "LA(" + k + ")";
3306: }
3307:
3308: /** Mangle a string literal into a meaningful token name. This is
3309: * only possible for literals that are all characters. The resulting
3310: * mangled literal name is literalsPrefix with the text of the literal
3311: * appended.
3312: * @return A string representing the mangled literal, or null if not possible.
3313: */
3314: private String mangleLiteral(String s) {
3315: String mangled = antlrTool.literalsPrefix;
3316: for (int i = 1; i < s.length() - 1; i++) {
3317: if (!Character.isLetter(s.charAt(i)) && s.charAt(i) != '_') {
3318: return null;
3319: }
3320: mangled += s.charAt(i);
3321: }
3322: if (antlrTool.upperCaseMangledLiterals) {
3323: mangled = mangled.toUpperCase();
3324: }
3325: return mangled;
3326: }
3327:
/** Map an identifier to it's corresponding tree-node variable.
 * This is context-sensitive, depending on the rule and alternative
 * being generated
 * @param idParam The identifier name to map
 * @param transInfo receives refRuleRoot when the id maps to the rule's output root
 * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
 */
public String mapTreeId(String idParam, ActionTransInfo transInfo) {
    // if not in an action of a rule, nothing to map.
    if (currentRule == null)
        return idParam;

    // in_var == true means we want the INPUT variable form of the name
    // (tree parsers read from *_in variables).
    boolean in_var = false;
    String id = idParam;
    if (grammar instanceof TreeWalkerGrammar) {
        if (!grammar.buildAST) {
            in_var = true;
        }
        // If the id ends with "_in", then map it to the input variable
        else if (id.length() > 3
                && id.lastIndexOf("_in") == id.length() - 3) {
            // Strip off the "_in"
            id = id.substring(0, id.length() - 3);
            in_var = true;
        }
    }

    // Check the rule labels. If id is a label, then the output
    // variable is label_AST, and the input variable is plain label.
    for (int i = 0; i < currentRule.labeledElements.size(); i++) {
        AlternativeElement elt = (AlternativeElement) currentRule.labeledElements
                .elementAt(i);
        if (elt.getLabel().equals(id)) {
            return in_var ? id : id + "_AST";
        }
    }

    // Failing that, check the id-to-variable map for the alternative.
    // If the id is in the map, then output variable is the name in the
    // map, and input variable is name_in
    String s = (String) treeVariableMap.get(id);
    if (s != null) {
        // NONUNIQUE is a sentinel object, so reference equality is the
        // intended comparison here.
        if (s == NONUNIQUE) {
            // There is more than one element with this id
            antlrTool.error("Ambiguous reference to AST element "
                    + id + " in rule " + currentRule.getRuleName());

            return null;
        } else if (s.equals(currentRule.getRuleName())) {
            // a recursive call to the enclosing rule is
            // ambiguous with the rule itself.
            antlrTool.error("Ambiguous reference to AST element "
                    + id + " in rule " + currentRule.getRuleName());
            return null;
        } else {
            return in_var ? s + "_in" : s;
        }
    }

    // Failing that, check the rule name itself. Output variable
    // is rule_AST; input variable is rule_AST_in (treeparsers).
    if (id.equals(currentRule.getRuleName())) {
        String r = in_var ? id + "_AST_in" : id + "_AST";
        if (transInfo != null) {
            if (!in_var) {
                // record that the action references the rule's AST root
                transInfo.refRuleRoot = r;
            }
        }
        return r;
    } else {
        // id does not map to anything -- return itself.
        return id;
    }
}
3401:
3402: /** Given an element and the name of an associated AST variable,
3403: * create a mapping between the element "name" and the variable name.
3404: */
3405: private void mapTreeVariable(AlternativeElement e, String name) {
3406: // For tree elements, defer to the root
3407: if (e instanceof TreeElement) {
3408: mapTreeVariable(((TreeElement) e).root, name);
3409: return;
3410: }
3411:
3412: // Determine the name of the element, if any, for mapping purposes
3413: String elName = null;
3414:
3415: // Don't map labeled items
3416: if (e.getLabel() == null) {
3417: if (e instanceof TokenRefElement) {
3418: // use the token id
3419: elName = ((TokenRefElement) e).atomText;
3420: } else if (e instanceof RuleRefElement) {
3421: // use the rule name
3422: elName = ((RuleRefElement) e).targetRule;
3423: }
3424: }
3425: // Add the element to the tree variable map if it has a name
3426: if (elName != null) {
3427: if (treeVariableMap.get(elName) != null) {
3428: // Name is already in the map -- mark it as duplicate
3429: treeVariableMap.remove(elName);
3430: treeVariableMap.put(elName, NONUNIQUE);
3431: } else {
3432: treeVariableMap.put(elName, name);
3433: }
3434: }
3435: }
3436:
/** Lexically process $var and tree-specifiers in the action.
 * This will replace #id and #(...) with the appropriate
 * function calls and/or variables etc...
 * @param actionStr   the raw action text from the grammar (may be null/empty)
 * @param line        grammar line of the action, for error reporting
 * @param currentRule the rule whose action is being translated
 * @param tInfo       receives translation info (e.g. whether the rule root was assigned)
 * @return the translated action text, or null for null/empty input, or the
 *         original text if translation fails or is unnecessary
 */
protected String processActionForSpecialSymbols(String actionStr,
        int line, RuleBlock currentRule, ActionTransInfo tInfo) {
    if (actionStr == null || actionStr.length() == 0)
        return null;

    // The action trans info tells us (at the moment) whether an
    // assignment was done to the rule's tree root.
    if (grammar == null)
        return actionStr;

    // see if we have anything to do...
    // (only run the action lexer when '#' is present and ASTs are built,
    // for any tree walker, or when '$' is present in a parser/lexer)
    if ((grammar.buildAST && actionStr.indexOf('#') != -1)
            || grammar instanceof TreeWalkerGrammar
            || ((grammar instanceof LexerGrammar || grammar instanceof ParserGrammar) && actionStr
                    .indexOf('$') != -1)) {
        // Create a lexer to read an action and return the translated version
        persistence.antlr.actions.java.ActionLexer lexer = new persistence.antlr.actions.java.ActionLexer(
                actionStr, currentRule, this, tInfo);

        lexer.setLineOffset(line);
        lexer.setFilename(grammar.getFilename());
        lexer.setTool(antlrTool);

        try {
            lexer.mACTION(true);
            actionStr = lexer.getTokenObject().getText();
            // System.out.println("action translated: "+actionStr);
            // System.out.println("trans info is "+tInfo);
        } catch (RecognitionException ex) {
            // report but fall back to the untranslated action text
            lexer.reportError(ex);
            return actionStr;
        } catch (TokenStreamException tex) {
            antlrTool.panic("Error reading action:" + actionStr);
            return actionStr;
        } catch (CharStreamException io) {
            antlrTool.panic("Error reading action:" + actionStr);
            return actionStr;
        }
    }
    return actionStr;
}
3482:
/** Configure the grammar-class-dependent code-generation parameters
 *  (label types, common args/params, lookahead value expression, the
 *  exception type thrown, and the no-viable-alt throw statement) so the
 *  rest of the generator can avoid instanceof tests.
 *  @param g the grammar being generated (parser, lexer, or tree walker)
 */
private void setupGrammarParameters(Grammar g) {
    if (g instanceof ParserGrammar) {
        labeledElementASTType = "AST";
        // ASTLabelType option overrides the default AST label type
        if (g.hasOption("ASTLabelType")) {
            Token tsuffix = g.getOption("ASTLabelType");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix
                        .getText(), "\"", "\"");
                if (suffix != null) {
                    labeledElementASTType = suffix;
                }
            }
        }
        labeledElementType = "Token ";
        labeledElementInit = "null";
        commonExtraArgs = "";
        commonExtraParams = "";
        commonLocalVars = "";
        lt1Value = "LT(1)";
        exceptionThrown = "RecognitionException";
        throwNoViable = "throw new NoViableAltException(LT(1), getFilename());";
    } else if (g instanceof LexerGrammar) {
        // lexer rules label characters and track token-creation state
        labeledElementType = "char ";
        labeledElementInit = "'\\0'";
        commonExtraArgs = "";
        commonExtraParams = "boolean _createToken";
        commonLocalVars = "int _ttype; Token _token=null; int _begin=text.length();";
        lt1Value = "LA(1)";
        exceptionThrown = "RecognitionException";
        throwNoViable = "throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());";
    } else if (g instanceof TreeWalkerGrammar) {
        labeledElementASTType = "AST";
        labeledElementType = "AST";
        if (g.hasOption("ASTLabelType")) {
            Token tsuffix = g.getOption("ASTLabelType");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix
                        .getText(), "\"", "\"");
                if (suffix != null) {
                    labeledElementASTType = suffix;
                    labeledElementType = suffix;
                }
            }
        }
        // ensure the option is recorded so later lookups see a value
        if (!g.hasOption("ASTLabelType")) {
            g.setOption("ASTLabelType", new Token(
                    ANTLRTokenTypes.STRING_LITERAL, "AST"));
        }
        labeledElementInit = "null";
        // tree walkers thread the tree cursor _t through every rule
        commonExtraArgs = "_t";
        commonExtraParams = "AST _t";
        commonLocalVars = "";
        lt1Value = "(" + labeledElementASTType + ")_t";
        exceptionThrown = "RecognitionException";
        throwNoViable = "throw new NoViableAltException(_t);";
    } else {
        antlrTool.panic("Unknown grammar type");
    }
}
3542:
3543: /** This method exists so a subclass, namely VAJCodeGenerator,
3544: * can open the file in its own evil way. JavaCodeGenerator
3545: * simply opens a text file...
3546: */
3547: public void setupOutput(String className) throws IOException {
3548: currentOutput = antlrTool.openOutputFile(className + ".java");
3549: }
3550: }
|