package antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.cs.usfca.edu
 * Software rights: http://www.antlr.org/license.html
 */

import java.io.IOException;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;

import antlr.collections.impl.BitSet;
import antlr.collections.impl.Vector;
import java.util.ArrayList;
import java.util.List;
/** Generate MyParser.java, MyLexer.java and MyParserTokenTypes.java */
public class JavaCodeGenerator extends CodeGenerator {
    /** Constant to indicate that we shouldn't generate a mapping entry */
    public static final int NO_MAPPING = -999;
    /**
     * Constant to indicate that we should continue mapping based on the
     * last mapping seen by the SMAP generator (there may have been
     * intermediate mappings generated by nested elements).
     */
    public static final int CONTINUE_LAST_MAPPING = -888;
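
    // Illustrative note (added, not from the original comments): code printed
    // for a grammar action is mapped back to its .g source line, e.g.
    //     println(actionCode, action.getLine());
    // purely synthetic glue is printed with
    //     println("}", NO_MAPPING);
    // and follow-on lines of a single construct reuse the open mapping via
    //     println(rest, CONTINUE_LAST_MAPPING);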

    private JavaCodeGeneratorPrintWriterManager printWriterManager;
    private int defaultLine = NO_MAPPING;

    //vk++
    private int syntacticPredId = 0;
    //vk--

    //vv++
    public static final boolean RECOVER_AST = true;
    //vv--

    // non-zero if inside syntactic predicate generation
    protected int syntacticPredLevel = 0;

    // Are we generating ASTs (for parsers and tree parsers) right now?
    protected boolean genAST = false;

    // Are we saving the text consumed (for lexers) right now?
    protected boolean saveText = false;

    java.util.Vector guessedRules = new java.util.Vector();
    HashSet guessedRules_created = new HashSet();

    // Grammar parameters set up to handle different grammar classes.
    // These are used to get instanceof tests out of code generation.
    String labeledElementType;
    String labeledElementASTType;
    String labeledElementInit;
    String commonExtraArgs;
    String commonExtraParams;
    String commonLocalVars;
    String lt1Value;
    String exceptionThrown;
    String throwNoViable;

    /** Tracks the rule being generated. Used for mapTreeId */
    RuleBlock currentRule;

    /** Tracks the rule or labeled subrule being generated. Used for
     *  AST generation. */
    String currentASTResult;

    /** Mapping between the ids used in the current alt, and the
     *  names of variables used to represent their AST values.
     */
    Hashtable treeVariableMap = new Hashtable();

    /** Used to keep track of which AST variables have been defined in a rule
     *  (except for the #rule_name and #rule_name_in vars).
     */
    Hashtable declaredASTVariables = new Hashtable();

    /* Count of unnamed generated variables */
    int astVarNumber = 1;

    /** Special value used to mark duplicates in treeVariableMap */
    protected static final String NONUNIQUE = new String();
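    // Note (added): NONUNIQUE is deliberately a fresh String instance rather
    // than a literal, presumably so lookups can use reference identity (==)
    // to distinguish "this id mapped to a duplicate" from any real name.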

    public static final int caseSizeThreshold = 127; // ascii is max

    private Vector semPreds;

    /** Create a Java code-generator using the given Grammar.
     *  The caller must still call setTool, setBehavior, and setAnalyzer
     *  before generating code.
     */
    public JavaCodeGenerator() {
        super();
        charFormatter = new JavaCharFormatter();
    }

    protected void printAction(String code) {
        printAction(code, defaultLine);
    }

    protected void printAction(String code, int sourceStartLine) {
        getPrintWriterManager().startMapping(sourceStartLine);
        super.printAction(code);
        getPrintWriterManager().endMapping();
    }

    public void println(String code) {
        println(code, defaultLine);
    }

    public void println(String code, int sourceStartLine) {
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().startSingleSourceLineMapping(sourceStartLine);
        super.println(code);
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().endMapping();
    }

    protected void print(String code) {
        print(code, defaultLine);
    }

    protected void print(String code, int sourceStartLine) {
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().startMapping(sourceStartLine);
        super.print(code);
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().endMapping();
    }

    protected void _print(String code) {
        _print(code, defaultLine);
    }

    protected void _print(String code, int sourceStartLine) {
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().startMapping(sourceStartLine);
        super._print(code);
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().endMapping();
    }

    protected void _println(String code) {
        _println(code, defaultLine);
    }

    protected void _println(String code, int sourceStartLine) {
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().startMapping(sourceStartLine);
        super._println(code);
        if (sourceStartLine > 0
                || sourceStartLine == CONTINUE_LAST_MAPPING)
            getPrintWriterManager().endMapping();
    }
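
    // Note (added): in all print variants above, a source mapping is opened
    // only for real source lines (> 0) or for CONTINUE_LAST_MAPPING;
    // NO_MAPPING and other negative sentinels emit text without SMAP entries.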

    /** Adds a semantic predicate string to the sem pred vector.
     *  These strings will be used to build an array of sem pred names
     *  when building a debugging parser. This method should only be
     *  called when the debug option is specified.
     */
    protected int addSemPred(String predicate) {
        semPreds.appendElement(predicate);
        return semPreds.size() - 1;
    }
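
    // Illustrative sketch (an assumption, not verbatim from this file): with
    // the debug option on, genSemPred can record the predicate text and embed
    // the returned index in generated code so the runtime can report which
    // entry of the generated _semPredNames table was evaluated, e.g.
    //     int predIndex = addSemPred(escapedPredicateText);
    //     // ...emit code that passes predIndex to the debugging hooks...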

    public void exitIfError() {
        if (antlrTool.hasError()) {
            antlrTool.fatalError("Exiting due to errors.");
        }
    }

    /** Generate the parser, lexer, treeparser, and token types in Java */
    public void gen() {
        // Do the code generation
        try {
            // Loop over all grammars
            Enumeration grammarIter = behavior.grammars.elements();
            while (grammarIter.hasMoreElements()) {
                Grammar g = (Grammar) grammarIter.nextElement();
                // Connect all the components to each other
                g.setGrammarAnalyzer(analyzer);
                g.setCodeGenerator(this);
                analyzer.setGrammar(g);
                // To get the right overloading behavior across heterogeneous grammars
                setupGrammarParameters(g);
                g.generate();
                // print out the grammar with lookahead sets (and FOLLOWs)
                // System.out.print(g.toString());
                exitIfError();
            }

            // Loop over all token managers (some of which are lexers)
            Enumeration tmIter = behavior.tokenManagers.elements();
            while (tmIter.hasMoreElements()) {
                TokenManager tm = (TokenManager) tmIter.nextElement();
                if (!tm.isReadOnly()) {
                    // Write the token manager tokens as Java;
                    // this must appear before genTokenInterchange so that
                    // labels are set on string literals
                    genTokenTypes(tm);
                    // Write the token manager tokens as plain text
                    genTokenInterchange(tm);
                }
                exitIfError();
            }
        } catch (IOException e) {
            antlrTool.reportException(e, null);
        }
    }

    /** Generate code for the given grammar element.
     *  @param action The {...} action to generate
     */
    public void gen(ActionElement action, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = action.getLine();
            if (DEBUG_CODE_GENERATOR)
                System.out.println("genAction(" + action + ")");
            if (action.isSemPred) {
                genSemPred(action.actionText, action.line, context);
            } else {
                if (!Tool.cloneGuessing
                        || (context.guessing == Context.NO_GUESSING)) {
                    if (grammar.hasSyntacticPredicate
                            && !Tool.cloneGuessing) {
                        println("if ( inputState.guessing==0 ) {");
                        tabs++;
                    }
                    // get the name of the followSet for the current rule so that we
                    // can replace $FOLLOW in the .g file.
                    ActionTransInfo tInfo = new ActionTransInfo();
                    String actionStr = processActionForSpecialSymbols(
                            action.actionText, action.getLine(),
                            currentRule, tInfo);

                    if (tInfo.refRuleRoot != null) {
                        // Somebody referenced "#rule"; make sure the translated var is valid.
                        // Assignment to #rule is left as a ref also, meaning that assignments
                        // with no other refs like "#rule = foo();" still force this code to be
                        // generated (unnecessarily).
                        println(tInfo.refRuleRoot + " = ("
                                + labeledElementASTType
                                + ")currentAST.root;");
                    }

                    // dump the translated action
                    printAction(actionStr);

                    if (tInfo.assignToRoot) {
                        // Somebody did a "#rule="; reset internal currentAST.root
                        println("currentAST.root = " + tInfo.refRuleRoot + ";");
                        // reset the child pointer too, to be the last sibling in the sibling list
                        println("currentAST.child = " + tInfo.refRuleRoot
                                + "!=null &&" + tInfo.refRuleRoot
                                + ".getFirstChild()!=null ?",
                                NO_MAPPING);
                        tabs++;
                        println(tInfo.refRuleRoot + ".getFirstChild() : "
                                + tInfo.refRuleRoot + ";");
                        tabs--;
                        println("currentAST.advanceChildToEnd();");
                    }

                    if (grammar.hasSyntacticPredicate
                            && !Tool.cloneGuessing) {
                        tabs--;
                        println("}", NO_MAPPING);
                    }
                }
            }
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    /** Generate code for the given grammar element.
     *  @param blk The "x|y|z|..." block to generate
     */
    public void gen(AlternativeBlock blk, Context context) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("gen(" + blk + ") breakLabel="
                    + context.breakLabel);
        println("{", NO_MAPPING);
        genBlockPreamble(blk);
        genBlockInitAction(blk, context);

        // Tell AST generation to build subrule result
        String saveCurrentASTResult = currentASTResult;
        if (blk.getLabel() != null) {
            currentASTResult = blk.getLabel();
        }

        boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

        JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true, context);
        String noExcViable = getThrowNoViableStr(context);
        genBlockFinish(howToFinish, noExcViable, blk.getLine());

        println("}", NO_MAPPING);

        // Restore previous AST generation
        currentASTResult = saveCurrentASTResult;
    }

    /** Generate code for the given grammar element.
     *  @param end The block-end element to generate. Block-end
     *  elements are synthesized by the grammar parser to represent
     *  the end of a block.
     */
    public void gen(BlockEndElement end, Context context) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("genRuleEnd(" + end + ")");
    }

    /** Generate code for the given grammar element.
     *  @param atom The character literal reference to generate
     */
    public void gen(CharLiteralElement atom, Context context) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("genChar(" + atom + ") breakLabel="
                    + context.breakLabel);

        if (atom.getLabel() != null) {
            println(atom.getLabel() + " = " + lt1Value + ";", atom.getLine());
        }

        boolean oldsaveText = saveText;
        saveText = saveText
                && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;
        if (Tool.avoidLAMatch && (context.checkedLA > 0)) {
            println("//already checked char, just skip");
            if (!(grammar instanceof TreeWalkerGrammar)) {
                println("consume();");
            }
            context.decreaseLAChecked();
        } else {
            genMatch(atom);
            printCheck(context);
        }
        saveText = oldsaveText;
    }

    /** Generate code for the given grammar element.
     *  @param r The character-range reference to generate
     */
    public void gen(CharRangeElement r, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = r.getLine();
            if (r.getLabel() != null && syntacticPredLevel == 0) {
                println(r.getLabel() + " = " + lt1Value + ";");
            }
            boolean flag = (grammar instanceof LexerGrammar
                    && (!saveText
                        || r.getAutoGenType() == GrammarElement.AUTO_GEN_BANG));
            if (flag) {
                println("_saveIndex=text.length();");
            }

            if (Tool.avoidLAMatch && (context.checkedLA > 0)) {
                println("//already checked char range, just skip");
                if (!(grammar instanceof TreeWalkerGrammar)) {
                    println("consume();");
                }
                context.decreaseLAChecked();
            } else {
                println("matchRange(" + r.beginText + "," + r.endText + ");");
                printCheck(context);
            }

            if (flag) {
                println("text.setLength(_saveIndex);");
            }
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    /** Generate the lexer Java file */
    public void gen(LexerGrammar g) throws IOException {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = NO_MAPPING;
            // If debugging, create a new sempred vector for this grammar
            if (g.debuggingOutput)
                semPreds = new Vector();

            setGrammar(g);
            if (!(grammar instanceof LexerGrammar)) {
                antlrTool.fatalError("Internal error generating lexer");
            }

            // SAS: moved output creation to a method so a subclass can change
            // how the output is generated (for VAJ interface)
            currentOutput = getPrintWriterManager().setupOutput(antlrTool, grammar);

            genAST = false; // no way to gen trees.
            saveText = true; // save consumed characters.

            tabs = 0;

            // Generate header common to all Java output files
            genHeader();
            // Do not use printAction because we assume tabs==0

            try {
                defaultLine = behavior.getHeaderActionLine("");
                println(behavior.getHeaderAction(""));
            } finally {
                defaultLine = NO_MAPPING;
            }

            // Generate header specific to lexer Java file
            // println("import java.io.FileInputStream;");
            println("import java.io.InputStream;");
            println("import antlr.TokenStreamException;");
            println("import antlr.TokenStreamIOException;");
            println("import antlr.TokenStreamRecognitionException;");
            //println("import antlr.CharStreamException;");
            println("import antlr.CharStreamIOException;");
            println("import antlr.ANTLRException;");
            println("import java.io.Reader;");
            println("import java.util.Hashtable;");
            println("import antlr." + grammar.getSuperClass() + ";");
            println("import antlr.InputBuffer;");
            println("import antlr.ByteBuffer;");
            println("import antlr.CharBuffer;");
            println("import antlr.Token;");
            println("import antlr.CommonToken;");
            println("import antlr.RecognitionException;");
            println("import antlr.NoViableAltForCharException;");
            println("import antlr.MismatchedCharException;");
            println("import antlr.TokenStream;");
            println("import antlr.ANTLRHashString;");
            println("import antlr.LexerSharedInputState;");
            println("import antlr.collections.impl.BitSet;");
            println("import antlr.SemanticException;");

            // Generate user-defined lexer file preamble
            println(grammar.preambleAction.getText());

            // Generate lexer class definition
            String sup = null;
            if (grammar.superClass != null) {
                sup = grammar.superClass;
            } else {
                sup = "antlr." + grammar.getSuperClass();
            }

            // print javadoc comment if any
            if (grammar.comment != null) {
                _println(grammar.comment);
            }

            // get prefix (replaces "public" and lets user specify)
            String prefix = "public";
            Token tprefix = (Token) grammar.options.get("classHeaderPrefix");
            if (tprefix != null) {
                String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
                if (p != null) {
                    prefix = p;
                }
            }

            print(prefix + " ");
            print("class " + grammar.getClassName() + " extends " + sup);
            println(" implements " + grammar.tokenManager.getName()
                    + TokenTypesFileSuffix + ", TokenStream");
            Token tsuffix = (Token) grammar.options.get("classHeaderSuffix");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
                if (suffix != null) {
                    print(", " + suffix); // must be an interface name for Java
                }
            }
            println(" {");

            // Generate user-defined lexer class members
            print(processActionForSpecialSymbols(
                    grammar.classMemberAction.getText(),
                    grammar.classMemberAction.getLine(), currentRule,
                    null), grammar.classMemberAction.getLine());

            //
            // Generate the constructor from InputStream, which in turn
            // calls the ByteBuffer constructor
            //
            println("public " + grammar.getClassName() + "(InputStream in) {");
            tabs++;
            println("this(new ByteBuffer(in));");
            tabs--;
            println("}");

            //
            // Generate the constructor from Reader, which in turn
            // calls the CharBuffer constructor
            //
            println("public " + grammar.getClassName() + "(Reader in) {");
            tabs++;
            println("this(new CharBuffer(in));");
            tabs--;
            println("}");

            //
            // Generate the constructor from InputBuffer (char or byte)
            //
            println("public " + grammar.getClassName() + "(InputBuffer ib) {");
            tabs++;
            // if debugging, wrap the input buffer in a debugger
            //if (grammar.debuggingOutput)
            //println("this(new LexerSharedInputState(new antlr.debug.DebuggingInputBuffer(ib)));");
            //else
            println("this(new LexerSharedInputState(ib));");
            tabs--;
            println("}");

            //
            // Generate the constructor from LexerSharedInputState
            //
            println("public " + grammar.getClassName()
                    + "(LexerSharedInputState state) {");
            tabs++;

            println("super(state);");
            // if debugging, set up array variables and call the user-overridable
            // debugging setup method
            if (grammar.debuggingOutput) {
                println(" ruleNames = _ruleNames;");
                println(" semPredNames = _semPredNames;");
                println(" setupDebugging();");
            }

            // Generate the setting of various generated options.
            // These need to be before the literals since ANTLRHashString depends on
            // the case-sensitivity settings.
            println("caseSensitiveLiterals = " + g.caseSensitiveLiterals + ";");
            println("setCaseSensitive(" + g.caseSensitive + ");");

            // Generate the initialization of a hashtable
            // containing the string literals used in the lexer.
            // The literals variable itself is in CharScanner.
            println("literals = new Hashtable();");
            Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
            while (keys.hasMoreElements()) {
                String key = (String) keys.nextElement();
                if (key.charAt(0) != '"') {
                    continue;
                }
                TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
                if (sym instanceof StringLiteralSymbol) {
                    StringLiteralSymbol s = (StringLiteralSymbol) sym;
                    println("literals.put(new ANTLRHashString("
                            + s.getId() + ", this), new Integer("
                            + s.getTokenType() + "));");
                }
            }
            tabs--;
            println("}");

            Enumeration ids;

            // generate the rule name array for debugging
            if (grammar.debuggingOutput) {
                println("private static final String _ruleNames[] = {");

                ids = grammar.rules.elements();
                int ruleNum = 0;
                while (ids.hasMoreElements()) {
                    GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
                    if (sym instanceof RuleSymbol)
                        println(" \"" + ((RuleSymbol) sym).getId() + "\",");
                }
                println("};");
            }

            // Generate nextToken() rule.
            // nextToken() is a synthetic lexer rule that is the implicit OR of all
            // user-defined lexer rules.
            genNextToken();

            // Generate code for each rule in the lexer
            ids = grammar.rules.elements();
            int ruleNum = 0;
            while (ids.hasMoreElements()) {
                RuleSymbol sym = (RuleSymbol) ids.nextElement();
                // Don't generate the synthetic rules
                if (!sym.getId().equals("mnextToken")) {
                    genRule(sym, false, ruleNum++,
                            new Context("", Context.NO_GUESSING));
                }
                exitIfError();
            }

            printGuessedRules(ruleNum);

            // Generate the semantic predicate map for debugging
            if (grammar.debuggingOutput)
                genSemPredMap();

            // Generate the bitsets used throughout the lexer
            genBitsets(bitsetsUsed,
                    ((LexerGrammar) grammar).charVocabulary.size());

            println("");
            println("}");

            // Close the lexer output stream
            getPrintWriterManager().finishOutput();
        } finally {
            defaultLine = oldDefaultLine;
        }
    }
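
    // Shape of the generated lexer file, assembled from the steps above (an
    // illustrative reconstruction, not verbatim output; the superclass is
    // whatever grammar.getSuperClass() returns, assumed here to be CharScanner):
    //     <header action>
    //     import ...;
    //     <user preamble>
    //     public class MyLexer extends antlr.CharScanner
    //             implements MyLexerTokenTypes, TokenStream {
    //         <user class members>
    //         <constructors: InputStream, Reader, InputBuffer, LexerSharedInputState>
    //         <nextToken() plus one method per lexer rule>
    //         <debug tables and bitsets>
    //     }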

    /** Generate code for the given grammar element.
     *  @param blk The (...)+ block to generate
     */
    public void gen(OneOrMoreBlock blk, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = blk.getLine();
            if (DEBUG_CODE_GENERATOR)
                System.out.println("gen+(" + blk + ")");
            String label;
            String cnt;
            println("{", NO_MAPPING);
            genBlockPreamble(blk);
            if (blk.getLabel() != null) {
                cnt = "_cnt_" + blk.getLabel();
            } else {
                cnt = "_cnt" + blk.ID;
            }
            println("int " + cnt + "=0;");
            if (blk.getLabel() != null) {
                label = blk.getLabel();
            } else {
                label = "_loop" + blk.ID;
            }
            println(label + ":");
            println("do {");
            tabs++;
            // generate the init action for ()+ ()* inside the loop;
            // this allows us to do useful EOF checking...
            genBlockInitAction(blk, context);

            // Tell AST generation to build subrule result
            String saveCurrentASTResult = currentASTResult;
            if (blk.getLabel() != null) {
                currentASTResult = blk.getLabel();
            }

            boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

            // generate exit test if greedy set to false
            // and an alt is ambiguous with exit branch
            // or when lookahead derived purely from end-of-file.
            // Lookahead analysis stops when end-of-file is hit,
            // returning set {epsilon}. Since {epsilon} is not
            // ambig with any real tokens, no error is reported
            // by deterministic() routines and we have to check
            // for the case where the lookahead depth didn't get
            // set to NONDETERMINISTIC (this only happens when the
            // FOLLOW contains real atoms + epsilon).
            boolean generateNonGreedyExitPath = false;
            int nonGreedyExitDepth = grammar.maxk;

            if (!blk.greedy
                    && blk.exitLookaheadDepth <= grammar.maxk
                    && blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
                generateNonGreedyExitPath = true;
                nonGreedyExitDepth = blk.exitLookaheadDepth;
            } else if (!blk.greedy
                    && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
                generateNonGreedyExitPath = true;
            }

            // generate exit test if greedy set to false
            // and an alt is ambiguous with exit branch
            if (generateNonGreedyExitPath) {
                if (DEBUG_CODE_GENERATOR) {
                    System.out.println("nongreedy (...)+ loop; exit depth is "
                            + blk.exitLookaheadDepth);
                }
                String predictExit = getLookaheadTestExpression(
                        blk.exitCache, nonGreedyExitDepth, 0);
                println("// nongreedy exit test", NO_MAPPING);
                println("if ( " + cnt + ">=1 && " + predictExit
                        + ") break " + label + ";",
                        CONTINUE_LAST_MAPPING);
            }

            JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false, context);
            genBlockFinish(howToFinish, "if ( " + cnt
                    + ">=1 ) { break " + label + "; } else {"
                    + getThrowNoViableStr(context) + "}", blk.getLine());

            println(cnt + "++;");
            tabs--;
            println("} while (true);");
            println("}");

            // Restore previous AST generation
            currentASTResult = saveCurrentASTResult;
        } finally {
            defaultLine = oldDefaultLine;
        }
    }
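
    // For reference, an unlabeled (...)+ block with ID 3 yields generated code
    // of roughly this shape (reconstructed from the println calls above, not
    // copied from real output):
    //     {
    //     int _cnt3=0;
    //     _loop3:
    //     do {
    //         ...alternative matching code...
    //         // when no alternative matched:
    //         if ( _cnt3>=1 ) { break _loop3; } else { ...no-viable action... }
    //         _cnt3++;
    //     } while (true);
    //     }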

    /** Generate the parser Java file */
    public void gen(ParserGrammar g) throws IOException {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = NO_MAPPING;
            // if debugging, set up a new vector to keep track of sempred
            // strings for this grammar
            if (g.debuggingOutput)
                semPreds = new Vector();

            setGrammar(g);
            if (!(grammar instanceof ParserGrammar)) {
                antlrTool.fatalError("Internal error generating parser");
            }

            // Open the output stream for the parser and set the currentOutput
            // SAS: moved file setup so subclass could do it (for VAJ interface)
            currentOutput = getPrintWriterManager().setupOutput(antlrTool, grammar);

            genAST = grammar.buildAST;

            tabs = 0;

            // Generate the header common to all output files.
            genHeader();
            // Do not use printAction because we assume tabs==0
            try {
                defaultLine = behavior.getHeaderActionLine("");
                println(behavior.getHeaderAction(""));
            } finally {
                defaultLine = NO_MAPPING;
            }

            // Generate header for the parser
            println("import antlr.TokenBuffer;");
            //println("import antlr.TokenStreamException;");
            println("import antlr.TokenStreamIOException;");
            println("import antlr.ANTLRException;");
            println("import antlr." + grammar.getSuperClass() + ";");
            println("import antlr.Token;");
            println("import antlr.TokenStream;");
            println("import antlr.RecognitionException;");
            println("import antlr.NoViableAltException;");
            println("import antlr.MismatchedTokenException;");
            println("import antlr.SemanticException;");
            println("import antlr.ParserSharedInputState;");
            println("import antlr.collections.impl.BitSet;");
            if (genAST) {
                println("import antlr.collections.AST;");
                println("import java.util.Hashtable;");
                println("import antlr.ASTFactory;");
                println("import antlr.ASTPair;");
                println("import antlr.collections.impl.ASTArray;");
            }
            if (Tool.memoization) {
                println("import java.util.HashMap;");
            }

            // Output the user-defined parser preamble
            println(grammar.preambleAction.getText());

            // Generate parser class definition
            String sup = null;
            if (grammar.superClass != null)
                sup = grammar.superClass;
            else
                sup = "antlr." + grammar.getSuperClass();

            // print javadoc comment if any
            if (grammar.comment != null) {
                _println(grammar.comment);
            }

            // get prefix (replaces "public" and lets user specify)
            String prefix = "public";
            Token tprefix = (Token) grammar.options.get("classHeaderPrefix");
            if (tprefix != null) {
                String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
                if (p != null) {
                    prefix = p;
                }
            }

            print(prefix + " ");
            print("class " + grammar.getClassName() + " extends " + sup);
            println(" implements "
                    + grammar.tokenManager.getName()
                    + TokenTypesFileSuffix);

            Token tsuffix = (Token) grammar.options.get("classHeaderSuffix");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
                if (suffix != null)
                    print(", " + suffix); // must be an interface name for Java
            }
            println(" {");

            // set up an array of all the rule names so the debugger can
            // keep track of them only by number -- less to store in the tree...
            if (grammar.debuggingOutput) {
                println("private static final String _ruleNames[] = {");

                Enumeration ids = grammar.rules.elements();
                int ruleNum = 0;
                while (ids.hasMoreElements()) {
                    GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
                    if (sym instanceof RuleSymbol)
                        println(" \"" + ((RuleSymbol) sym).getId() + "\",");
                }
                println("};");
            }

            // Generate user-defined parser class members
            print(processActionForSpecialSymbols(
                    grammar.classMemberAction.getText(),
                    grammar.classMemberAction.getLine(), currentRule,
                    null), grammar.classMemberAction.getLine());

            // Generate parser class constructor from TokenBuffer
            println("");
            println("protected " + grammar.getClassName()
                    + "(TokenBuffer tokenBuf, int k) {");
            println(" super(tokenBuf,k);");
            println(" tokenNames = _tokenNames;");
            // if debugging, set up arrays and call the user-overridable
            // debugging setup method
            if (grammar.debuggingOutput) {
                println(" ruleNames = _ruleNames;");
                println(" semPredNames = _semPredNames;");
                println(" setupDebugging(tokenBuf);");
            }
            if (grammar.buildAST) {
                println(" buildTokenTypeASTClassMap();");
                println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
            }
            println("}");
            println("");

            println("public " + grammar.getClassName()
                    + "(TokenBuffer tokenBuf) {");
            println(" this(tokenBuf," + grammar.maxk + ");");
            println("}");
            println("");

            // Generate parser class constructor from TokenStream
            println("protected " + grammar.getClassName()
                    + "(TokenStream lexer, int k) {");
            println(" super(lexer,k);");
            println(" tokenNames = _tokenNames;");

            // if debugging, set up arrays and call the user-overridable
            // debugging setup method
            if (grammar.debuggingOutput) {
                println(" ruleNames = _ruleNames;");
                println(" semPredNames = _semPredNames;");
                println(" setupDebugging(lexer);");
            }
            if (grammar.buildAST) {
                println(" buildTokenTypeASTClassMap();");
                println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
            }
            if (Tool.memoization) {
                println(" ruleMemo = new HashMap[" + grammar.rules.size() + "];");
            }
            println("}");
            println("");

            println("public " + grammar.getClassName()
                    + "(TokenStream lexer) {");
            println(" this(lexer," + grammar.maxk + ");");
            println("}");
            println("");

            println("public " + grammar.getClassName()
                    + "(ParserSharedInputState state) {");
            println(" super(state," + grammar.maxk + ");");
            println(" tokenNames = _tokenNames;");
            if (grammar.buildAST) {
                println(" buildTokenTypeASTClassMap();");
                println(" astFactory = new ASTFactory(getTokenTypeToASTClassMap());");
            }
            println("}");
            println("");

            // Generate code for each rule in the grammar
            Enumeration ids = grammar.rules.elements();
            int ruleNum = 0;
            // First pass: create non-guessing rules and mark rules used for guessing
            while (ids.hasMoreElements()) {
                GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
                //System.out.println("Creating rule: " + sym.id + ", " + sym);
                if (sym instanceof RuleSymbol) {
                    RuleSymbol rs = (RuleSymbol) sym;
                    genRule(rs, rs.references.size() == 0, ruleNum++,
                            new Context("", Context.NO_GUESSING));
                }
                exitIfError();
            }

            // Second pass: create guessed rules
            printGuessedRules(ruleNum);

            // Generate the token names
            genTokenStrings();

            if (grammar.buildAST) {
                genTokenASTNodeMap();
                genTokenASTCreateMethod();
            }

            // Generate the bitsets used throughout the grammar
            genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());

            // Generate the semantic predicate map for debugging
            if (grammar.debuggingOutput)
                genSemPredMap();

            // Close class definition
            println("");
            println("}");

            // Close the parser output stream
            getPrintWriterManager().finishOutput();
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    private final void printGuessedRules(int ruleNum) {
        //System.out.println("Guessed rules size=" + guessedRules.size());
        while (!guessedRules.isEmpty()) {
            String name = (String) guessedRules.elementAt(0);
            GrammarSymbol sym = grammar.getSymbol(name);
            //System.out.println("Guessed rule: " + name + ", " + sym);
            if ((sym instanceof RuleSymbol)
                    && (!guessedRules_created.contains(name))) {
                //System.out.println("Creating rule: " + sym);
                RuleSymbol rs = (RuleSymbol) sym;
                genRule(rs, rs.references.size() == 0, ruleNum++,
                        new Context("", Context.CLONE_GUESSING));
                guessedRules_created.add(name);
            } else {
                //System.out.println("Not created");
            }
            exitIfError();
            guessedRules.removeElementAt(0);
        }
    }
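
    // Note (an inference from the loop above, stated as an assumption):
    // guessedRules behaves as a worklist; genRule may enqueue further rule
    // names while this loop runs, which is why it re-tests isEmpty() and pops
    // element 0 each pass instead of iterating over a fixed snapshot.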

    /** Generate code for the given grammar element.
     *  @param rr The rule-reference to generate
     */
    public void gen(RuleRefElement rr, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = rr.getLine();
            if (DEBUG_CODE_GENERATOR)
                System.out.println("genRR(" + rr + ")");
            RuleSymbol rs = (RuleSymbol) grammar.getSymbol(rr.targetRule);
            if (rs == null || !rs.isDefined()) {
                // Is this redundant???
                antlrTool.error("Rule '" + rr.targetRule
                        + "' is not defined", grammar.getFilename(),
                        rr.getLine(), rr.getColumn());
                return;
            }
            if (!(rs instanceof RuleSymbol)) {
                // Is this redundant???
                antlrTool.error("'" + rr.targetRule
                        + "' does not name a grammar rule",
                        grammar.getFilename(), rr.getLine(), rr.getColumn());
                return;
            }

            genErrorTryForElement(rr);

            // AST value for labeled rule refs in tree walker.
            // This is not AST construction; it is just the input tree node value.
            if (grammar instanceof TreeWalkerGrammar
                    && rr.getLabel() != null && syntacticPredLevel == 0) {
                println(rr.getLabel() + " = _t==ASTNULL ? null : "
                        + lt1Value + ";");
            }

            // if in lexer and ! on rule ref or alt or rule, save buffer index to kill later
            if (grammar instanceof LexerGrammar
                    && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
                println("_saveIndex=text.length();");
            }

            // Process return value assignment if any
            printTabs();
            if (rr.idAssign != null) {
                // Warn if the rule has no return type
                if (rs.block.returnAction == null) {
                    antlrTool.warning("Rule '" + rr.targetRule
                            + "' has no return type",
                            grammar.getFilename(), rr.getLine(), rr.getColumn());
                }
                if (!(Tool.cloneGuessing && Tool.agressive
                        && (context.guessing != Context.NO_GUESSING))) {
                    _print(rr.idAssign + "=");
                }
            } else {
                // Warn about return value if any, but not inside syntactic predicate
                if (!(grammar instanceof LexerGrammar)
                        && syntacticPredLevel == 0
                        && rs.block.returnAction != null) {
                    antlrTool.warning("Rule '" + rr.targetRule
                            + "' returns a value",
                            grammar.getFilename(), rr.getLine(), rr.getColumn());
                }
            }

            // Call the rule
            GenRuleInvocation(rr, context);
            printCheck(context);

            // if in lexer and ! on element or alt or rule, kill the text added
            // to the buffer since the index was saved
            if (grammar instanceof LexerGrammar
                    && (!saveText || rr.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
                println("text.setLength(_saveIndex);");
            }

            // if not in a syntactic predicate
            if (syntacticPredLevel == 0) {
                boolean doNoGuessTest = (grammar.hasSyntacticPredicate
                        && (grammar.buildAST && rr.getLabel() != null
                            || (genAST && rr.getAutoGenType() == GrammarElement.AUTO_GEN_NONE)));
                if (doNoGuessTest) {
                    // println("if (inputState.guessing==0) {");
                    // tabs++;
                }

                if (grammar.buildAST && rr.getLabel() != null) {
                    // always gen variable for rule return on labeled rules
                    println(rr.getLabel() + "_AST = ("
                            + labeledElementASTType + ")returnAST;");
                }
                if (genAST) {
                    switch (rr.getAutoGenType()) {
                    case GrammarElement.AUTO_GEN_NONE:
                        // println("theASTFactory.addASTChild(currentAST, returnAST);");
                        if (!Tool.cloneGuessing
                                || (context.guessing == Context.NO_GUESSING)) {
                            if (!Tool.cloneGuessing) {
                                print("if ( inputState.guessing==0 ) ");
                            }
                            println("astFactory.addASTChild(currentAST, returnAST);");
                        }
                        break;
                    case GrammarElement.AUTO_GEN_CARET:
                        antlrTool.error("Internal: encountered ^ after rule reference");
                        break;
                    default:
                        break;
                    }
                }

                // if a lexer and labeled, the Token label is defined at rule level, so just set it here
                if (grammar instanceof LexerGrammar && rr.getLabel() != null) {
                    if (!Tool.agressive) {
                        println(rr.getLabel() + "=_returnToken;");
                    }
                }

                if (doNoGuessTest) {
                    // tabs--;
                    // println("}");
                }
            }
            genErrorCatchForElement(rr, context);
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    /** Generate code for the given grammar element.
     *  @param atom The string-literal reference to generate
     */
    public void gen(StringLiteralElement atom, Context context) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("genString(" + atom + ") breakLabel="
                    + context.breakLabel);

        // Variable declarations for labeled elements
        if (atom.getLabel() != null && syntacticPredLevel == 0) {
            println(atom.getLabel() + " = " + lt1Value + ";", atom.getLine());
        }

        // AST
        genElementAST(atom, context);

        // is there a bang on the literal?
        boolean oldsaveText = saveText;
        saveText = saveText
                && atom.getAutoGenType() == GrammarElement.AUTO_GEN_NONE;

        // matching
        genMatch(atom);
        printCheck(context);

        saveText = oldsaveText;

        // tack on tree cursor motion if doing a tree walker
        if (grammar instanceof TreeWalkerGrammar) {
            println("_t = _t.getNextSibling();", atom.getLine());
        }
    }

    /** Generate code for the given grammar element.
     *  @param r The token-range reference to generate
     */
    public void gen(TokenRangeElement r, Context context) {
        genErrorTryForElement(r);
        if (r.getLabel() != null && syntacticPredLevel == 0) {
            println(r.getLabel() + " = " + lt1Value + ";", r.getLine());
        }

        // AST
        genElementAST(r, context);

        // match
        if (Tool.avoidLAMatch && (context.checkedLA > 0)) {
            println("// already checked token range, just skip");
            if (!(grammar instanceof TreeWalkerGrammar)) {
                println("consume();");
            }
            context.decreaseLAChecked();
        } else {
            println("matchRange(" + r.beginText + "," + r.endText + ");", r.getLine());
            printCheck(context);
        }
        genErrorCatchForElement(r, context);
    }

    /** Generate code for the given grammar element.
     *  @param atom The token-reference to generate
     */
    public void gen(TokenRefElement atom, Context context) {
        if (DEBUG_CODE_GENERATOR)
            System.out.println("genTokenRef(" + atom + ") breakLabel="
                    + context.breakLabel);
        if (grammar instanceof LexerGrammar) {
            antlrTool.fatalError("Token reference found in lexer");
        }
        genErrorTryForElement(atom);
        // Assign Token value to token label variable
        if (atom.getLabel() != null && syntacticPredLevel == 0) {
            println(atom.getLabel() + " = " + lt1Value + ";", atom.getLine());
        }

        // AST
        genElementAST(atom, context);
        // matching
        if (Tool.avoidLAMatch && (context.checkedLA > 0)) {
            println("// already checked token ref, just skip");
            if (!(grammar instanceof TreeWalkerGrammar)) {
                println("consume();");
            }
            context.decreaseLAChecked();
        } else {
            genMatch(atom);
            printCheck(context);
        }

        genErrorCatchForElement(atom, context);

        // tack on tree cursor motion if doing a tree walker
        if (grammar instanceof TreeWalkerGrammar) {
            println("_t = _t.getNextSibling();", atom.getLine());
        }
    }

    public String getCheckString(Context context) {
        String action = "";
        if ((context.breakLabel == null)
                || (context.breakLabel.length() == 0)) {
            action = "return " + context.returnVar + ";";
        } else {
            action = "break " + context.breakLabel + ";";
        }
        return "if (matchError) {" + action + "}";
    }

    public void printCheck(Context context) {
        println(getCheckString(context));
    }
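
    // Example of the emitted check (derived directly from getCheckString):
    // inside a labeled error-handling loop it prints
    //     if (matchError) {break loop1;}
    // and with no break label it prints
    //     if (matchError) {return <returnVar>;}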

    public void gen(TreeElement t, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = t.getLine();
            // save AST cursor
            println("AST __t" + t.ID + " = _t;");

            // If there is a label on the root, then assign that to the variable
            if (t.root.getLabel() != null) {
                println(t.root.getLabel() + " = _t==ASTNULL ? null :("
                        + labeledElementASTType + ")_t;", t.root.getLine());
            }

            // check for invalid modifiers ! and ^ on tree element roots
            if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_BANG) {
                antlrTool.error(
                        "Suffixing a root node with '!' is not implemented",
                        grammar.getFilename(), t.getLine(), t.getColumn());
                t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
            }
            if (t.root.getAutoGenType() == GrammarElement.AUTO_GEN_CARET) {
                antlrTool.warning(
                        "Suffixing a root node with '^' is redundant; already a root",
                        grammar.getFilename(), t.getLine(), t.getColumn());
                t.root.setAutoGenType(GrammarElement.AUTO_GEN_NONE);
            }

            // Generate AST variables
            genElementAST(t.root, context);
            if (grammar.buildAST) {
                // Save the AST construction state
                println("ASTPair __currentAST" + t.ID + " = currentAST.copy();");
                // Make the next item added a child of the TreeElement root
                println("currentAST.root = currentAST.child;");
                println("currentAST.child = null;");
            }

            // match root
            if (t.root instanceof WildcardElement) {
                println("if ( _t==null ) throw new MismatchedTokenException();",
                        t.root.getLine());
            } else {
                if (Tool.avoidLAMatch && (context.checkedLA > 0)) {
                    println("//already checked tree element, skip");
                    context.decreaseLAChecked();
                } else {
                    genMatch(t.root);
                    printCheck(context);
                }
            }
            // move to list of children
            println("_t = _t.getFirstChild();");

            // walk list of children, generating code for each
            for (int i = 0; i < t.getAlternatives().size(); i++) {
                Alternative a = t.getAlternativeAt(i);
                AlternativeElement e = a.head;
                while (e != null) {
                    e.generate(context);
                    e = e.next;
                }
            }

            if (grammar.buildAST) {
                // restore the AST construction state to that just after the
                // tree root was added
                println("currentAST = __currentAST" + t.ID + ";");
            }
            // restore AST cursor
            println("_t = __t" + t.ID + ";");
            // move cursor to sibling of tree just parsed
            println("_t = _t.getNextSibling();");
        } finally {
            defaultLine = oldDefaultLine;
        }
    }
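
    // Sketch of the emitted tree-match frame for a tree element with ID 5
    // (reconstructed from the println calls above, not verbatim output):
    //     AST __t5 = _t;                              // save cursor
    //     ASTPair __currentAST5 = currentAST.copy();  // if buildAST
    //     ...match root, _t = _t.getFirstChild(), match children...
    //     currentAST = __currentAST5;                 // if buildAST
    //     _t = __t5;                                  // restore cursor
    //     _t = _t.getNextSibling();                   // step past the tree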

    /** Generate the tree-parser Java file */
    public void gen(TreeWalkerGrammar g) throws IOException {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = NO_MAPPING;
            // SAS: debugging stuff removed for now...
            setGrammar(g);
            if (!(grammar instanceof TreeWalkerGrammar)) {
                antlrTool.fatalError("Internal error generating tree-walker");
            }
            // Open the output stream for the parser and set the currentOutput
            // SAS: move file open to a method so a subclass can override it
            // (mainly for VAJ interface)
            currentOutput = getPrintWriterManager().setupOutput(antlrTool, grammar);

            genAST = grammar.buildAST;
            tabs = 0;

            // Generate the header common to all output files.
            genHeader();
            // Do not use printAction because we assume tabs==0
            try {
                defaultLine = behavior.getHeaderActionLine("");
                println(behavior.getHeaderAction(""));
            } finally {
                defaultLine = NO_MAPPING;
            }

            // Generate header for the parser
            println("import antlr." + grammar.getSuperClass() + ";");
            println("import antlr.Token;");
            println("import antlr.collections.AST;");
            println("import antlr.RecognitionException;");
            println("import antlr.ANTLRException;");
            println("import antlr.NoViableAltException;");
            println("import antlr.MismatchedTokenException;");
            println("import antlr.SemanticException;");
            println("import antlr.collections.impl.BitSet;");
            println("import antlr.ASTPair;");
            println("import antlr.collections.impl.ASTArray;");

            // Output the user-defined parser preamble
            println(grammar.preambleAction.getText());

            // Generate parser class definition
            String sup = null;
            if (grammar.superClass != null) {
                sup = grammar.superClass;
            } else {
                sup = "antlr." + grammar.getSuperClass();
            }
            println("");

            // print javadoc comment if any
            if (grammar.comment != null) {
                _println(grammar.comment);
            }

            // get prefix (replaces "public" and lets user specify)
            String prefix = "public";
            Token tprefix = (Token) grammar.options.get("classHeaderPrefix");
            if (tprefix != null) {
                String p = StringUtils.stripFrontBack(tprefix.getText(), "\"", "\"");
                if (p != null) {
                    prefix = p;
                }
            }

            print(prefix + " ");
            print("class " + grammar.getClassName() + " extends " + sup);
            println(" implements "
                    + grammar.tokenManager.getName()
                    + TokenTypesFileSuffix);
            Token tsuffix = (Token) grammar.options.get("classHeaderSuffix");
            if (tsuffix != null) {
                String suffix = StringUtils.stripFrontBack(tsuffix.getText(), "\"", "\"");
                if (suffix != null) {
                    print(", " + suffix); // must be an interface name for Java
                }
            }
            println(" {");

            // Generate user-defined parser class members
            print(processActionForSpecialSymbols(
                    grammar.classMemberAction.getText(),
                    grammar.classMemberAction.getLine(), currentRule,
                    null), grammar.classMemberAction.getLine());

            // Generate default parser class constructor
            println("public " + grammar.getClassName() + "() {");
            tabs++;
            println("tokenNames = _tokenNames;");
            tabs--;
            println("}");
            println("");

            // Generate code for each rule in the grammar
            Enumeration ids = grammar.rules.elements();
            int ruleNum = 0;
            String ruleNameInits = "";
            while (ids.hasMoreElements()) {
                GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
                if (sym instanceof RuleSymbol) {
                    RuleSymbol rs = (RuleSymbol) sym;
                    genRule(rs, rs.references.size() == 0, ruleNum++,
                            new Context("", Context.NO_GUESSING));
                }
                exitIfError();
            }

            printGuessedRules(ruleNum);

            // Generate the token names
            genTokenStrings();

            // Generate the bitsets used throughout the grammar
            genBitsets(bitsetsUsed, grammar.tokenManager.maxTokenType());

            // Close class definition
            println("}");
            println("");

            // Close the parser output stream
            getPrintWriterManager().finishOutput();
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    /** Generate code for the given grammar element.
     *  @param wc The wildcard element to generate
     */
    public void gen(WildcardElement wc, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = wc.getLine();
            // Variable assignment for labeled elements
            if (wc.getLabel() != null && syntacticPredLevel == 0) {
                println(wc.getLabel() + " = " + lt1Value + ";");
            }

            // AST
            genElementAST(wc, context);
            // Match anything but EOF
            if (grammar instanceof TreeWalkerGrammar) {
                println("if ( _t==null ) throw new MismatchedTokenException();");
            } else if (grammar instanceof LexerGrammar) {
                if (grammar instanceof LexerGrammar
                        && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
                    println("_saveIndex=text.length();");
                }
                println("matchNot(EOF_CHAR);");
                printCheck(context);
                if (grammar instanceof LexerGrammar
                        && (!saveText || wc.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
                    println("text.setLength(_saveIndex);"); // kill text atom put in buffer
                }
            } else {
                println("matchNot(" + getValueString(Token.EOF_TYPE) + ");");
                printCheck(context);
            }

            // tack on tree cursor motion if doing a tree walker
            if (grammar instanceof TreeWalkerGrammar) {
                println("_t = _t.getNextSibling();");
            }
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    /** Generate code for the given grammar element.
     *  @param blk The (...)* block to generate
     */
    public void gen(ZeroOrMoreBlock blk, Context context) {
        int oldDefaultLine = defaultLine;
        try {
            defaultLine = blk.getLine();
            if (DEBUG_CODE_GENERATOR)
                System.out.println("gen*(" + blk + ") breakLabel="
                        + context.breakLabel);
            println("{");
            genBlockPreamble(blk);
            String label;
            if (blk.getLabel() != null) {
                label = blk.getLabel();
            } else {
                label = "_loop" + blk.ID;
            }
            println(label + ":");
            println("do {");
            tabs++;
            // generate the init action for ()* inside the loop;
            // this allows us to do useful EOF checking...
            genBlockInitAction(blk, context);

            // Tell AST generation to build subrule result
            String saveCurrentASTResult = currentASTResult;
            if (blk.getLabel() != null) {
                currentASTResult = blk.getLabel();
            }

            boolean ok = grammar.theLLkAnalyzer.deterministic(blk);

            // generate exit test if greedy set to false
            // and an alt is ambiguous with exit branch
            // or when lookahead derived purely from end-of-file.
            // Lookahead analysis stops when end-of-file is hit,
            // returning set {epsilon}. Since {epsilon} is not
            // ambig with any real tokens, no error is reported
            // by deterministic() routines and we have to check
            // for the case where the lookahead depth didn't get
            // set to NONDETERMINISTIC (this only happens when the
            // FOLLOW contains real atoms + epsilon).
            boolean generateNonGreedyExitPath = false;
            int nonGreedyExitDepth = grammar.maxk;

            if (!blk.greedy
                    && blk.exitLookaheadDepth <= grammar.maxk
                    && blk.exitCache[blk.exitLookaheadDepth].containsEpsilon()) {
                generateNonGreedyExitPath = true;
                nonGreedyExitDepth = blk.exitLookaheadDepth;
            } else if (!blk.greedy
                    && blk.exitLookaheadDepth == LLkGrammarAnalyzer.NONDETERMINISTIC) {
                generateNonGreedyExitPath = true;
            }
            if (generateNonGreedyExitPath) {
                if (DEBUG_CODE_GENERATOR) {
                    System.out.println("nongreedy (...)* loop; exit depth is "
                            + blk.exitLookaheadDepth);
                }
                String predictExit = getLookaheadTestExpression(
                        blk.exitCache, nonGreedyExitDepth, 0);
                println("// nongreedy exit test");
                println("if (" + predictExit + ") break " + label + ";");
            }

            JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, false, context);
            genBlockFinish(howToFinish, "break " + label + ";", blk.getLine());

            tabs--;
            println("} while (true);");
            println("}");

            // Restore previous AST generation
            currentASTResult = saveCurrentASTResult;
        } finally {
            defaultLine = oldDefaultLine;
        }
    }

    public void printLoopStart(String label) {
        if (label == null) {
            label = "";
        }
        println(label + ": while (true) {");
    }

    public void printLoopEnd(String label) {
        println("break;} // End of loop " + label);
    }
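
    // Together these bracket an alternative's error-handling region; for a
    // label "loop1" the emitted frame looks like (derived from the two
    // println calls above):
    //     loop1: while (true) {
    //         ...alternative code; error paths do "break loop1;"...
    //     break;} // End of loop loop1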

    /** Generate an alternative.
     *  @param alt The alternative to generate
     *  @param blk The block to which the alternative belongs
     */
    protected void genAlt(Alternative alt, AlternativeBlock blk,
                          Context context) {
        // Save the AST generation state, and set it to that of the alt
        boolean savegenAST = genAST;
        genAST = genAST && alt.getAutoGen();

        boolean oldsaveText = saveText;
        saveText = saveText && alt.getAutoGen();

        // Reset the variable name map for the alternative
        Hashtable saveMap = treeVariableMap;
        treeVariableMap = new Hashtable();

        //println("//la depth = " + alt.lookaheadDepth);

        // Generate try block around the alt for error handling
        if (alt.exceptionSpec != null) {
            if (MatchExceptionState.throwRecExceptions) {
                println("try { // for error handling", alt.head.getLine());
            }
            context = new Context("loop" + loopCount++, context.guessing);
            //breakLabel = "loop" + loopCount++;
            printLoopStart(context.breakLabel);
            tabs++;
        }

        AlternativeElement elem = alt.head;
        while (!(elem instanceof BlockEndElement)) {
            elem.generate(context); // alt can begin with anything. Ask target to gen.

            // reset the checked-lookahead count unless the element is a matched
            // atom (fix for the avoid-checked-LA optimization)
            if (!(elem instanceof CharLiteralElement
                    || elem instanceof CharRangeElement
                    || elem instanceof TokenRefElement
                    || elem instanceof TokenRangeElement
                    || elem instanceof TreeElement)) {
                context.setCheckedLA(0);
            }

            elem = elem.next;
        }

        if (genAST) {
            if (blk instanceof RuleBlock) {
                // Set the AST return value for the rule
                RuleBlock rblk = (RuleBlock) blk;
                if (grammar.hasSyntacticPredicate) {
                    // println("if ( inputState.guessing==0 ) {");
                    // tabs++;
                }
                println(rblk.getRuleName() + "_AST = ("
                        + labeledElementASTType + ")currentAST.root;",
                        CONTINUE_LAST_MAPPING);
                if (grammar.hasSyntacticPredicate) {
                    // --tabs;
                    // println("}");
                }
            } else if (blk.getLabel() != null) {
                // ### future: also set AST value for labeled subrules.
                // println(blk.getLabel() + "_AST = ("+labeledElementASTType+")currentAST.root;");
                antlrTool.warning("Labeled subrules not yet supported",
                        grammar.getFilename(), blk.getLine(), blk.getColumn());
            }
        }

        if (alt.exceptionSpec != null) {
            // close try block
            tabs--;
            printLoopEnd(context.breakLabel);
            println("}", NO_MAPPING);
            genErrorHandler(alt.exceptionSpec, context);
        }

        genAST = savegenAST;
        saveText = oldsaveText;

        treeVariableMap = saveMap;
    }

    /** Generate all the bitsets to be used in the parser or lexer.
     *  Generate the raw bitset data like "long _tokenSet1_data[] = {...};"
     *  and the BitSet object declarations like "BitSet _tokenSet1 = new BitSet(_tokenSet1_data);"
     *  Note that most languages do not support object initialization inside a
     *  class definition, so other code-generators may have to separate the
     *  bitset declarations from the initializations (e.g., put the initializations
     *  in the generated constructor instead).
     *  @param bitsetList The list of bitsets to generate.
     *  @param maxVocabulary Ensure that each generated bitset can contain at least this value.
     */
    protected void genBitsets(Vector bitsetList, int maxVocabulary) {
        println("", NO_MAPPING);
        for (int i = 0; i < bitsetList.size(); i++) {
            BitSet p = (BitSet) bitsetList.elementAt(i);
            // Ensure that generated BitSet is large enough for vocabulary
            p.growToInclude(maxVocabulary);
            genBitSet(p, i);
        }
    }
1705:
1706: /** Do something simple like:
1707: * private static final long[] mk_tokenSet_0() {
1708: * long[] data = { -2305839160922996736L, 63L, 16777216L, 0L, 0L, 0L };
1709: * return data;
1710: * }
1711: * public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
1712: *
1713: * Or, for large bitsets, optimize init so ranges are collapsed into loops.
1714: * This is most useful for lexers using unicode.
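     *
     * The optimized form looks roughly like this (illustrative sketch;
     * sizes and values are hypothetical):
     *     private static final long[] mk_tokenSet_1() {
     *         long[] data = new long[1025];
     *         data[0]=68719476736L;
     *         for (int i = 1; i<=64; i++) { data[i]=-1L; }
     *         return data;
     *     }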
1715: */
1716: private void genBitSet(BitSet p, int id) {
1717: int oldDefaultLine = defaultLine;
1718: try {
1719: defaultLine = NO_MAPPING;
1720: // initialization data
1721: println("private static final long[] mk"
1722: + getBitsetName(id) + "() {");
1723: int n = p.lengthInLongWords();
1724: if (n < BITSET_OPTIMIZE_INIT_THRESHOLD) {
1725: println("\tlong[] data = { " + p.toStringOfWords()
1726: + "};");
1727: } else {
1728: // will init manually, allocate space then set values
1729: println("\tlong[] data = new long[" + n + "];");
1730: long[] elems = p.toPackedArray();
1731: for (int i = 0; i < elems.length;) {
1732: if (elems[i] == 0) {
1733: // done automatically by Java, don't waste time/code
1734: i++;
1735: continue;
1736: }
1737: if ((i + 1) == elems.length
1738: || elems[i] != elems[i + 1]) {
1739: // last number or no run of numbers, just dump assignment
1740: println("\tdata[" + i + "]=" + elems[i] + "L;");
1741: i++;
1742: } else {
1743: // scan to find end of run
1744: int j;
1745: for (j = i + 1; j < elems.length
1746: && elems[j] == elems[i]; j++) {
1747: }
1748: // j-1 is last member of run
1749: println("\tfor (int i = " + i + "; i<="
1750: + (j - 1) + "; i++) { data[i]="
1751: + elems[i] + "L; }");
1752: i = j;
1753: }
1754: }
1755: }
1756:
1757: println("\treturn data;");
1758: println("}");
1759: // BitSet object
1760: println("public static final BitSet " + getBitsetName(id)
1761: + " = new BitSet(" + "mk" + getBitsetName(id)
1762: + "()" + ");");
1763: } finally {
1764: defaultLine = oldDefaultLine;
1765: }
1766: }
1767:
1768: /** Generate the finish of a block, using a combination of the info
1769: * returned from genCommonBlock() and the action to perform when
1770: * no alts were taken
1771: * @param howToFinish The return of genCommonBlock()
1772: * @param noViableAction What to generate when no alt is taken
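     * @param line The line to map the generated code to
     *
     * With generatedAnIf set, the emitted tail is roughly (sketch):
     *     else {
     *         <noViableAction>
     *     }
     *     <postscript, e.g. the "}" closing a generated switch>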
1773: */
1774: private void genBlockFinish(JavaBlockFinishingInfo howToFinish,
1775: String noViableAction, int line) {
1776: int oldDefaultLine = defaultLine;
1777: try {
1778: defaultLine = line;
1779: if (howToFinish.needAnErrorClause
1780: && (howToFinish.generatedAnIf || howToFinish.generatedSwitch)) {
1781: if (howToFinish.generatedAnIf) {
1782: println("else {");
1783: } else {
1784: println("{");
1785: }
1786: tabs++;
1787: println(noViableAction);
1788: tabs--;
1789: println("}");
1790: }
1791:
1792: if (howToFinish.postscript != null) {
1793: println(howToFinish.postscript);
1794: }
1795: } finally {
1796: defaultLine = oldDefaultLine;
1797: }
1798: }
1799:
    /** Generate the init action for a block, which may be a RuleBlock or a
     * plain AlternativeBlock.
     * @param blk The block for which the init action is to be generated.
     * @param context The generation context.
     */
1804: protected void genBlockInitAction(AlternativeBlock blk,
1805: Context context) {
1806: if (!(Tool.cloneGuessing && Tool.agressive && (context.guessing != Context.NO_GUESSING))) {
1807: // dump out init action
1808: if (blk.initAction != null) {
1809: printAction(processActionForSpecialSymbols(
1810: blk.initAction, blk.getLine(), currentRule,
1811: null), blk.getLine());
1812: }
1813: }
1814: }
1815:
    /** Generate the header for a block, which may be a RuleBlock or a
     * plain AlternativeBlock. This generates any variable declarations
     * and syntactic-predicate-testing variables.
     * @param blk The block for which the preamble is to be generated.
     */
1821: protected void genBlockPreamble(AlternativeBlock blk) {
1822: // define labels for rule blocks.
1823: if (blk instanceof RuleBlock) {
1824: RuleBlock rblk = (RuleBlock) blk;
1825: if (rblk.labeledElements != null) {
1826: for (int i = 0; i < rblk.labeledElements.size(); i++) {
1827: AlternativeElement a = (AlternativeElement) rblk.labeledElements
1828: .elementAt(i);
1829: int oldDefaultLine = defaultLine;
1830: try {
1831: defaultLine = a.getLine();
1832: // System.out.println("looking at labeled element: "+a);
1833: // Variables for labeled rule refs and
1834: // subrules are different than variables for
1835: // grammar atoms. This test is a little tricky
1836: // because we want to get all rule refs and ebnf,
1837: // but not rule blocks or syntactic predicates
1838: if (a instanceof RuleRefElement
1839: || a instanceof AlternativeBlock
1840: && !(a instanceof RuleBlock)
1841: && !(a instanceof SynPredBlock)) {
1842:
1843: if (!(a instanceof RuleRefElement)
1844: && ((AlternativeBlock) a).not
1845: && analyzer
1846: .subruleCanBeInverted(
1847: ((AlternativeBlock) a),
1848: grammar instanceof LexerGrammar)) {
1849: // Special case for inverted subrules that
1850: // will be inlined. Treat these like
1851: // token or char literal references
1852: println(labeledElementType + " "
1853: + a.getLabel() + " = "
1854: + labeledElementInit + ";");
1855: if (grammar.buildAST) {
1856: genASTDeclaration(a);
1857: }
1858: } else {
1859: if (grammar.buildAST) {
1860: // Always gen AST variables for
1861: // labeled elements, even if the
1862: // element itself is marked with !
1863: genASTDeclaration(a);
1864: }
1865: if (grammar instanceof LexerGrammar) {
1866: println("Token " + a.getLabel()
1867: + "=null;");
1868: }
1869: if (grammar instanceof TreeWalkerGrammar) {
1870: // always generate rule-ref variables
1871: // for tree walker
1872: println(labeledElementType + " "
1873: + a.getLabel() + " = "
1874: + labeledElementInit + ";");
1875: }
1876: }
1877: } else {
1878: // It is a token or literal reference. Generate the
1879: // correct variable type for this grammar
1880: println(labeledElementType + " "
1881: + a.getLabel() + " = "
1882: + labeledElementInit + ";");
1883:
1884: // In addition, generate *_AST variables if
1885: // building ASTs
1886: if (grammar.buildAST) {
1887: if (a instanceof GrammarAtom
1888: && ((GrammarAtom) a)
1889: .getASTNodeType() != null) {
1890: GrammarAtom ga = (GrammarAtom) a;
1891: genASTDeclaration(a, ga
1892: .getASTNodeType());
1893: } else {
1894: genASTDeclaration(a);
1895: }
1896: }
1897: }
1898: } finally {
1899: defaultLine = oldDefaultLine;
1900: }
1901: }
1902: }
1903: }
1904: }
1905:
1906: /** Generate a series of case statements that implement a BitSet test.
     * @param p The BitSet for which cases are to be generated
     * @param line The line to map the generated cases to
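     *
     * For a parser (one case per line) this emits, e.g. (sketch,
     * token names hypothetical):
     *     case ID:
     *     case INT:
     * while lexer cases are wrapped four per line.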
1908: */
1909: protected void genCases(BitSet p, int line) {
1910: int oldDefaultLine = defaultLine;
1911: try {
1912: defaultLine = line;
1913: if (DEBUG_CODE_GENERATOR)
1914: System.out.println("genCases(" + p + ")");
1915: int[] elems;
1916:
1917: elems = p.toArray();
1918: // Wrap cases four-per-line for lexer, one-per-line for parser
1919: int wrap = (grammar instanceof LexerGrammar) ? 4 : 1;
1920: int j = 1;
1921: boolean startOfLine = true;
1922: for (int i = 0; i < elems.length; i++) {
1923: if (j == 1) {
1924: print("");
1925: } else {
1926: _print(" ");
1927: }
1928: _print("case " + getValueString(elems[i]) + ":");
1929:
1930: if (j == wrap) {
1931: _println("");
1932: startOfLine = true;
1933: j = 1;
1934: } else {
1935: j++;
1936: startOfLine = false;
1937: }
1938: }
1939: if (!startOfLine) {
1940: _println("");
1941: }
1942: } finally {
1943: defaultLine = oldDefaultLine;
1944: }
1945: }
1946:
1947: private static int la_cache_no = 0;
1948:
    /** Generate common code for a block of alternatives; return a
     * postscript that needs to be generated at the end of the
     * block. Other routines may append else-clauses and such for
     * error checking before the postfix is generated. If the
     * grammar is a lexer, then generate alternatives in an order
     * where alternatives requiring deeper lookahead are generated
     * first, and EOF in the lookahead set reduces the depth of
     * the lookahead.
     * @param blk The block to generate
     * @param noTestForSingle If true, do not generate a test for a
     *        single alternative.
     * @param context The generation context
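     *
     * For a block with enough simple LL(1) alternatives the emitted
     * frame is roughly (sketch, token name hypothetical):
     *     switch ( LA(1)) {
     *     case ID:
     *     {
     *         ...alt code...
     *         break;
     *     }
     *     default:
     *         ...remaining alts / error clause from genBlockFinish...
     *     }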
1959: */
1960: public JavaBlockFinishingInfo genCommonBlock(AlternativeBlock blk,
1961: boolean noTestForSingle, Context context) {
1962: int oldDefaultLine = defaultLine;
1963: try {
1964: defaultLine = blk.getLine();
1965: int nIF = 0;
1966: boolean createdLL1Switch = false;
1967: int closingBracesOfIFSequence = 0;
1968: JavaBlockFinishingInfo finishingInfo = new JavaBlockFinishingInfo();
1969: if (DEBUG_CODE_GENERATOR)
1970: System.out.println("genCommonBlock(" + blk
1971: + ") breakLabel=" + context.breakLabel);
1972:
1973: // Save the AST generation state, and set it to that of the block
1974: boolean savegenAST = genAST;
1975: genAST = genAST && blk.getAutoGen();
1976:
            boolean oldsaveText = saveText;
1978: saveText = saveText && blk.getAutoGen();
1979:
1980: // Is this block inverted? If so, generate special-case code
1981: if (blk.not
1982: && analyzer.subruleCanBeInverted(blk,
1983: grammar instanceof LexerGrammar)) {
1984: if (DEBUG_CODE_GENERATOR)
1985: System.out.println("special case: ~(subrule)");
1986: Lookahead p = analyzer.look(1, blk);
1987: // Variable assignment for labeled elements
1988: if (blk.getLabel() != null && syntacticPredLevel == 0) {
1989: println(blk.getLabel() + " = " + lt1Value + ";");
1990: }
1991:
1992: // AST
1993: genElementAST(blk, context);
1994:
1995: String astArgs = "";
1996: if (grammar instanceof TreeWalkerGrammar) {
1997: astArgs = "_t,";
1998: }
1999:
2000: // match the bitset for the alternative
2001: println("match(" + astArgs
2002: + getBitsetName(markBitsetForGen(p.fset))
2003: + ");");
2004: printCheck(context);
2005:
2006: // tack on tree cursor motion if doing a tree walker
2007: if (grammar instanceof TreeWalkerGrammar) {
2008: println("_t = _t.getNextSibling();");
2009: }
2010: return finishingInfo;
2011: }
2012:
2013: // Special handling for single alt
2014: if (blk.getAlternatives().size() == 1) {
2015: Alternative alt = blk.getAlternativeAt(0);
2016: // Generate a warning if there is a synPred for single alt.
2017: if (alt.synPred != null) {
2018: antlrTool
2019: .warning(
2020: "Syntactic predicate superfluous for single alternative",
2021: grammar.getFilename(),
2022: blk.getAlternativeAt(0).synPred
2023: .getLine(),
2024: blk.getAlternativeAt(0).synPred
2025: .getColumn());
2026: }
2027: if (noTestForSingle) {
2028: if (alt.semPred != null) {
2029: // Generate validating predicate
2030: genSemPred(alt.semPred, blk.line, context);
2031: }
2032: context = new Context(context);
2033: context.setCheckedLA(0);
2034: genAlt(alt, blk, context);
2035: return finishingInfo;
2036: }
2037: }
2038:
2039: // count number of simple LL(1) cases; only do switch for
2040: // many LL(1) cases (no preds, no end of token refs)
2041: // We don't care about exit paths for (...)*, (...)+
2042: // because we don't explicitly have a test for them
2043: // as an alt in the loop.
2044: //
2045: // Also, we now count how many unicode lookahead sets
2046: // there are--they must be moved to DEFAULT or ELSE
2047: // clause.
2048: int nLL1 = 0;
2049: int maxDepth = 0;
2050: for (int i = 0; i < blk.getAlternatives().size(); i++) {
2051: Alternative a = blk.getAlternativeAt(i);
2052: if (suitableForCaseExpression(a)) {
2053: nLL1++;
2054: }
2055: if (maxDepth < a.lookaheadDepth) {
2056: maxDepth = a.lookaheadDepth;
2057: }
2058: }
2059:
2060: if (maxDepth == GrammarAnalyzer.NONDETERMINISTIC) {
2061: maxDepth = grammar.maxk;
2062: }
2063:
2064: int cur_no = 0;
2065: // Local LA cache mechanism
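            // e.g., with maxDepth==2 in a parser this emits (sketch,
            // cache number hypothetical):
            //     // Local LA Cache for 2 element(s):
            //     int LA1_7 = LA(1);
            //     int LA2_7 = LA(2);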
2066: if (Tool.localLACache
2067: && !(grammar instanceof TreeWalkerGrammar)
2068: && (maxDepth > 1)) {
2069: la_cache_no++;
2070: String type = (grammar instanceof LexerGrammar) ? "char"
2071: : "int";
2072: println("// Local LA Cache for " + maxDepth
2073: + " element(s):");
2074: for (int i = 1; i < maxDepth + 1; i++) {
2075: println(type + " LA" + i + "_" + la_cache_no
2076: + " = LA(" + i + ");");
2077: }
2078: println("");
2079: cur_no = la_cache_no;
2080: }
2081:
2082: // do LL(1) cases
2083: if (nLL1 >= makeSwitchThreshold) {
2084: // Determine the name of the item to be compared
2085: String testExpr = lookaheadString(1, cur_no);
2086: createdLL1Switch = true;
2087: // when parsing trees, convert null to valid tree node with NULL lookahead
2088: if (grammar instanceof TreeWalkerGrammar) {
2089: println("if (_t==null) _t=ASTNULL;");
2090: }
2091: println("switch ( " + testExpr + ") {");
2092: for (int i = 0; i < blk.alternatives.size(); i++) {
2093: Alternative alt = blk.getAlternativeAt(i);
2094: // ignore any non-LL(1) alts, predicated alts,
2095: // or end-of-token alts for case expressions
2096: if (!suitableForCaseExpression(alt)) {
2097: continue;
2098: }
2099: Lookahead p = alt.cache[1];
2100: if (p.fset.degree() == 0 && !p.containsEpsilon()) {
2101: antlrTool
2102: .warning(
2103: "Alternate omitted due to empty prediction set",
2104: grammar.getFilename(), alt.head
2105: .getLine(), alt.head
2106: .getColumn());
2107: } else {
2108: genCases(p.fset, alt.head.getLine());
2109: if (!blk.isCombineChars()) {
2110: println("{", alt.head.getLine());
2111: tabs++;
2112: context = new Context(context);
2113: context.setCheckedLA(1);
2114: genAlt(alt, blk, context);
2115: println("break;", NO_MAPPING);
2116: tabs--;
2117: println("}", NO_MAPPING);
2118: }
2119: }
2120: }
2121: if (blk.isCombineChars()) {
2122: tabs++;
2123: //genAlt(alt, blk, 1, context);
2124: println("consume();", NO_MAPPING);
2125: println("break;", NO_MAPPING);
2126: tabs--;
2127: }
2128: println("default:");
2129: tabs++;
2130: }
2131:
            // do non-LL(1) and nondeterministic cases. This is tricky in
            // the lexer, because of cases like: STAR : '*' ; ASSIGN_STAR
            // : "*=";  Since nextToken is generated without a loop, the
            // STAR will have end-of-token as its lookahead set for
            // LA(2). So, we must generate the alternatives containing
            // trailing end-of-token in their lookahead sets *after* the
            // alternatives without end-of-token. This implements the
            // usual lexer convention that longer matches come before
            // shorter ones, e.g. "*=" matches ASSIGN_STAR not STAR.
            //
            // For non-lexer grammars, this does not sort the alternates
            // by depth. Note that alts whose lookahead is purely
            // end-of-token at k=1 end up as default or else clauses.
2145: int startDepth = (grammar instanceof LexerGrammar) ? grammar.maxk
2146: : 0;
2147: for (int altDepth = startDepth; altDepth >= 0; altDepth--) {
2148: if (DEBUG_CODE_GENERATOR)
2149: System.out.println("checking depth " + altDepth);
2150: for (int i = 0; i < blk.alternatives.size(); i++) {
2151: Alternative alt = blk.getAlternativeAt(i);
2152: if (DEBUG_CODE_GENERATOR)
2153: System.out.println("genAlt: " + i);
2154: // if we made a switch above, ignore what we already took care
2155: // of. Specifically, LL(1) alts with no preds
2156: // that do not have end-of-token in their prediction set
2157: // and that are not giant unicode sets.
2158: if (createdLL1Switch
2159: && suitableForCaseExpression(alt)) {
2160: if (DEBUG_CODE_GENERATOR)
2161: System.out
2162: .println("ignoring alt because it was in the switch");
2163: continue;
2164: }
2165: String e;
2166:
2167: boolean unpredicted = false;
2168: int checkedLA = 0;
2169:
2170: if (grammar instanceof LexerGrammar) {
2171: // Calculate the "effective depth" of the alt,
2172: // which is the max depth at which
2173: // cache[depth]!=end-of-token
2174: int effectiveDepth = alt.lookaheadDepth;
2175: if (effectiveDepth == GrammarAnalyzer.NONDETERMINISTIC) {
2176: // use maximum lookahead
2177: effectiveDepth = grammar.maxk;
2178: }
2179: while (effectiveDepth >= 1
2180: && alt.cache[effectiveDepth]
2181: .containsEpsilon()) {
2182: effectiveDepth--;
2183: }
2184: // Ignore alts whose effective depth is other than
2185: // the ones we are generating for this iteration.
2186: if (effectiveDepth != altDepth) {
2187: if (DEBUG_CODE_GENERATOR)
2188: System.out
2189: .println("ignoring alt because effectiveDepth!=altDepth;"
2190: + effectiveDepth
2191: + "!="
2192: + altDepth);
2193: continue;
2194: }
2195: checkedLA = getAltLASize(alt, effectiveDepth);
2196: unpredicted = lookaheadIsEmpty(alt,
2197: effectiveDepth);
2198: e = getLookaheadTestExpression(alt,
2199: effectiveDepth, cur_no);
2200: } else {
2201: checkedLA = getAltLASize(alt, grammar.maxk);
2202: unpredicted = lookaheadIsEmpty(alt,
2203: grammar.maxk);
2204: e = getLookaheadTestExpression(alt,
2205: grammar.maxk, cur_no);
2206: }
2207:
2208: int oldDefaultLine2 = defaultLine;
2209: try {
2210: defaultLine = alt.head.getLine();
2211: // Was it a big unicode range that forced unsuitability
2212: // for a case expression?
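                            // NOTE: as written this test can never be true,
                            // because suitableForCaseExpression() itself
                            // requires fset.degree() <= caseSizeThreshold;
                            // left unchanged to preserve generated output.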
2213: if (alt.cache[1].fset.degree() > caseSizeThreshold
2214: && suitableForCaseExpression(alt)) {
2215: if (nIF == 0) {
2216: println("if " + e + " {");
2217: } else {
2218: println("else if " + e + " {");
2219: }
2220: } else if (unpredicted && alt.semPred == null
2221: && alt.synPred == null) {
2222: // The alt has empty prediction set and no
2223: // predicate to help out. if we have not
2224: // generated a previous if, just put {...} around
2225: // the end-of-token clause
2226: if (nIF == 0) {
2227: println("{");
2228: } else {
2229: println("else {");
2230: }
2231: finishingInfo.needAnErrorClause = false;
2232: } else { // check for sem and syn preds
2233:
2234: // Add any semantic predicate expression to the
2235: // lookahead test
2236: if (alt.semPred != null) {
2237: // if debugging, wrap the evaluation of the
2238: // predicate in a method translate $ and #
2239: // references
2240: ActionTransInfo tInfo = new ActionTransInfo();
2241: String actionStr = processActionForSpecialSymbols(
2242: alt.semPred, blk.line,
2243: currentRule, tInfo);
2244: // ignore translation info...we don't need to
2245: // do anything with it. call that will inform
2246: // SemanticPredicateListeners of the result
2247: if (((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))
2248: && grammar.debuggingOutput) {
2249: e = "("
2250: + e
2251: + "&& fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.PREDICTING,"
2252: + addSemPred(charFormatter
2253: .escapeString(actionStr))
2254: + "," + actionStr + "))";
2255: } else {
2256: e = "(" + e + "&&(" + actionStr
2257: + "))";
2258: }
2259: }
2260:
2261: // Generate any syntactic predicates
2262: if (nIF > 0) {
2263: if (alt.synPred != null) {
2264: println("else {", alt.synPred
2265: .getLine());
2266: tabs++;
2267: genSynPred(alt.synPred, e, context);
2268: closingBracesOfIFSequence++;
2269: } else {
2270: println("else if " + e + " {");
2271: }
2272: } else {
2273: if (alt.synPred != null) {
2274: genSynPred(alt.synPred, e, context);
2275: } else {
2276: // when parsing trees, convert null to
2277: // valid tree node with NULL lookahead.
2278: if (grammar instanceof TreeWalkerGrammar) {
2279: println("if (_t==null) _t=ASTNULL;");
2280: }
2281: println("if " + e + " {");
2282: }
2283: }
2284:
2285: }
2286: } finally {
2287: defaultLine = oldDefaultLine2;
2288: }
2289:
2290: nIF++;
2291: tabs++;
2292: context = new Context(context);
2293: context.setCheckedLA(checkedLA);
2294: genAlt(alt, blk, context);
2295: tabs--;
2296: println("}");
2297: }
2298: }
2299: String ps = "";
2300: for (int i = 1; i <= closingBracesOfIFSequence; i++) {
2301: ps += "}";
2302: }
2303:
2304: // Restore the AST generation state
2305: genAST = savegenAST;
2306:
2307: // restore save text state
            saveText = oldsaveText;
2309:
2310: // Return the finishing info.
2311: if (createdLL1Switch) {
2312: tabs--;
2313: finishingInfo.postscript = ps + "}";
2314: finishingInfo.generatedSwitch = true;
2315: finishingInfo.generatedAnIf = nIF > 0;
2316: //return new JavaBlockFinishingInfo(ps+"}",true,nIF>0); // close up switch statement
2317:
2318: } else {
2319: finishingInfo.postscript = ps;
2320: finishingInfo.generatedSwitch = false;
2321: finishingInfo.generatedAnIf = nIF > 0;
2322: // return new JavaBlockFinishingInfo(ps, false,nIF>0);
2323: }
2324: return finishingInfo;
2325: } finally {
2326: defaultLine = oldDefaultLine;
2327: }
2328: }
2329:
2330: private static boolean suitableForCaseExpression(Alternative a) {
2331: return a.lookaheadDepth == 1 && a.semPred == null
2332: && !a.cache[1].containsEpsilon()
2333: && a.cache[1].fset.degree() <= caseSizeThreshold;
2334: }
2335:
2336: /** Generate code to link an element reference into the AST */
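    // For an unlabeled token reference in a parser building ASTs this
    // emits roughly the following (sketch, assuming the usual
    // astFactory create/add helpers; variable name hypothetical):
    //     AST tmp1_AST = null;
    //     tmp1_AST = astFactory.create(LT(1));
    //     astFactory.addASTChild(currentAST, tmp1_AST);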
2337: private void genElementAST(AlternativeElement el, Context context) {
2338: int oldDefaultLine = defaultLine;
2339: try {
2340: defaultLine = el.getLine();
2341: // handle case where you're not building trees, but are in tree walker.
2342: // Just need to get labels set up.
2343: if (grammar instanceof TreeWalkerGrammar
2344: && !grammar.buildAST) {
2345: String elementRef;
2346: String astName;
2347:
2348: // Generate names and declarations of the AST variable(s)
2349: if (el.getLabel() == null) {
2350: elementRef = lt1Value;
2351: // Generate AST variables for unlabeled stuff
2352: astName = "tmp" + astVarNumber + "_AST";
2353: astVarNumber++;
2354: // Map the generated AST variable in the alternate
2355: mapTreeVariable(el, astName);
2356: // Generate an "input" AST variable also
2357: println(labeledElementASTType + " " + astName
2358: + "_in = " + elementRef + ";");
2359: }
2360: return;
2361: }
2362:
2363: if (grammar.buildAST && syntacticPredLevel == 0) {
2364: boolean needASTDecl = (genAST && (el.getLabel() != null || el
2365: .getAutoGenType() != GrammarElement.AUTO_GEN_BANG));
2366:
2367: // RK: if we have a grammar element always generate the decl
2368: // since some guy can access it from an action and we can't
2369: // peek ahead (well not without making a mess).
2370: // I'd prefer taking this out.
2371: if (el.getAutoGenType() != GrammarElement.AUTO_GEN_BANG
2372: && (el instanceof TokenRefElement)) {
2373: needASTDecl = true;
2374: }
2375:
2376: boolean doNoGuessTest = (grammar.hasSyntacticPredicate && needASTDecl);
2377:
2378: String elementRef;
2379: String astNameBase;
2380:
2381: // Generate names and declarations of the AST variable(s)
2382: if (el.getLabel() != null) {
2383: elementRef = el.getLabel();
2384: astNameBase = el.getLabel();
                } else {
                    elementRef = lt1Value;
                    // Generate AST variables for unlabeled stuff
                    astNameBase = "tmp" + astVarNumber;
                    astVarNumber++;
                }
2392:
2393: // Generate the declaration if required.
2394: if (needASTDecl) {
2395: // Generate the declaration
2396: if (el instanceof GrammarAtom) {
2397: GrammarAtom ga = (GrammarAtom) el;
2398: if (ga.getASTNodeType() != null) {
2399: genASTDeclaration(el, astNameBase, ga
2400: .getASTNodeType());
2401: // println(ga.getASTNodeType()+" " + astName+" = null;");
2402: } else {
2403: genASTDeclaration(el, astNameBase,
2404: labeledElementASTType);
2405: // println(labeledElementASTType+" " + astName + " = null;");
2406: }
2407: } else {
2408: genASTDeclaration(el, astNameBase,
2409: labeledElementASTType);
2410: // println(labeledElementASTType+" " + astName + " = null;");
2411: }
2412: }
2413:
2414: // for convenience..
2415: String astName = astNameBase + "_AST";
2416:
2417: // Map the generated AST variable in the alternate
2418: mapTreeVariable(el, astName);
2419: if (grammar instanceof TreeWalkerGrammar) {
2420: // Generate an "input" AST variable also
2421: println(labeledElementASTType + " " + astName
2422: + "_in = null;");
2423: }
2424:
2425: if (!Tool.cloneGuessing
2426: || (context.guessing == Context.NO_GUESSING)) {
2427: // Enclose actions with !guessing
2428: if (doNoGuessTest && !Tool.cloneGuessing) {
2429: println("if (inputState.guessing==0) {");
2430: tabs++;
2431: }
2432:
2433: // if something has a label assume it will be used
2434: // so we must initialize the RefAST
2435: if (el.getLabel() != null) {
2436: if (el instanceof GrammarAtom) {
2437: println(astName
2438: + " = "
2439: + getASTCreateString(
2440: (GrammarAtom) el,
2441: elementRef) + ";");
2442: } else {
2443: println(astName + " = "
2444: + getASTCreateString(elementRef)
2445: + ";");
2446: }
2447: }
2448:
2449: // if it has no label but a declaration exists initialize it.
2450: if (el.getLabel() == null && needASTDecl) {
2451: elementRef = lt1Value;
2452: if (el instanceof GrammarAtom) {
2453: println(astName
2454: + " = "
2455: + getASTCreateString(
2456: (GrammarAtom) el,
2457: elementRef) + ";");
2458: } else {
2459: println(astName + " = "
2460: + getASTCreateString(elementRef)
2461: + ";");
2462: }
2463: // Map the generated AST variable in the alternate
2464: if (grammar instanceof TreeWalkerGrammar) {
2465: // set "input" AST variable also
2466: println(astName + "_in = " + elementRef
2467: + ";");
2468: }
2469: }
2470:
2471: if (genAST) {
2472: switch (el.getAutoGenType()) {
2473: case GrammarElement.AUTO_GEN_NONE:
2474: println("astFactory.addASTChild(currentAST, "
2475: + astName + ");");
2476: break;
2477: case GrammarElement.AUTO_GEN_CARET:
2478: println("astFactory.makeASTRoot(currentAST, "
2479: + astName + ");");
2480: break;
2481: default:
2482: break;
2483: }
2484: }
2485: if (doNoGuessTest && !Tool.cloneGuessing) {
2486: tabs--;
2487: println("}");
2488: }
2489: }
2490: }
2491: } finally {
2492: defaultLine = oldDefaultLine;
2493: }
2494: }
2495:
2496: /** Close the try block and generate catch phrases
2497: * if the element has a labeled handler in the rule
2498: */
2499: private void genErrorCatchForElement(AlternativeElement el,
2500: Context context) {
2501: if (el.getLabel() == null)
2502: return;
2503: String r = el.enclosingRuleName;
2504: if (grammar instanceof LexerGrammar) {
2505: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2506: }
2507: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2508: if (rs == null) {
2509: antlrTool.fatalError("Enclosing rule not found!");
2510: }
2511: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2512: if (ex != null) {
2513: tabs--;
2514: println("}", el.getLine());
2515: genErrorHandler(ex, context);
2516: }
2517: }
2518:
2519: /** Generate the catch phrases for a user-specified error handler */
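    // Each handler becomes a catch phrase of roughly this shape
    // (sketch; exception type and name come from the handler spec):
    //     catch (RecognitionException ex) {
    //         if (inputState.guessing==0) {
    //             ...user handler action...
    //         } else {
    //             throw ex;
    //         }
    //     }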
2520: private void genErrorHandler(ExceptionSpec ex, Context context) {
2521: // Each ExceptionHandler in the ExceptionSpec is a separate catch
2522: for (int i = 0; i < ex.handlers.size(); i++) {
2523: ExceptionHandler handler = (ExceptionHandler) ex.handlers
2524: .elementAt(i);
2525: int oldDefaultLine = defaultLine;
2526: try {
2527: defaultLine = handler.action.getLine();
2528: // Generate catch phrase
2529: println("catch ("
2530: + handler.exceptionTypeAndName.getText()
2531: + ") {", handler.exceptionTypeAndName.getLine());
2532: tabs++;
2533: if (!Tool.cloneGuessing) {
2534: if (grammar.hasSyntacticPredicate) {
2535: println("if (inputState.guessing==0) {");
2536: tabs++;
2537: }
2538: }
2539:
2540: if (!Tool.cloneGuessing
2541: || (context.guessing == Context.NO_GUESSING)) {
2542: // When not guessing, execute user handler action
2543: ActionTransInfo tInfo = new ActionTransInfo();
2544: printAction(processActionForSpecialSymbols(
2545: handler.action.getText(), handler.action
2546: .getLine(), currentRule, tInfo));
2547: }
2548:
2549: if (grammar.hasSyntacticPredicate) {
2550: tabs--;
2551: if (!Tool.cloneGuessing) {
2552: println("} else {");
2553: }
2554: tabs++;
2555: // When guessing, rethrow exception
2556: if (!Tool.cloneGuessing
2557: || (context.guessing != Context.NO_GUESSING)) {
2558: println("throw "
2559: + extractIdOfAction(handler.exceptionTypeAndName)
2560: + ";");
2561: }
2562: tabs--;
2563: if (!Tool.cloneGuessing) {
2564: println("}");
2565: }
2566: }
2567: // Close catch phrase
2568: tabs--;
2569: println("}");
2570: } finally {
2571: defaultLine = oldDefaultLine;
2572: }
2573: }
2574: }
2575:
2576: /** Generate a try { opening if the element has a labeled handler in the rule */
2577: private void genErrorTryForElement(AlternativeElement el) {
2578: if (el.getLabel() == null)
2579: return;
2580: String r = el.enclosingRuleName;
2581: if (grammar instanceof LexerGrammar) {
2582: r = CodeGenerator.encodeLexerRuleName(el.enclosingRuleName);
2583: }
2584: RuleSymbol rs = (RuleSymbol) grammar.getSymbol(r);
2585: if (rs == null) {
2586: antlrTool.fatalError("Enclosing rule not found!");
2587: }
2588: ExceptionSpec ex = rs.block.findExceptionSpec(el.getLabel());
2589: if ((ex != null) && (MatchExceptionState.throwRecExceptions)) {
2590: println("try { // for error handling", el.getLine());
2591: tabs++;
2592: }
2593: }
2594:
2595: protected void genASTDeclaration(AlternativeElement el) {
2596: genASTDeclaration(el, labeledElementASTType);
2597: }
2598:
2599: protected void genASTDeclaration(AlternativeElement el,
2600: String node_type) {
2601: genASTDeclaration(el, el.getLabel(), node_type);
2602: }
2603:
2604: protected void genASTDeclaration(AlternativeElement el,
2605: String var_name, String node_type) {
2606: // already declared?
2607: if (declaredASTVariables.contains(el))
2608: return;
2609:
2610: // emit code
2611: println(node_type + " " + var_name + "_AST = null;");
2612:
2613: // mark as declared
2614: declaredASTVariables.put(el, el);
2615: }
2616:
2617: /** Generate a header that is common to all Java files */
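    // The emitted line looks like (sketch, file names hypothetical):
    //     // $ANTLR <version>: "Expr.g" -> "ExprParser.java"$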
2618: protected void genHeader() {
2619: println("// $ANTLR " + Tool.version + ": " + "\""
2620: + antlrTool.fileMinusPath(antlrTool.grammarFile) + "\""
2621: + " -> " + "\"" + grammar.getClassName() + ".java\"$",
2622: NO_MAPPING);
2623: }
2624:
2625: private void genLiteralsTest() {
2626: println("_ttype = testLiteralsTable(_ttype);");
2627: }
2628:
2629: private void genLiteralsTestForPartialToken() {
2630: println("_ttype = testLiteralsTable(new String(text.getBuffer(),_begin,text.length()-_begin),_ttype);");
2631: }
2632:
2633: protected void genMatch(BitSet b) {
2634: }
2635:
2636: protected void genMatch(GrammarAtom atom) {
2637: if (atom instanceof StringLiteralElement) {
2638: if (grammar instanceof LexerGrammar) {
2639: genMatchUsingAtomText(atom);
2640: } else {
2641: genMatchUsingAtomTokenType(atom);
2642: }
2643: } else if (atom instanceof CharLiteralElement) {
2644: if (grammar instanceof LexerGrammar) {
2645: genMatchUsingAtomText(atom);
2646: } else {
2647: antlrTool
2648: .error("cannot ref character literals in grammar: "
2649: + atom);
2650: }
2651: } else if (atom instanceof TokenRefElement) {
2652: genMatchUsingAtomText(atom);
2653: } else if (atom instanceof WildcardElement) {
2654: gen((WildcardElement) atom, Context.EMPTY);
2655: }
2656: }
2657:
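    /** Emit a match()/matchNot() call using the atom's text. For a
     * lexer atom marked with '!' (or when text saving is off) the
     * emitted code discards the matched text, roughly (sketch):
     *     _saveIndex=text.length();
     *     match('*');
     *     text.setLength(_saveIndex);
     */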
2658: protected void genMatchUsingAtomText(GrammarAtom atom) {
2659: int oldDefaultLine = defaultLine;
2660: try {
2661: defaultLine = atom.getLine();
2662: // match() for trees needs the _t cursor
2663: String astArgs = "";
2664: if (grammar instanceof TreeWalkerGrammar) {
2665: astArgs = "_t,";
2666: }
2667:
2668: // if in lexer and ! on element, save buffer index to kill later
2669: if (grammar instanceof LexerGrammar
2670: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2671: println("_saveIndex=text.length();");
2672: }
2673:
2674: print(atom.not ? "matchNot(" : "match(");
2675: _print(astArgs, NO_MAPPING);
2676:
2677: // print out what to match
2678: if (atom.atomText.equals("EOF")) {
2679: // horrible hack to handle EOF case
2680: _print("Token.EOF_TYPE");
2681: } else {
2682: _print(atom.atomText);
2683: }
2684: _println(");");
2685:
2686: if (grammar instanceof LexerGrammar
2687: && (!saveText || atom.getAutoGenType() == GrammarElement.AUTO_GEN_BANG)) {
2688: println("text.setLength(_saveIndex);"); // kill text atom put in buffer
2689: }
2690: } finally {
2691: defaultLine = oldDefaultLine;
2692: }
2693: }
2694:
2695: protected void genMatchUsingAtomTokenType(GrammarAtom atom) {
2696: // match() for trees needs the _t cursor
2697: String astArgs = "";
2698: if (grammar instanceof TreeWalkerGrammar) {
2699: astArgs = "_t,";
2700: }
2701:
        // If the literal can be mangled, getValueString generates the
        // symbolic constant instead of the raw token type
        String s = astArgs + getValueString(atom.getType());
2705:
2706: // matching
2707: println((atom.not ? "matchNot(" : "match(") + s + ");", atom
2708: .getLine());
2709: }
2710:
2711: /** Generate the nextToken() rule. nextToken() is a synthetic
2712: * lexer rule that is the implicit OR of all user-defined
2713: * lexer rules.
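     *
     * The generated method has roughly this frame (sketch):
     *     public Token nextToken() throws TokenStreamException {
     *         tryAgain:
     *         for (;;) {
     *             Token _token = null;
     *             int _ttype = Token.INVALID_TYPE;
     *             resetText();
     *             ...prediction block over all public lexer rules...
     *             if ( _returnToken==null ) continue tryAgain; // found SKIP token
     *             return _returnToken;
     *         }
     *     }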
2714: */
2715: public void genNextToken() {
2716: int oldDefaultLine = defaultLine;
2717: try {
2718: defaultLine = NO_MAPPING;
2719: // Are there any public rules? If not, then just generate a
2720: // fake nextToken().
2721: boolean hasPublicRules = false;
2722: for (int i = 0; i < grammar.rules.size(); i++) {
2723: RuleSymbol rs = (RuleSymbol) grammar.rules.elementAt(i);
2724: if (rs.isDefined() && rs.access.equals("public")) {
2725: hasPublicRules = true;
2726: break;
2727: }
2728: }
2729: if (!hasPublicRules) {
2730: println("");
2731: println("public Token nextToken() throws TokenStreamException {");
2732: println("\ttry {uponEOF();}");
2733: println("\tcatch(CharStreamIOException csioe) {");
2734: println("\t\tthrow new TokenStreamIOException(csioe.io);");
2735: println("\t}");
2736: println("\tcatch(CharStreamException cse) {");
2737: println("\t\tthrow new TokenStreamException(cse.getMessage());");
2738: println("\t}");
2739: println("\treturn new CommonToken(Token.EOF_TYPE, \"\");");
2740: println("}");
2741: println("");
2742: return;
2743: }
2744:
2745: // Create the synthesized nextToken() rule
2746: RuleBlock nextTokenBlk = MakeGrammar.createNextTokenRule(
2747: grammar, grammar.rules, "nextToken");
2748: // Define the nextToken rule symbol
2749: RuleSymbol nextTokenRs = new RuleSymbol("mnextToken");
2750: nextTokenRs.setDefined();
2751: nextTokenRs.setBlock(nextTokenBlk);
2752: nextTokenRs.access = "private";
2753: grammar.define(nextTokenRs);
2754: // Analyze the nextToken rule
2755: boolean ok = grammar.theLLkAnalyzer
2756: .deterministic(nextTokenBlk);
2757:
2758: // Generate the next token rule
2759: String filterRule = null;
2760: if (((LexerGrammar) grammar).filterMode) {
2761: filterRule = ((LexerGrammar) grammar).filterRule;
2762: }
2763:
2764: println("");
2765: println("public Token nextToken() throws TokenStreamException {");
2766: tabs++;
2767: if (!Tool.agressive) {
2768: println("Token theRetToken=null;");
2769: }
2770: _println("tryAgain:");
2771: println("for (;;) {");
2772: tabs++;
2773: println("Token _token = null;");
2774: println("int _ttype = Token.INVALID_TYPE;");
2775: if (((LexerGrammar) grammar).filterMode) {
2776: println("setCommitToPath(false);");
2777: if (filterRule != null) {
2778: // Here's a good place to ensure that the filter rule actually exists
2779: if (!grammar.isDefined(CodeGenerator
2780: .encodeLexerRuleName(filterRule))) {
2781: grammar.antlrTool.error("Filter rule "
2782: + filterRule
2783: + " does not exist in this lexer");
2784: } else {
2785: RuleSymbol rs = (RuleSymbol) grammar
2786: .getSymbol(CodeGenerator
2787: .encodeLexerRuleName(filterRule));
2788: if (!rs.isDefined()) {
2789: grammar.antlrTool.error("Filter rule "
2790: + filterRule
2791: + " does not exist in this lexer");
2792: } else if (rs.access.equals("public")) {
2793: grammar.antlrTool
2794: .error("Filter rule " + filterRule
2795: + " must be protected");
2796: }
2797: }
2798: println("int _m;");
2799: println("_m = mark();");
2800: }
2801: }
2802: println("resetText();");
2803:
2804: //println("try { // for char stream error handling");
2805: //tabs++;
2806:
2807: // Generate try around whole thing to trap scanner errors
2808: if (MatchExceptionState.throwRecExceptions) {
2809: println("try { // for lexical error handling");
2810: }
2811: //String loopLabel = "loop" + loopCount++;
2812: Context context = new Context("loop" + loopCount++,
2813: Context.NO_GUESSING);
2814: printLoopStart(context.breakLabel);
2815: tabs++;
2816:
2817: // Test for public lexical rules with empty paths
2818: for (int i = 0; i < nextTokenBlk.getAlternatives().size(); i++) {
2819: Alternative a = nextTokenBlk.getAlternativeAt(i);
2820: if (a.cache[1].containsEpsilon()) {
2821: //String r = a.head.toString();
2822: RuleRefElement rr = (RuleRefElement) a.head;
2823: String r = CodeGenerator
2824: .decodeLexerRuleName(rr.targetRule);
2825: antlrTool.warning("public lexical rule " + r
2826: + " is optional (can match \"nothing\")");
2827: }
2828: }
2829:
2830: // Generate the block
2831: String newline = System.getProperty("line.separator");
2832: JavaBlockFinishingInfo howToFinish = genCommonBlock(
2833: nextTokenBlk, false, context);
2834: String errFinish = "if (LA(1)==EOF_CHAR) {uponEOF(); _returnToken = makeToken(Token.EOF_TYPE);}";
2835: errFinish += newline + "\t\t\t\t";
2836: if (((LexerGrammar) grammar).filterMode) {
2837: if (filterRule == null) {
2838: errFinish += "else {consume(); continue tryAgain;}";
2839: } else {
2840: errFinish += "else {"
2841: + newline
2842: + "\t\t\t\t\tcommit();"
2843: + newline
2844: + "\t\t\t\t\ttry {m"
2845: + filterRule
2846: + "(false);}"
2847: + newline
2848: + "\t\t\t\t\tcatch(RecognitionException e) {"
2849: + newline
2850: + "\t\t\t\t\t // catastrophic failure"
2851: + newline + "\t\t\t\t\t reportError(e);"
2852: + newline + "\t\t\t\t\t consume();"
2853: + newline + "\t\t\t\t\t}" + newline
2854: + "\t\t\t\t\tcontinue tryAgain;" + newline
2855: + "\t\t\t\t}";
2856: }
2857: } else {
2858: errFinish += "else {" + getThrowNoViableStr(context)
2859: + "}";
2860: }
2861: genBlockFinish(howToFinish, errFinish, nextTokenBlk
2862: .getLine());
2863:
2864: // at this point a valid token has been matched, undo "mark" that was done
2865: if (((LexerGrammar) grammar).filterMode
2866: && filterRule != null) {
2867: println("commit();");
2868: }
2869:
2870: // Generate literals test if desired
2871: // make sure _ttype is set first; note _returnToken must be
2872: // non-null as the rule was required to create it.
2873: println("if ( _returnToken==null ) continue tryAgain; // found SKIP token");
2874: if (((LexerGrammar) grammar).getTestLiterals()) {
2875: println("_ttype = _returnToken.getType();");
2876: genLiteralsTest();
2877: println("_returnToken.setType(_ttype);");
2878: }
2879:
2880: // return token created by rule reference in switch
2881: println("return _returnToken;");
2882:
2883: // Close try block
2884: tabs--;
2885: println("}"); //break is not needed here due to the previous return statement
2886: //printLoopEnd(loopLabel);
2887: if (MatchExceptionState.throwRecExceptions) {
2888: println("}");
2889: println("catch (RecognitionException e) {");
2890: tabs++;
2891: if (((LexerGrammar) grammar).filterMode) {
2892: if (filterRule == null) {
2893: println("if ( !getCommitToPath() ) {consume(); continue tryAgain;}");
2894: } else {
2895: println("if ( !getCommitToPath() ) {");
2896: tabs++;
2897: println("rewind(_m);");
2898: println("resetText();");
2899: println("try {m" + filterRule + "(false);}");
2900: println("catch(RecognitionException ee) {");
2901: println(" // horrendous failure: error in filter rule");
2902: println(" reportError(ee);");
2903: println(" consume();");
2904: println("}");
2905: println("continue tryAgain;");
2906: tabs--;
2907: println("}");
2908: }
2909: }
2910: if (nextTokenBlk.getDefaultErrorHandler()) {
2911: println("reportError(e);");
2912: println("consume();");
2913: } else {
2914: // pass on to invoking routine
2915: println("throw new TokenStreamRecognitionException(e);");
2916: }
2917: tabs--;
2918: println("}");
2919: }
2920:
2921: //TODO: not always like this, need to copy above text
2922: println("if (matchError) {");
2923: println("\tthrow new TokenStreamRecognitionException(matchException);");
2924: println("}");
2925: // close CharStreamException try
2926: //tabs--;
2927: //println("}");
2928: //println("catch (CharStreamException cse) {");
2929: //println(" if ( cse instanceof CharStreamIOException ) {");
2930: //println(" throw new TokenStreamIOException(((CharStreamIOException)cse).io);");
2931: //println(" }");
2932: //println(" else {");
2933: //println(" throw new TokenStreamException(cse.getMessage());");
2934: //println(" }");
2935: //println("}");
2936:
2937: // close for-loop
2938: tabs--;
2939: println("}");
2940:
2941: // close method nextToken
2942: tabs--;
2943: println("}");
2944: println("");
2945: } finally {
2946: defaultLine = oldDefaultLine;
2947: }
2948: }
2949:
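    /** Decide whether a lexer rule's generated method always, never, or
     * sometimes needs to create a Token: returns Boolean.TRUE if every
     * reference to the rule is labeled, Boolean.FALSE if none is, and
     * null when references are mixed (keep the normal _createToken
     * parameter).
     */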
2950: private Boolean checkLexerRuleGenToken(RuleSymbol s) {
        Boolean lexerGenerateToken = null; // null = normal mode (mixed or no references)
2952:
2953: if (grammar instanceof LexerGrammar) {
2954: //System.out.println("Symbol:" + s.getId() + " ref from:" + s.references.size());
2955: for (int i = 0; i < s.references.size(); i++) {
2956: RuleRefElement cur = (RuleRefElement) s.references
2957: .elementAt(i);
                if (i == 0) {
                    lexerGenerateToken = Boolean.valueOf(cur.getLabel() != null);
                    continue;
                }
2963: if (lexerGenerateToken.booleanValue() != (cur
2964: .getLabel() != null)) {
2965: lexerGenerateToken = null;
2966: break;
2967: }
2968: //System.out.println("From: " + cur.getIdAssign() + " label=" + cur.getLabel());
2969: }
2970: }
2971: return lexerGenerateToken;
2972: }
2973:
2974: /** Gen a named rule block.
2975: * ASTs are generated for each element of an alternative unless
2976: * the rule or the alternative have a '!' modifier.
2977: *
2978: * If an alternative defeats the default tree construction, it
2979: * must set <rule>_AST to the root of the returned AST.
2980: *
2981: * Each alternative that does automatic tree construction, builds
2982: * up root and child list pointers in an ASTPair structure.
2983: *
2984: * A rule finishes by setting the returnAST variable from the
2985: * ASTPair.
2986: *
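     * The emitted method has roughly this frame (sketch, for a parser
     * rule building ASTs; rule name hypothetical):
     *     public final void expr() throws RecognitionException {
     *         returnAST = null;
     *         ASTPair currentAST = new ASTPair();
     *         AST expr_AST = null;
     *         try { // for error handling
     *             ...alternatives...
     *         }
     *         catch (RecognitionException ex) { ...default handler... }
     *         returnAST = expr_AST;
     *     }
     *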
     * @param s The symbol of the rule to generate
     * @param startSymbol true if the rule is a start symbol (i.e., not referenced elsewhere)
     * @param ruleNum The rule's index, used by the debugging and memoization hooks
     * @param context The generation context (guessing mode, break label, return var)
2989: */
2990: public void genRule(RuleSymbol s, boolean startSymbol, int ruleNum,
2991: Context context) {
2992: tabs = 1;
2993:
2994: if (DEBUG_CODE_GENERATOR)
2995: System.out.println("genRule(" + s.getId() + ")");
2996: if (!s.isDefined()) {
2997: antlrTool.error("undefined rule: " + s.getId());
2998: return;
2999: }
3000:
3001: Boolean lexerGenerateToken = checkLexerRuleGenToken(s);
3002:
3003: if (grammar instanceof LexerGrammar) {
3004: String callMode = "normal";
3005: if (Boolean.TRUE.equals(lexerGenerateToken)) {
3006: callMode = "always true";
3007: } else if (Boolean.FALSE.equals(lexerGenerateToken)) {
3008: callMode = "always false";
3009: }
3010: println("//Call mode " + callMode);
3011: }
3012:
3013: // Generate rule return type, name, arguments
3014: RuleBlock rblk = s.getBlock();
3015:
3016: int oldDefaultLine = defaultLine;
3017: try {
3018: defaultLine = rblk.getLine();
3019: currentRule = rblk;
3020: currentASTResult = s.getId();
3021:
3022: // clear list of declared ast variables..
3023: declaredASTVariables.clear();
3024:
3025: // Save the AST generation state, and set it to that of the rule
3026: boolean savegenAST = genAST;
3027: genAST = genAST && rblk.getAutoGen();
3028:
            // boolean oldsaveText = saveText;
3030: saveText = rblk.getAutoGen();
3031:
3032: // print javadoc comment if any
3033: if (s.comment != null) {
3034: _println(s.comment);
3035: }
3036:
3037: // Gen method access and final qualifier
3038: print(s.access + " final ");
3039:
3040: // Gen method return type (note lexer return action set at rule creation)
3041: if (rblk.returnAction != null) {
3042: // Has specified return value
3043: _print(extractTypeOfAction(rblk.returnAction, rblk
3044: .getLine(), rblk.getColumn())
3045: + " ");
3046: context.returnVar = extractIdOfAction(
3047: rblk.returnAction, rblk.getLine(), rblk
3048: .getColumn());
3049: } else {
3050: // No specified return value
3051: _print("void ");
3052: }
3053:
3054: String postfix = "";
3055: if (Tool.cloneGuessing) {
3056: postfix = getRuleNamePostfix(context.guessing);
3057: }
3058: // Gen method name
3059: _print(s.getId() + postfix + "(");
3060:
3061: // Additional rule parameters common to all rules for this grammar
3062: if (grammar instanceof LexerGrammar) {
3063: if (!Tool.agressive || (lexerGenerateToken == null)) {
3064: _print(commonExtraParams);
3065: if (commonExtraParams.length() != 0
3066: && rblk.argAction != null) {
3067: _print(",");
3068: }
3069: }
3070: } else {
3071: _print(commonExtraParams);
3072: if (commonExtraParams.length() != 0
3073: && rblk.argAction != null) {
3074: _print(",");
3075: }
3076: }
3077:
3078: // Gen arguments
3079: if ((rblk.argAction != null)
3080: && !(Tool.cloneGuessing && Tool.agressive && (context.guessing != Context.NO_GUESSING))) {
3081: // Has specified arguments
3082: _println("");
3083: tabs++;
3084: println(rblk.argAction);
3085: tabs--;
3086: print(")");
3087: } else {
3088: // No specified arguments
3089: _print(")");
3090: }
3091:
3092: // Gen throws clause and open curly
3093: final String throwConst = " throws ";
3094: String throwsStr = throwConst;
3095: if (MatchExceptionState.throwRecExceptions) {
3096: throwsStr += exceptionThrown;
3097: }
3098: if (grammar instanceof ParserGrammar) {
3099: if (!throwsStr.equals(throwConst)) {
3100: throwsStr += ", ";
3101: }
3102: //throwsStr += "TokenStreamException";
3103: } else if (grammar instanceof LexerGrammar) {
3104: if (!throwsStr.equals(throwConst)) {
3105: throwsStr += ", ";
3106: }
3107: //throwsStr += "CharStreamException, TokenStreamException";
3108: //throwsStr += "TokenStreamException";
3109: }
3110: if (!throwsStr.equals(throwConst)) {
3111: _print(throwsStr);
3112: }
3113: // Add user-defined exceptions unless lexer (for now)
3114: if (rblk.throwsSpec != null) {
3115: if (grammar instanceof LexerGrammar) {
3116: antlrTool
3117: .error("user-defined throws spec not allowed (yet) for lexer rule "
3118: + rblk.ruleName);
3119: } else {
3120: _print(", " + rblk.throwsSpec);
3121: }
3122: }
3123:
3124: _println(" {");
3125: tabs++;
3126:
3127: // Convert return action to variable declaration
3128: if (rblk.returnAction != null)
3129: println(rblk.returnAction + ";");
3130:
3131: if (!Tool.agressive || !(grammar instanceof LexerGrammar)) {
3132: println(commonLocalVars);
3133: } else if (rblk.isConstText()) {
3134: println("int _ttype;");
            } else if (!Boolean.FALSE.equals(lexerGenerateToken)) {
                println(commonLocalVars);
            }
3139:
3140: if (grammar.traceRules) {
3141: if (grammar instanceof TreeWalkerGrammar) {
3142: println("traceIn(\"" + s.getId() + "\",_t);");
3143: } else {
3144: println("traceIn(\"" + s.getId() + "\");");
3145: }
3146: }
3147:
3148: // MEMOIZATION
3149: if (Tool.memoization && grammar instanceof ParserGrammar) {
3150: println("int _startIndex = inputState.input.index();");
3151: println("if ( inputState.guessing>0 && alreadyParsedRule("
3152: + ruleNum + ") ) {");
3153: if (grammar.traceRules) {
3154: if (grammar instanceof TreeWalkerGrammar) {
3155: println("traceOut(\"" + s.getId() + "\",_t);");
3156: } else {
3157: println("traceOut(\"" + s.getId() + "\");");
3158: }
3159: }
3160: //println(" if ( matchError ) throw new RecognitionException(\"failed previously\");");
3161: //println(" if ( matchError ) ");
3162: if (rblk.returnAction != null) {
3163: println(" return "
3164: + extractIdOfAction(rblk.returnAction, rblk
3165: .getLine(), rblk.getColumn()) + ";");
3166: } else {
3167: println(" return; ");
3168: }
3169: println("}");
3170: }
3171:
3172: if (grammar instanceof LexerGrammar) {
3173: if (!Tool.agressive
3174: || !Boolean.FALSE.equals(lexerGenerateToken)) {
3175: // lexer rule default return value is the rule's token name
3176: // This is a horrible hack to support the built-in EOF lexer rule.
3177: if (s.getId().equals("mEOF"))
3178: println("_ttype = Token.EOF_TYPE;");
3179: else
3180: println("_ttype = " + s.getId().substring(1)
3181: + ";");
3182: println("int _saveIndex;"); // used for element! (so we can kill text matched for element)
3183: /*
3184: println("boolean old_saveConsumedInput=saveConsumedInput;");
3185: if ( !rblk.getAutoGen() ) { // turn off "save input" if ! on rule
3186: println("saveConsumedInput=false;");
3187: }
3188: */
3189: }
3190: }
3191:
3192: // if debugging, write code to mark entry to the rule
3193: if (grammar.debuggingOutput)
3194: if (grammar instanceof ParserGrammar)
3195: println("fireEnterRule(" + ruleNum + ",0);");
3196: else if (grammar instanceof LexerGrammar)
3197: println("fireEnterRule(" + ruleNum + ",_ttype);");
3198:
3199: // Generate trace code if desired
3200: if (grammar.debuggingOutput || grammar.traceRules
3201: || Tool.memoization) {
3202: println("try { // debugging");
3203: tabs++;
3204: }
3205:
3206: // Initialize AST variables
3207: if (grammar instanceof TreeWalkerGrammar) {
3208: // "Input" value for rule
3209: println(labeledElementASTType + " " + s.getId()
3210: + "_AST_in = (_t == ASTNULL) ? null : ("
3211: + labeledElementASTType + ")_t;", NO_MAPPING);
3212: }
3213: if (grammar.buildAST) {
3214: // Parser member used to pass AST returns from rule invocations
3215: println("returnAST = null;");
3216: // Tracks AST construction
3217: // println("ASTPair currentAST = (inputState.guessing==0) ? new ASTPair() : null;");
3218: println("ASTPair currentAST = new ASTPair();");
3219: // User-settable return value for rule.
3220: println(labeledElementASTType + " " + s.getId()
3221: + "_AST = null;");
3222: }
3223:
3224: genBlockPreamble(rblk);
3225: genBlockInitAction(rblk, context);
3226: println("");
3227:
3228: // Search for an unlabeled exception specification attached to the rule
3229: ExceptionSpec unlabeledUserSpec = rblk
3230: .findExceptionSpec("");
3231:
3232: //String loopLabel = null;
3233:
3234: // Generate try block around the entire rule for error handling
3235: if (unlabeledUserSpec != null
3236: || rblk.getDefaultErrorHandler()) {
3237: if (MatchExceptionState.throwRecExceptions) {
3238: println("try { // for error handling");
3239: }
3240: //loopLabel = "loop" + loopCount++;
3241: context = new Context("loop" + loopCount++,
3242: context.guessing);
3243: printLoopStart(context.breakLabel);
3244: tabs++;
3245: }
3246:
3247: // Generate the alternatives
3248: if (rblk.alternatives.size() == 1) {
3249: // One alternative -- use simple form
3250: Alternative alt = rblk.getAlternativeAt(0);
3251: String pred = alt.semPred;
3252: if (pred != null)
3253: genSemPred(pred, currentRule.line, context);
3254: if (alt.synPred != null) {
3255: antlrTool
3256: .warning(
3257: "Syntactic predicate ignored for single alternative",
3258: grammar.getFilename(), alt.synPred
3259: .getLine(), alt.synPred
3260: .getColumn());
3261: }
3262: context = new Context(context);
3263: context.setCheckedLA(0);
3264: genAlt(alt, rblk, context);
3265: } else {
3266: // Multiple alternatives -- generate complex form
3267: boolean ok = grammar.theLLkAnalyzer.deterministic(rblk);
3268:
3269: JavaBlockFinishingInfo howToFinish = genCommonBlock(
3270: rblk, false, context);
3271:
                String noViableAction = getThrowNoViableStr(context);
                genBlockFinish(howToFinish, noViableAction, rblk.getLine());
3275: }
3276:
3277: // Generate catch phrase for error handling
3278: if (unlabeledUserSpec != null
3279: || rblk.getDefaultErrorHandler()) {
3280: // Close the try block
3281: tabs--;
3282: printLoopEnd(context.breakLabel);
3283: if (MatchExceptionState.throwRecExceptions) {
3284: println("}");
3285: }
3286: }
3287:
3288: // Generate user-defined or default catch phrases
3289: if (unlabeledUserSpec != null) {
3290: genErrorHandler(unlabeledUserSpec, context);
3291: } else if (rblk.getDefaultErrorHandler()) {
3292: genRuleCatch(rblk.endNode, false, context);
3293: genRuleCatch(rblk.endNode, true, context);
3294: }
3295:
3296: // Squirrel away the AST "return" value
3297: if (grammar.buildAST) {
3298: println("returnAST = " + s.getId() + "_AST;");
3299: }
3300:
3301: // Set return tree value for tree walkers
3302: if (grammar instanceof TreeWalkerGrammar) {
3303: println("_retTree = _t;");
3304: }
3305:
3306: // Generate literals test for lexer rules so marked
3307: if (rblk.getTestLiterals()) {
3308: if (s.access.equals("protected")) {
3309: genLiteralsTestForPartialToken();
3310: } else {
3311: genLiteralsTest();
3312: }
3313: }
3314:
3315: // if doing a lexer rule, dump code to create token if necessary
3316: if (grammar instanceof LexerGrammar) {
                if (!(Tool.agressive
                        && Boolean.FALSE.equals(lexerGenerateToken))) {
3321: String createTokenStr = "_createToken &&";
3322: if (Tool.agressive
3323: && Boolean.TRUE.equals(lexerGenerateToken)) {
3324: createTokenStr = "";
3325: }
3326:
3327: String checkTokenNullStr = " _token==null &&";
3328: String checkSkip = " _ttype!=Token.SKIP";
3329: if (Tool.agressive) {
3330: checkTokenNullStr = "";
3331: if (!rblk.isCheckSkip()) {
3332: checkSkip = "true";
3333: }
3334: }
3335:
3336: println("if (" + createTokenStr + checkTokenNullStr
3337: + checkSkip + ") {");
3338: if (!Tool.agressive || !rblk.isConstText()) {
3339: println(" _token = makeToken(_ttype);");
3340: println(" if (_token != null) _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));");
3341: //println("_returnToken = _token;");
3342: println("}");
3343: println("_returnToken = _token;");
3344: } else {
3345: println(" _returnToken = makeToken(_ttype);");
3346: println(" // Const text, no need to set text");
3347: println("}");
3348: }
3349: }
3350: }
3351:
3352: // Gen the return statement if there is one (lexer has hard-wired return action)
3353: if (rblk.returnAction != null) {
3354: println("return "
3355: + extractIdOfAction(rblk.returnAction, rblk
3356: .getLine(), rblk.getColumn()) + ";");
3357: }
3358:
3359: if (grammar.debuggingOutput || grammar.traceRules
3360: || Tool.memoization) {
3361: tabs--;
3362: println("} finally { // debugging");
3363: tabs++;
3364:
3365: if (Tool.memoization
3366: && grammar instanceof ParserGrammar) {
3367: println("if ( inputState.guessing>0 ) { memoize("
3368: + ruleNum + ", _startIndex); }");
3369: }
3370:
3371: // If debugging, generate calls to mark exit of rule
3372: if (grammar.debuggingOutput)
3373: if (grammar instanceof ParserGrammar)
3374: println("fireExitRule(" + ruleNum + ",0);");
3375: else if (grammar instanceof LexerGrammar)
3376: println("fireExitRule(" + ruleNum + ",_ttype);");
3377:
3378: if (grammar.traceRules) {
3379: if (grammar instanceof TreeWalkerGrammar) {
3380: println("traceOut(\"" + s.getId() + "\",_t);");
3381: } else {
3382: println("traceOut(\"" + s.getId() + "\");");
3383: }
3384: }
3385:
3386: tabs--;
3387: println("}");
3388: }
3389:
3390: tabs--;
3391: println("}");
3392: println("");
3393:
3394: // Restore the AST generation state
3395: genAST = savegenAST;
3396:
3397: // restore char save state
            // saveText = oldsaveText;
3399: } finally {
3400: defaultLine = oldDefaultLine;
3401: }
3402: }
3403:
3404: private void findBuildASTActions(AlternativeBlock block,
3405: List<ActionElement> out) {
3406: ActionElement action = null;
3407: //println("// altBlock.analysisAlt is " + block.analysisAlt);
3408: if (block.analysisAlt >= 0) {
3409: Alternative a = block.getAlternativeAt(block.analysisAlt);
3410: AlternativeElement e = a.head;
3411: while (e != null) {
3412: //println("// alternative is " + e.getLabel() + " class:" + e.getClass());
3413: if (e instanceof ActionElement) {
3414: //println(" // found action element");
3415: action = (ActionElement) e;
                    // translate the action only for the side effect of
                    // filling tInfo: we need to know whether the action
                    // references the rule root (#rule)
                    ActionTransInfo tInfo = new ActionTransInfo();
                    processActionForSpecialSymbols(action.actionText,
                            action.getLine(), currentRule, tInfo);
3420: if (tInfo.refRuleRoot != null) {
3421: out.add(action);
3422: }
3423: } else if (e instanceof AlternativeBlock) {
3424: //println(" // analyze next block element");
3425: findBuildASTActions((AlternativeBlock) e, out);
3426: }
3427: e = e.next;
3428: }
3429: }
3430: }
3431:
3432: private void genRuleCatch(RuleEndElement endNode,
3433: boolean checkMatchFlag, Context context) {
3434: // generate nothing if catch block requested but not needed
3435: if (!MatchExceptionState.throwRecExceptions && !checkMatchFlag) {
3436: return;
3437: }
3438: // Generate default catch phrase
3439: if (checkMatchFlag) {
3440: println("if (matchError) {");
3441: } else {
3442: println("catch (" + exceptionThrown + " ex) {");
3443: }
3444: tabs++;
3445: // Generate code to handle error if not guessing
3446: if (!Tool.cloneGuessing) {
3447: if (grammar.hasSyntacticPredicate) {
3448: println("if (inputState.guessing==0) {");
3449: tabs++;
3450: }
3451: }
3452: if (!Tool.cloneGuessing
3453: || (context.guessing == Context.NO_GUESSING)) {
3454: String exception = (checkMatchFlag) ? "matchException"
3455: : "ex";
3456:
3457: //////////
3458: if (JavaCodeGenerator.RECOVER_AST) {
3459: List<ActionElement> actions = new ArrayList<ActionElement>(10);
3460: findBuildASTActions(endNode.block, actions);
3461: for (ActionElement action : actions) {
3462: ActionTransInfo tInfo = new ActionTransInfo();
3463: String actionStr = processActionForSpecialSymbols(
3464: action.actionText, action.getLine(),
3465: currentRule, tInfo);
3466:
3467: assert (tInfo.refRuleRoot != null);
3468: println("// when recovering we'd like to perform any \"build AST\" actions");
3469: println("if (("
3470: + tInfo.refRuleRoot
3471: + " == null) && (currentAST.root != null)) {");
3472: // Somebody referenced "#rule"; make sure the translated var is valid.
3473: // An assignment to #rule is left as a ref also, meaning that assignments
3474: // with no other refs, like "#rule = foo();", still force this code to be
3475: // generated (unnecessarily).
3476: println(tInfo.refRuleRoot + " = ("
3477: + labeledElementASTType
3478: + ")currentAST.root;");
3479: // dump the translated action
3480: printAction(actionStr);
3481: println("}");
3482: }
3483: }
3484: //////////
3485: println("reportError(" + exception + ");");
3486: if (!(grammar instanceof TreeWalkerGrammar)) {
3487: // Generate code to consume until token in k==1 follow set
3488: Lookahead follow = grammar.theLLkAnalyzer.FOLLOW(1,
3489: endNode);
3490: String followSetName = getBitsetName(markBitsetForGen(follow.fset));
3491: println("recover(" + exception + "," + followSetName
3492: + ");");
3493: } else {
3494: // Just consume one token
3495: println("if (_t!=null) {_t = _t.getNextSibling();}");
3496: }
3497: println("resetMatchError();");
3498: }
3499: if (grammar.hasSyntacticPredicate) {
3500: tabs--;
3501: // When guessing, rethrow exception
3502: if (!Tool.cloneGuessing) {
3503: println("} else {");
3504: }
3505: if (!Tool.cloneGuessing
3506: || (context.guessing != Context.NO_GUESSING)) {
3507: if (checkMatchFlag) {
3508: //println("return;");
3509: } else {
3510: println(" throw ex;");
3511: }
3512: }
3513: if (!Tool.cloneGuessing) {
3514: println("}");
3515: }
3516: }
3517: // Close catch phrase
3518: tabs--;
3519: println("}");
3520:
3521: }
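// Illustrative only: with throwRecExceptions enabled, in a grammar with
// syntactic predicates, the catch phrase generated above comes out
// roughly as (the follow-set name is whatever getBitsetName assigns,
// e.g. a generated _tokenSet_12 constant):
//
//   catch (RecognitionException ex) {
//       if (inputState.guessing==0) {
//           reportError(ex);
//           recover(ex,_tokenSet_12);
//           resetMatchError();
//       } else {
//           throw ex;
//       }
//   }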
3522:
3523: private static final String GUESSED_POSTFIX = "_g";
3524: private static final String NORMAL_POSTFIX = "";
3525:
3526: private String getRuleNamePostfix(int guessing) {
3527: /*if (!(grammar instanceof ParserGrammar)) {
3528: return "";
3529: }*/
3530: if (guessing == Context.NO_GUESSING) {
3531: return NORMAL_POSTFIX;
3532: } else {
3533: return GUESSED_POSTFIX;
3534: }
3535: }
3536:
3537: private void GenRuleInvocation(RuleRefElement rr, Context context) {
3538: int oldDefaultLine = defaultLine;
3539: try {
3540: defaultLine = rr.getLine();
3541: // dump rule name
3542: getPrintWriterManager().startSingleSourceLineMapping(
3543: rr.getLine());
3544: String postfix = "";
3545: if (Tool.cloneGuessing) {
3546: if (context.guessing != Context.NO_GUESSING) {
3547: String name = rr.targetRule;
3548: //System.out.println("Guessing ruleref found: " + name);
3549: /*if (grammar instanceof LexerGrammar) {
3550: name = decodeLexerRuleName(name);
3551: }*/
3552: if (!guessedRules.contains(name)) {
3553: //System.out.println("Guessing ruleref added: " + name);
3554: guessedRules.add(name);
3555: }
3556: }
3557: postfix = getRuleNamePostfix(context.guessing);
3558: }
3559: _print(rr.targetRule + postfix + "(");
3560: getPrintWriterManager().endMapping();
3561:
3562: // lexers must tell rule if it should set _returnToken
3563: if (grammar instanceof LexerGrammar) {
3564: RuleSymbol target = (RuleSymbol) grammar
3565: .getSymbol(rr.targetRule);
3566: Boolean alwaysGenToken = checkLexerRuleGenToken(target);
3567: if (!Tool.agressive || (alwaysGenToken == null)) {
3568: // if labeled, could access Token, so tell rule to create
3569: if (rr.getLabel() != null) {
3570: _print("true");
3571: } else {
3572: _print("false");
3573: }
3574: if (commonExtraArgs.length() != 0
3575: || rr.args != null) {
3576: _print(",");
3577: }
3578: } else {
3579: _print("/*" + alwaysGenToken.booleanValue() + "*/");
3580: }
3581: }
3582:
3583: // Extra arguments common to all rules for this grammar
3584: _print(commonExtraArgs);
3585: if (commonExtraArgs.length() != 0 && rr.args != null) {
3586: _print(",");
3587: }
3588:
3589: // Process arguments to method, if any
3590: RuleSymbol rs = (RuleSymbol) grammar
3591: .getSymbol(rr.targetRule);
3592: if (rr.args != null) {
3593: if (!(Tool.cloneGuessing && Tool.agressive && (context.guessing != Context.NO_GUESSING))) {
3594: // When not guessing, execute user arg action
3595: ActionTransInfo tInfo = new ActionTransInfo();
3596: String args = processActionForSpecialSymbols(
3597: rr.args, 0, currentRule, tInfo);
3598: if (tInfo.assignToRoot || tInfo.refRuleRoot != null) {
3599: antlrTool.error("Arguments of rule reference '"
3600: + rr.targetRule
3601: + "' cannot set or ref #"
3602: + currentRule.getRuleName(), grammar
3603: .getFilename(), rr.getLine(), rr
3604: .getColumn());
3605: }
3606: _print(args);
3607:
3608: // Warn if the rule accepts no arguments
3609: if (rs.block.argAction == null) {
3610: antlrTool.warning("Rule '" + rr.targetRule
3611: + "' accepts no arguments", grammar
3612: .getFilename(), rr.getLine(), rr
3613: .getColumn());
3614: }
3615: }
3616: } else {
3617: // For C++, no warning if rule has parameters, because there may be default
3618: // values for all of the parameters
3619: if (rs.block.argAction != null) {
3620: antlrTool.warning(
3621: "Missing parameters on reference to rule "
3622: + rr.targetRule, grammar
3623: .getFilename(), rr.getLine(), rr
3624: .getColumn());
3625: }
3626: }
3627: _println(");");
3628:
3629: // move down to the first child while parsing
3630: if (grammar instanceof TreeWalkerGrammar) {
3631: println("_t = _retTree;");
3632: }
3633: } finally {
3634: defaultLine = oldDefaultLine;
3635: }
3636: }
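// Illustrative only: for a parser rule reference "expr" the code above
// emits a plain call such as
//
//   expr();
//
// and, when Tool.cloneGuessing is set and we are inside a guess, the
// cloned-rule variant
//
//   expr_g();
//
// (postfix from getRuleNamePostfix above). Lexer rule calls additionally
// pass the _createToken flag (true when the reference is labeled) as
// their first argument.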
3637:
3638: protected void genSemPred(String pred, int line, Context context) {
3639: // translate $ and # references
3640: ActionTransInfo tInfo = new ActionTransInfo();
3641: pred = processActionForSpecialSymbols(pred, line, currentRule,
3642: tInfo);
3643: // ignore translation info...we don't need to do anything with it.
3644: String escapedPred = charFormatter.escapeString(pred);
3645:
3646: // if debugging, wrap the semantic predicate evaluation in a method
3647: // that can tell SemanticPredicateListeners the result
3648: if (grammar.debuggingOutput
3649: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar)))
3650: pred = "fireSemanticPredicateEvaluated(antlr.debug.SemanticPredicateEvent.VALIDATING,"
3651: + addSemPred(escapedPred) + "," + pred + ")";
3652: println("if (!(" + pred + "))", line);
3653: if (MatchExceptionState.throwRecExceptions) {
3654: println(" throw new SemanticException(\"" + escapedPred
3655: + "\");", line);
3656: } else {
3657: println("{matchError=true;");
3658: if (!Tool.cloneGuessing
3659: || (context.guessing == Context.NO_GUESSING)) {
3660: if (!Tool.cloneGuessing) {
3661: print("if (inputState.guessing == 0) ");
3662: }
3663: println("matchException = new SemanticException(\""
3664: + escapedPred + "\");", line);
3665: }
3666: println(getCheckString(context) + "}");
3667: }
3668: }
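// Illustrative only: for predicate text n<4 with throwRecExceptions
// enabled, genSemPred above emits
//
//   if (!(n<4))
//     throw new SemanticException("n<4");
//
// (the escaped predicate text doubles as the exception message).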
3669:
3670: /** Write an array of Strings which are the semantic predicate
3671: * expressions. The debugger will reference them by number only
3672: */
3673: protected void genSemPredMap() {
3674: Enumeration e = semPreds.elements();
3675: println("private String _semPredNames[] = {", NO_MAPPING);
3676: while (e.hasMoreElements())
3677: println("\"" + e.nextElement() + "\",", NO_MAPPING);
3678: println("};", NO_MAPPING);
3679: }
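// Illustrative only: with two registered predicates (texts are whatever
// the grammar declared), genSemPredMap produces
//
//   private String _semPredNames[] = {
//       "n<4",
//       "isTypeName(LT(1).getText())",
//   };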
3680:
3681: public int loopCount = 0;
3682:
3683: protected void genSynPred(SynPredBlock blk, String lookaheadExpr,
3684: Context context) {
3685: int oldDefaultLine = defaultLine;
3686: try {
3687: defaultLine = blk.getLine();
3688: if (DEBUG_CODE_GENERATOR)
3689: System.out.println("gen=>(" + blk + ")");
3690:
3691: // Dump synpred result variable
3692: println("boolean synPredMatched" + blk.ID + " = false;");
3693:
3694: // inserted by Ole Kniemeyer, December 9, 2005
3695: if (grammar instanceof TreeWalkerGrammar) {
3696: println("if (_t==null) _t=ASTNULL;");
3697: }
3698:
3699: // Gen normal lookahead test
3700: println("if (" + lookaheadExpr + ") {");
3701: tabs++;
3702:
3703: // Save input state
3704: if (grammar instanceof TreeWalkerGrammar) {
3705: println("AST __t" + blk.ID + " = _t;");
3706: } else {
3707: println("int _m" + blk.ID + " = mark();");
3708: }
3709:
3710: // Once inside the try, assume synpred works unless exception caught
3711: //println("synPredMatched" + blk.ID + " = true;");
3712: println("inputState.guessing++;");
3713:
3714: // if debugging, tell listeners that a synpred has started
3715: if (grammar.debuggingOutput
3716: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
3717: println("fireSyntacticPredicateStarted();");
3718: }
3719:
3720: //vk++
3721: if (grammar.traceSyntacticPredicates
3722: && (grammar instanceof ParserGrammar)) {
3723: println(" syntacticPredicateStarted("
3724: + syntacticPredId + ','
3725: + "inputState.guessing-1" + ',' + blk.line
3726: + ");");
3727: }
3728: //vk--
3729:
3730: syntacticPredLevel++;
3731: if (MatchExceptionState.throwRecExceptions) {
3732: println("try {");
3733: }
3734: String guessLoopLabel = "guess" + blk.ID;
3735: printLoopStart(guessLoopLabel);
3736: tabs++;
3737: gen((AlternativeBlock) blk, new Context(guessLoopLabel,
3738: Context.DIRECT_GUESSING)); // gen code to test predicate
3739: tabs--;
3740: //println("System.out.println(\"pred "+blk+" succeeded\");");
3741: println("synPredMatched" + blk.ID + " = true;");
3742: printLoopEnd(guessLoopLabel);
3743: if (MatchExceptionState.throwRecExceptions) {
3744: println("}");
3745: println("catch (" + exceptionThrown + " pe) {");
3746: tabs++;
3747: println("synPredMatched" + blk.ID + " = false;");
3748: //println("System.out.println(\"pred "+blk+" failed\");");
3749: tabs--;
3750: println("}");
3751: }
3752:
3753: println("if (matchError) {");
3754: println(" resetMatchError();");
3755: println("}");
3756:
3757: // Restore input state
3758: if (grammar instanceof TreeWalkerGrammar) {
3759: println("_t = __t" + blk.ID + ";");
3760: } else {
3761: println("rewind(_m" + blk.ID + ");");
3762: }
3763:
3764: println("inputState.guessing--;");
3765:
3766: // if debugging, tell listeners how the synpred turned out
3767: if (grammar.debuggingOutput
3768: && ((grammar instanceof ParserGrammar) || (grammar instanceof LexerGrammar))) {
3769: println("if (synPredMatched" + blk.ID + ")");
3770: println(" fireSyntacticPredicateSucceeded();");
3771: println("else");
3772: println(" fireSyntacticPredicateFailed();");
3773: }
3774:
3775: //vk++
3776: if (grammar.traceSyntacticPredicates
3777: && (grammar instanceof ParserGrammar)) {
3778: println("if (synPredMatched" + blk.ID + ")");
3779: println(" syntacticPredicateSucceeded("
3780: + syntacticPredId + ','
3781: + "inputState.guessing);");
3782: println("else");
3783: println(" syntacticPredicateFailed(" + syntacticPredId
3784: + ',' + "inputState.guessing);");
3785: }
3786: syntacticPredId++;
3787: //vk--
3788:
3789: syntacticPredLevel--;
3790: tabs--;
3791:
3792: // Close lookahead test
3793: println("}");
3794:
3795: // Test synpred result
3796: println("if ( synPredMatched" + blk.ID + " ) {");
3797: } finally {
3798: defaultLine = oldDefaultLine;
3799: }
3800: }
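// Illustrative only: for a synpred block with ID 7 in a parser grammar,
// the scaffold printed above surrounds the predicate body roughly like:
//
//   boolean synPredMatched7 = false;
//   if ( <lookahead test> ) {
//       int _m7 = mark();
//       inputState.guessing++;
//       ...predicate alternatives, wrapped in a labeled guess7 block...
//       synPredMatched7 = true;
//       if (matchError) {
//           resetMatchError();
//       }
//       rewind(_m7);
//       inputState.guessing--;
//   }
//   if ( synPredMatched7 ) {
//
// The caller is responsible for closing that final if.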
3801:
3802: /** Generate a static array containing the names of the tokens,
3803: * indexed by the token type values. This static array is used
3804: * to format error messages so that the token identifiers or literal
3805: * strings are displayed instead of the token numbers.
3806: *
3807: * If a lexical rule has a paraphrase, use it rather than the
3808: * token label.
3809: */
3810: public void genTokenStrings() {
3811: int oldDefaultLine = defaultLine;
3812: try {
3813: defaultLine = NO_MAPPING;
3814: // Generate a string for each token. This creates a static
3815: // array of Strings indexed by token type.
3816: println("");
3817: println("public static final String[] _tokenNames = {");
3818: tabs++;
3819:
3820: // Walk the token vocabulary and generate a Vector of strings
3821: // from the tokens.
3822: Vector v = grammar.tokenManager.getVocabulary();
3823: for (int i = 0; i < v.size(); i++) {
3824: String s = (String) v.elementAt(i);
3825: if (s == null) {
3826: s = "<" + String.valueOf(i) + ">";
3827: }
3828: if (!s.startsWith("\"") && !s.startsWith("<")) {
3829: TokenSymbol ts = (TokenSymbol) grammar.tokenManager
3830: .getTokenSymbol(s);
3831: if (ts != null && ts.getParaphrase() != null) {
3832: s = StringUtils.stripFrontBack(ts
3833: .getParaphrase(), "\"", "\"");
3834: }
3835: }
3836: print(charFormatter.literalString(s));
3837: if (i != v.size() - 1) {
3838: _print(",");
3839: }
3840: _println("");
3841: }
3842:
3843: // Close the string array initializer
3844: tabs--;
3845: println("};");
3846: } finally {
3847: defaultLine = oldDefaultLine;
3848: }
3849: }
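// Illustrative only: for a small grammar the array generated above
// resembles the following (types 0-3 are typically ANTLR's predefined
// slots; entries without a symbol print as "<n>", and a paraphrase, if
// declared, replaces the plain token label):
//
//   public static final String[] _tokenNames = {
//       "<0>",
//       "EOF",
//       "<2>",
//       "NULL_TREE_LOOKAHEAD",
//       "ID",
//       "\"begin\""
//   };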
3850:
3851: /** Create and set Integer token type objects that map
3852: * to Java Class objects (which AST node to create).
3853: */
3854: protected void genTokenASTNodeMap() {
3855: int oldDefaultLine = defaultLine;
3856: try {
3857: defaultLine = NO_MAPPING;
3858: println("");
3859: println("protected void buildTokenTypeASTClassMap() {");
3860: // Generate a map.put("T","TNode") for each token
3861: // if heterogeneous node known for that token T.
3862: tabs++;
3863: boolean generatedNewHashtable = false;
3864: int n = 0;
3865: // Walk the token vocabulary and generate puts.
3866: Vector v = grammar.tokenManager.getVocabulary();
3867: for (int i = 0; i < v.size(); i++) {
3868: String s = (String) v.elementAt(i);
3869: if (s != null) {
3870: TokenSymbol ts = grammar.tokenManager
3871: .getTokenSymbol(s);
3872: if (ts != null && ts.getASTNodeType() != null) {
3873: n++;
3874: if (!generatedNewHashtable) {
3875: // only generate if we are going to add a mapping
3876: println("tokenTypeToASTClassMap = new Hashtable();");
3877: generatedNewHashtable = true;
3878: }
3879: println("tokenTypeToASTClassMap.put(new Integer("
3880: + ts.getTokenType()
3881: + "), "
3882: + ts.getASTNodeType() + ".class);");
3883: }
3884: }
3885: }
3886:
3887: if (n == 0) {
3888: println("tokenTypeToASTClassMap=null;");
3889: }
3890: tabs--;
3891: println("};");
3892: } finally {
3893: defaultLine = oldDefaultLine;
3894: }
3895: }
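// Illustrative only: if token T was declared with AST node type TNode
// (and, say, got token type 6), the method generated above contains
//
//   protected void buildTokenTypeASTClassMap() {
//       tokenTypeToASTClassMap = new Hashtable();
//       tokenTypeToASTClassMap.put(new Integer(6), TNode.class);
//   };
//
// and simply assigns null when no token declares a node type.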
3896:
3897: protected void genTokenASTCreateMethod() {
3898: int oldDefaultLine = defaultLine;
3899: try {
3900: defaultLine = NO_MAPPING;
3901: println("");
3902: println("protected static AST createTokenASTByType(int type) {");
3903: // Generate a map.put("T","TNode") for each token
3904: // if heterogeneous node known for that token T.
3905: tabs++;
3906: println("switch(type) {");
3907: tabs++;
3908: // Walk the token vocabulary and generate puts.
3909: Vector v = grammar.tokenManager.getVocabulary();
3910: for (int i = 0; i < v.size(); i++) {
3911: String s = (String) v.elementAt(i);
3912: if (s != null) {
3913: TokenSymbol ts = grammar.tokenManager
3914: .getTokenSymbol(s);
3915: if (ts != null && ts.getASTNodeType() != null) {
3916: println("case " + ts.getTokenType()
3917: + " : return new "
3918: + ts.getASTNodeType() + "();");
3919: }
3920: }
3921: }
3922: tabs--;
3923: println("}");
3924: //println("assert(true) : \"AST token type not found\";");
3925: println("return null;");
3926: tabs--;
3927: println("};");
3928: } finally {
3929: defaultLine = oldDefaultLine;
3930: }
3931: }
3932:
3933: /** Generate the token types Java file */
3934: protected void genTokenTypes(TokenManager tm) throws IOException {
3935: int oldDefaultLine = defaultLine;
3936: try {
3937: defaultLine = NO_MAPPING;
3938: // Open the token output Java file and set the currentOutput stream
3939: // SAS: file open was moved to a method so a subclass can override
3940: // This was mainly for the VAJ interface
3941: currentOutput = getPrintWriterManager().setupOutput(
3942: antlrTool, tm.getName() + TokenTypesFileSuffix);
3943:
3944: tabs = 0;
3945:
3946: // Generate the header common to all Java files
3947: genHeader();
3948: // Do not use printAction because we assume tabs==0
3949: try {
3950: defaultLine = behavior.getHeaderActionLine("");
3951: println(behavior.getHeaderAction(""));
3952: } finally {
3953: defaultLine = NO_MAPPING;
3954: }
3955:
3956: // Encapsulate the definitions in an interface. This can be done
3957: // because they are all constants.
3958: println("public interface " + tm.getName()
3959: + TokenTypesFileSuffix + " {");
3960: tabs++;
3961:
3962: // Generate a definition for each token type
3963: Vector v = tm.getVocabulary();
3964:
3965: // Do special tokens manually
3966: println("int EOF = " + Token.EOF_TYPE + ";");
3967: println("int NULL_TREE_LOOKAHEAD = "
3968: + Token.NULL_TREE_LOOKAHEAD + ";");
3969:
3970: for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
3971: String s = (String) v.elementAt(i);
3972: if (s != null) {
3973: if (s.startsWith("\"")) {
3974: // a string literal
3975: StringLiteralSymbol sl = (StringLiteralSymbol) tm
3976: .getTokenSymbol(s);
3977: if (sl == null) {
3978: antlrTool.fatalError("String literal " + s
3979: + " not in symbol table");
3980: } else if (sl.label != null) {
3981: println("int " + sl.label + " = " + i + ";");
3982: } else {
3983: String mangledName = mangleLiteral(s);
3984: if (mangledName != null) {
3985: // We were able to create a meaningful mangled token name
3986: println("int " + mangledName + " = "
3987: + i + ";");
3988: // if no label specified, make the label equal to the mangled name
3989: sl.label = mangledName;
3990: } else {
3991: println("// " + s + " = " + i);
3992: }
3993: }
3994: } else if (!s.startsWith("<")) {
3995: println("int " + s + " = " + i + ";");
3996: }
3997: }
3998: }
3999:
4000: // Close the interface
4001: tabs--;
4002: println("}");
4003:
4004: // Close the tokens output file
4005: getPrintWriterManager().finishOutput();
4006: exitIfError();
4007: } finally {
4008: defaultLine = oldDefaultLine;
4009: }
4010: }
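// Illustrative only: for a grammar named MyParser with one token ID and
// one literal "begin", and the usual values of Token.EOF_TYPE and
// Token.NULL_TREE_LOOKAHEAD, the generated interface resembles
//
//   public interface MyParserTokenTypes {
//       int EOF = 1;
//       int NULL_TREE_LOOKAHEAD = 3;
//       int ID = 4;
//       int LITERAL_begin = 5;
//   }
//
// (the numeric values are vocabulary indexes, so they are grammar-dependent).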
4011:
4012: /** Get a string for an expression to generate creation of an AST subtree.
4013: * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
4014: */
4015: public String getASTCreateString(Vector v) {
4016: if (v.size() == 0) {
4017: return "";
4018: }
4019: StringBuffer buf = new StringBuffer();
4020: buf.append("(" + labeledElementASTType
4021: + ")astFactory.make( (new ASTArray(" + v.size() + "))");
4022: for (int i = 0; i < v.size(); i++) {
4023: buf.append(".add(" + v.elementAt(i) + ")");
4024: }
4025: buf.append(")");
4026: return buf.toString();
4027: }
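// Illustrative only: for v = { "a_AST", "b_AST" } the method above returns
//
//   (AST)astFactory.make( (new ASTArray(2)).add(a_AST).add(b_AST))
//
// with labeledElementASTType ("AST" here) supplying the cast.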
4028:
4029: /** Get a string for an expression to generate creation of an AST node
4030: * @param atom The grammar node for which you are creating the node
4031: * @param astCtorArgs The arguments to the AST constructor
4032: */
4033: public String getASTCreateString(GrammarAtom atom,
4034: String astCtorArgs) {
4035: //System.out.println("getASTCreateString("+atom+","+astCtorArgs+")");
4036: if (atom != null && atom.getASTNodeType() != null) {
4037: // they specified a type either on the reference or in tokens{} section
4038: return "(" + atom.getASTNodeType() + ")"
4039: + "astFactory.create(" + astCtorArgs + ", new "
4040: + atom.getASTNodeType() + "())";
4041: } else {
4042: // must be an action or something since not referencing an atom
4043: return getASTCreateString(astCtorArgs);
4044: }
4045: }
4046:
4047: /** Get a string for an expression to generate creation of an AST node.
4048: * Parse the first (possibly only) argument looking for the token type.
4049: * If the token type is a valid token symbol, ask for its AST node type
4050: * and, for 1- or 2-argument forms, append that node type to the call.
4051: * The forms are #[T], #[T,"t"], and as of 2.7.2 #[T,"t",ASTclassname].
4052: *
4053: * @param astCtorArgs The arguments to the AST constructor
4054: */
4055: public String getASTCreateString(String astCtorArgs) {
4056: //System.out.println("AST CTOR: "+astCtorArgs);
4057: if (astCtorArgs == null) {
4058: astCtorArgs = "";
4059: }
4060: int nCommas = 0;
4061: for (int i = 0; i < astCtorArgs.length(); i++) {
4062: if (astCtorArgs.charAt(i) == ',') {
4063: nCommas++;
4064: }
4065: }
4066: //System.out.println("num commas="+nCommas);
4067: if (nCommas < 2) { // if 1 or 2 args
4068: int firstComma = astCtorArgs.indexOf(',');
4069: int lastComma = astCtorArgs.lastIndexOf(',');
4070: String tokenName = astCtorArgs;
4071: if (nCommas > 0) {
4072: tokenName = astCtorArgs.substring(0, firstComma);
4073: }
4074: //System.out.println("Checking for ast node type of "+tokenName);
4075: TokenSymbol ts = grammar.tokenManager
4076: .getTokenSymbol(tokenName);
4077: if (ts != null) {
4078: String astNodeType = ts.getASTNodeType();
4079: //System.out.println("node type of "+tokenName+" is "+astNodeType);
4080: String emptyText = "";
4081: if (nCommas == 0) {
4082: // need to add 2nd arg of blank text for token text
4083: emptyText = ",\"\"";
4084: }
4085: if (astNodeType != null) {
4086: //return "("+astNodeType+")"+
4087: return "astFactory.create(" + astCtorArgs
4088: + emptyText + ", new " + astNodeType
4089: + "())";
4090: }
4091: // fall through and just do a regular create with cast on front
4092: // if necessary (it differs from default "AST").
4093: }
4094: if (labeledElementASTType.equals("AST")) {
4095: return "astFactory.create(" + astCtorArgs + ")";
4096: }
4097: return "(" + labeledElementASTType + ")"
4098: + "astFactory.create(" + astCtorArgs + ")";
4099: }
4100: // create default type or (since 2.7.2) 3rd arg is classname
4101: return "(" + labeledElementASTType + ")astFactory.create("
4102: + astCtorArgs + ")";
4103: }
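// Illustrative only: with no heterogeneous node type registered for PLUS
// and the default labeledElementASTType of "AST", the method above turns
//
//   #[PLUS]      into  astFactory.create(PLUS)
//   #[PLUS,"+"]  into  astFactory.create(PLUS,"+")
//
// whereas, if PLUS was declared with AST node type PlusNode,
//
//   #[PLUS,"+"]  into  astFactory.create(PLUS,"+", new PlusNode())
//
// (a cast to labeledElementASTType is prepended when it isn't plain "AST").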
4104:
4105: protected String getLookaheadTestExpression(Lookahead[] look,
4106: int k, int gr) {
4107: StringBuffer e = new StringBuffer(100);
4108: boolean first = true;
4109:
4110: e.append("(");
4111: for (int i = 1; i <= k; i++) {
4112: BitSet p = look[i].fset;
4113: if (!first) {
4114: e.append(") && (");
4115: }
4116: first = false;
4117:
4118: // Syn preds can yield <end-of-syn-pred> (epsilon) lookahead.
4119: // There is no way to predict what that token would be. Just
4120: // allow anything instead.
4121: if (look[i].containsEpsilon()) {
4122: e.append("true");
4123: } else {
4124: e.append(getLookaheadTestTerm(i, p, gr));
4125: }
4126: }
4127: e.append(")");
4128:
4129: return e.toString();
4130: }
4131:
4132: private int getAltLASize(Alternative alt, int maxDepth) {
4133: int depth = alt.lookaheadDepth;
4134: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
4135: // if the decision is nondeterministic, do the best we can: LL(k)
4136: // any predicates that are around will be generated later.
4137: depth = grammar.maxk;
4138: }
4139: return (maxDepth == 0) ? 0 : depth;
4140: }
4141:
4142: /**Generate a lookahead test expression for an alternate. This
4143: * will be a series of tests joined by '&&' and enclosed by '()',
4144: * the number of such tests being determined by the depth of the lookahead.
4145: */
4146: protected String getLookaheadTestExpression(Alternative alt,
4147: int maxDepth, int gr) {
4148: int depth = getAltLASize(alt, maxDepth);
4149: if (depth == 0) {
4150: // empty lookahead can result from alt with sem pred
4151: // that can see end of token. E.g., A : {pred}? ('a')? ;
4152: return "( true )";
4153: }
4154: return "(" + getLookaheadTestExpression(alt.cache, depth, gr)
4155: + ")";
4156: }
4157:
4158: /**Generate a depth==1 lookahead test expression given the BitSet.
4159: * This may be one of:
4160: * 1) a series of 'x==X||' tests
4161: * 2) a range test using >= && <= where possible,
4162: * 3) a bitset membership test for complex comparisons
4163: * @param k The lookahead level
4164: * @param p The lookahead set for level k
4165: */
4166: protected String getLookaheadTestTerm(int k, BitSet p, int gr) {
4167: // Determine the name of the item to be compared
4168: String ts = lookaheadString(k, gr);
4169:
4170: // Generate a range expression if possible
4171: int[] elems = p.toArray();
4172: if (elementsAreRange(elems)) {
4173: return getRangeExpression(k, elems, gr);
4174: }
4175:
4176: // Generate a bitset membership test if possible
4177: StringBuffer e;
4178: int degree = p.degree();
4179: if (degree == 0) {
4180: return "true";
4181: }
4182:
4183: if (degree >= bitsetTestThreshold) {
4184: int bitsetIdx = markBitsetForGen(p);
4185: return getBitsetName(bitsetIdx) + ".member(" + ts + ")";
4186: }
4187:
4188: // Otherwise, generate the long-winded series of "x==X||" tests
4189: e = new StringBuffer();
4190: for (int i = 0; i < elems.length; i++) {
4191: // Get the compared-to item (token or character value)
4192: String cs = getValueString(elems[i]);
4193:
4194: // Generate the element comparison
4195: if (i > 0)
4196: e.append("||");
4197: e.append(ts);
4198: e.append("==");
4199: e.append(cs);
4200: }
4201: return e.toString();
4202: }
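// Illustrative only: the three shapes this term can take (ts == "LA(1)"
// for k==1 in a parser; _tokenSet_3 stands for whatever name
// getBitsetName assigns):
//
//   LA(1)==ID||LA(1)==COMMA           // small set: chain of == tests
//   (LA(1) >= '0' && LA(1) <= '9')    // contiguous range (lexer shown)
//   _tokenSet_3.member(LA(1))         // degree >= threshold: bitset test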
4203:
4204: /** Return an expression for testing a contiguous range of elements
4205: * @param k The lookahead level
4206: * @param elems The elements representing the set, usually from BitSet.toArray().
4207: * @return String containing test expression.
4208: */
4209: public String getRangeExpression(int k, int[] elems, int gr) {
4210: if (!elementsAreRange(elems)) {
4211: antlrTool
4212: .fatalError("getRangeExpression called with non-range");
4213: }
4214: int begin = elems[0];
4215: int end = elems[elems.length - 1];
4216: return "(" + lookaheadString(k, gr) + " >= "
4217: + getValueString(begin) + " && "
4218: + lookaheadString(k, gr) + " <= " + getValueString(end)
4219: + ")";
4220: }
4221:
4222: /** getValueString: get a string representation of a token or char value
4223: * @param value The token or char value
4224: */
4225: private String getValueString(int value) {
4226: String cs;
4227: if (grammar instanceof LexerGrammar) {
4228: cs = charFormatter.literalChar(value);
4229: } else {
4230: TokenSymbol ts = grammar.tokenManager
4231: .getTokenSymbolAt(value);
4232: if (ts == null) {
4233: return "" + value; // return token type as string
4234: // tool.panic("vocabulary for token type " + value + " is null");
4235: }
4236: String tId = ts.getId();
4237: if (ts instanceof StringLiteralSymbol) {
4238: // if string literal, use predefined label if any
4239: // if no predefined, try to mangle into LITERAL_xxx.
4240: // if can't mangle, use int value as last resort
4241: StringLiteralSymbol sl = (StringLiteralSymbol) ts;
4242: String label = sl.getLabel();
4243: if (label != null) {
4244: cs = label;
4245: } else {
4246: cs = mangleLiteral(tId);
4247: if (cs == null) {
4248: cs = String.valueOf(value);
4249: }
4250: }
4251: } else {
4252: cs = tId;
4253: }
4254: }
4255: return cs;
4256: }
4257:
4258: /**Is the lookahead for this alt empty? */
4259: protected boolean lookaheadIsEmpty(Alternative alt, int maxDepth) {
4260: int depth = alt.lookaheadDepth;
4261: if (depth == GrammarAnalyzer.NONDETERMINISTIC) {
4262: depth = grammar.maxk;
4263: }
4264: for (int i = 1; i <= depth && i <= maxDepth; i++) {
4265: BitSet p = alt.cache[i].fset;
4266: if (p.degree() != 0) {
4267: return false;
4268: }
4269: }
4270: return true;
4271: }
4272:
4273: private String lookaheadString(int k, int gr) {
4274: if (grammar instanceof TreeWalkerGrammar) {
4275: return "_t.getType()";
4276: }
4277: if (gr != 0) {
4278: return "LA" + k + "_" + gr;
4279: }
4280: return "LA(" + k + ")";
4281: }
4282:
4283: /** Mangle a string literal into a meaningful token name. This is
4284: * only possible for literals made up entirely of letters (or '_'). The resulting
4285: * mangled literal name is literalsPrefix with the text of the literal
4286: * appended.
4287: * @return A string representing the mangled literal, or null if not possible.
4288: */
4289: private String mangleLiteral(String s) {
4290: String mangled = antlrTool.literalsPrefix;
4291: for (int i = 1; i < s.length() - 1; i++) {
4292: if (!Character.isLetter(s.charAt(i)) && s.charAt(i) != '_') {
4293: return null;
4294: }
4295: mangled += s.charAt(i);
4296: }
4297: if (antlrTool.upperCaseMangledLiterals) {
4298: mangled = mangled.toUpperCase();
4299: }
4300: return mangled;
4301: }
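// Illustrative only: with the usual literalsPrefix of "LITERAL_", the
// input "\"begin\"" mangles to LITERAL_begin (or LITERAL_BEGIN when
// upperCaseMangledLiterals is set); "\"<=\"" cannot be mangled and
// yields null.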
4302:
4303: /** Map an identifier to its corresponding tree-node variable.
4304: * This is context-sensitive, depending on the rule and alternative
4305: * being generated
4306: * @param idParam The identifier name to map
4307: * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
4308: */
4309: public String mapTreeId(String idParam, ActionTransInfo transInfo) {
4310: // if not in an action of a rule, nothing to map.
4311: if (currentRule == null)
4312: return idParam;
4313:
4314: boolean in_var = false;
4315: String id = idParam;
4316: if (grammar instanceof TreeWalkerGrammar) {
4317: if (!grammar.buildAST) {
4318: in_var = true;
4319: }
4320: // If the id ends with "_in", then map it to the input variable
4321: else if (id.length() > 3
4322: && id.lastIndexOf("_in") == id.length() - 3) {
4323: // Strip off the "_in"
4324: id = id.substring(0, id.length() - 3);
4325: in_var = true;
4326: }
4327: }
4328:
4329: // Check the rule labels. If id is a label, then the output
4330: // variable is label_AST, and the input variable is plain label.
4331: for (int i = 0; i < currentRule.labeledElements.size(); i++) {
4332: AlternativeElement elt = (AlternativeElement) currentRule.labeledElements
4333: .elementAt(i);
4334: if (elt.getLabel().equals(id)) {
4335: return in_var ? id : id + "_AST";
4336: }
4337: }
4338:
4339: // Failing that, check the id-to-variable map for the alternative.
4340: // If the id is in the map, then output variable is the name in the
4341: // map, and input variable is name_in
4342: String s = (String) treeVariableMap.get(id);
4343: if (s != null) {
4344: if (s == NONUNIQUE) {
4345: // There is more than one element with this id
4346: antlrTool.error("Ambiguous reference to AST element "
4347: + id + " in rule " + currentRule.getRuleName());
4348:
4349: return null;
4350: } else if (s.equals(currentRule.getRuleName())) {
4351: // a recursive call to the enclosing rule is
4352: // ambiguous with the rule itself.
4353: antlrTool.error("Ambiguous reference to AST element "
4354: + id + " in rule " + currentRule.getRuleName());
4355: return null;
4356: } else {
4357: return in_var ? s + "_in" : s;
4358: }
4359: }
4360:
4361: // Failing that, check the rule name itself. Output variable
4362: // is rule_AST; input variable is rule_AST_in (treeparsers).
4363: if (id.equals(currentRule.getRuleName())) {
4364: String r = in_var ? id + "_AST_in" : id + "_AST";
4365: if (transInfo != null) {
4366: if (!in_var) {
4367: transInfo.refRuleRoot = r;
4368: }
4369: }
4370: return r;
4371: } else {
4372: // id does not map to anything -- return itself.
4373: return id;
4374: }
4375: }
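// Illustrative only: inside rule "expr", typical mappings performed by
// mapTreeId above (the tmp1_AST name is generator-chosen, shown here
// just as an example of a treeVariableMap entry):
//
//   #e       ->  e_AST        (label e declared on an element)
//   #ID      ->  tmp1_AST     (via treeVariableMap)
//   #expr    ->  expr_AST     (rule root; also noted in transInfo.refRuleRoot)
//   #expr_in ->  expr_AST_in  (tree parsers only)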
4376:
4377: /** Given an element and the name of an associated AST variable,
4378: * create a mapping between the element "name" and the variable name.
4379: */
4380: private void mapTreeVariable(AlternativeElement e, String name) {
4381: // For tree elements, defer to the root
4382: if (e instanceof TreeElement) {
4383: mapTreeVariable(((TreeElement) e).root, name);
4384: return;
4385: }
4386:
4387: // Determine the name of the element, if any, for mapping purposes
4388: String elName = null;
4389:
4390: // Don't map labeled items
4391: if (e.getLabel() == null) {
4392: if (e instanceof TokenRefElement) {
4393: // use the token id
4394: elName = ((TokenRefElement) e).atomText;
4395: } else if (e instanceof RuleRefElement) {
4396: // use the rule name
4397: elName = ((RuleRefElement) e).targetRule;
4398: }
4399: }
4400: // Add the element to the tree variable map if it has a name
4401: if (elName != null) {
4402: if (treeVariableMap.get(elName) != null) {
4403: // Name is already in the map -- mark it as duplicate
4404: treeVariableMap.remove(elName);
4405: treeVariableMap.put(elName, NONUNIQUE);
4406: } else {
4407: treeVariableMap.put(elName, name);
4408: }
4409: }
4410: }
4411:
4412: /** Lexically process $var and tree-specifiers in the action.
4413: * This will replace #id and #(...) with the appropriate
4414: * function calls and/or variables etc...
4415: */
4416: protected String processActionForSpecialSymbols(String actionStr,
4417: int line, RuleBlock currentRule, ActionTransInfo tInfo) {
4418: if (actionStr == null || actionStr.length() == 0)
4419: return null;
4420:
4421: // The action trans info tells us (at the moment) whether an
4422: // assignment was done to the rule's tree root.
4423: if (grammar == null)
4424: return actionStr;
4425:
4426: // see if we have anything to do...
4427: if ((grammar.buildAST && actionStr.indexOf('#') != -1)
4428: || grammar instanceof TreeWalkerGrammar
4429: || ((grammar instanceof LexerGrammar || grammar instanceof ParserGrammar) && actionStr
4430: .indexOf('$') != -1)) {
4431: // Create a lexer to read an action and return the translated version
4432: antlr.actions.java.ActionLexer lexer = new antlr.actions.java.ActionLexer(
4433: actionStr, currentRule, this, tInfo);
4434:
4435: lexer.setLineOffset(line);
4436: lexer.setFilename(grammar.getFilename());
4437: lexer.setTool(antlrTool);
4438:
4439: try {
4440: lexer.mACTION(true);
4441: actionStr = lexer.getTokenObject().getText();
4442: // System.out.println("action translated: "+actionStr);
4443: // System.out.println("trans info is "+tInfo);
4444: } catch (RecognitionException ex) {
4445: lexer.reportError(ex);
4446: return actionStr;
4447: } catch (TokenStreamException tex) {
4448: antlrTool.fatalError("Error reading action:"
4449: + actionStr);
4450: return actionStr;
4451: } catch (CharStreamException io) {
4452: antlrTool.fatalError("Error reading action:"
4453: + actionStr);
4454: return actionStr;
4455: }
4456: }
4457: return actionStr;
4458: }
4459:
4460: protected String getThrowNoViableStr(Context context) {
4461: if (MatchExceptionState.throwRecExceptions) {
4462: return throwNoViable;
4463: } else {
4464: String result;
4465: result = "matchError=true;";
4466: if (!Tool.cloneGuessing
4467: || (context.guessing == Context.NO_GUESSING)) {
4468: //remove throw word
4469: if (!Tool.cloneGuessing) {
4470: result += "if (inputState.guessing == 0) ";
4471: }
4472: result += "matchException="
4473: + throwNoViable.substring(6);
4474: }
4475: result += getCheckString(context);
4476: return result;
4477: }
4478: }
4479:
4480: private void setupGrammarParameters(Grammar g) {
4481: if (g instanceof ParserGrammar) {
4482: labeledElementASTType = "AST";
4483: if (g.hasOption("ASTLabelType")) {
4484: Token tsuffix = g.getOption("ASTLabelType");
4485: if (tsuffix != null) {
4486: String suffix = StringUtils.stripFrontBack(tsuffix
4487: .getText(), "\"", "\"");
4488: if (suffix != null) {
4489: labeledElementASTType = suffix;
4490: }
4491: }
4492: }
4493: labeledElementType = "Token ";
4494: labeledElementInit = "null";
4495: commonExtraArgs = "";
4496: commonExtraParams = "";
4497: commonLocalVars = "";
4498: lt1Value = "LT(1)";
4499: exceptionThrown = "RecognitionException";
4500: throwNoViable = "throw new NoViableAltException(LT(1), getFilename());";
4501: } else if (g instanceof LexerGrammar) {
4502: labeledElementType = "char ";
4503: labeledElementInit = "'\\0'";
4504: commonExtraArgs = "";
4505: commonExtraParams = "boolean _createToken";
4506: commonLocalVars = "int _ttype; Token _token=null; int _begin=text.length();";
4507: lt1Value = "LA(1)";
4508: exceptionThrown = "RecognitionException";
4509: throwNoViable = "throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());";
4510: } else if (g instanceof TreeWalkerGrammar) {
4511: labeledElementASTType = "AST";
4512: labeledElementType = "AST";
4513: if (g.hasOption("ASTLabelType")) {
4514: Token tsuffix = g.getOption("ASTLabelType");
4515: if (tsuffix != null) {
4516: String suffix = StringUtils.stripFrontBack(tsuffix
4517: .getText(), "\"", "\"");
4518: if (suffix != null) {
4519: labeledElementASTType = suffix;
4520: labeledElementType = suffix;
4521: }
4522: }
4523: }
4524: if (!g.hasOption("ASTLabelType")) {
4525: g.setOption("ASTLabelType", new TokenImpl(
4526: ANTLRTokenTypes.STRING_LITERAL, "AST"));
4527: }
4528: labeledElementInit = "null";
4529: commonExtraArgs = "_t";
4530: commonExtraParams = "AST _t";
4531: commonLocalVars = "";
4532: lt1Value = "(" + labeledElementASTType + ")_t";
4533: exceptionThrown = "RecognitionException";
4534: throwNoViable = "throw new NoViableAltException(_t);";
4535: } else {
4536: antlrTool.fatalError("Unknown grammar type");
4537: }
4538: }
4539:
4540: /**
4541: * Get the printwriter manager that manages output
4542: * @return The print writer manager
4543: */
4544: public JavaCodeGeneratorPrintWriterManager getPrintWriterManager() {
4545: if (printWriterManager == null)
4546: printWriterManager = new DefaultJavaCodeGeneratorPrintWriterManager();
4547: return printWriterManager;
4548: }
4549:
4550: /**
4551: * Set the print writer manager
4552: * @param printWriterManager the new manager
4553: */
4554: public void setPrintWriterManager(
4555: JavaCodeGeneratorPrintWriterManager printWriterManager) {
4556: this.printWriterManager = printWriterManager;
4557: }
4558:
4559: /** {@inheritDoc} */
4560: public void setTool(Tool tool) {
4561: super.setTool(tool);
4562: }
4563: }
|