001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr;
029:
030: import org.antlr.tool.*;
031: import org.antlr.codegen.CodeGenerator;
032: import org.antlr.analysis.*;
033: import org.antlr.runtime.misc.Stats;
034:
035: import java.io.*;
036: import java.util.*;
037:
038: /** The main ANTLR entry point. Read a grammar and generate a parser. */
039: public class Tool {
    /** Tool version string, reported in the banner printed by main(). */
    public static final String VERSION = "3.0.1";

    /** Sentinel value meaning no -o/-fo output directory was given. */
    public static final String UNINITIALIZED_DIR = "<unset-dir>";

    // Input parameters / option

    // Grammar file names collected from the command line (elements are String).
    protected List grammarFileNames = new ArrayList();
    protected boolean generate_NFA_dot = false; // -nfa: emit DOT for each rule's NFA
    protected boolean generate_DFA_dot = false; // -dfa: emit DOT for each decision DFA
    protected String outputDirectory = UNINITIALIZED_DIR; // -o/-fo target directory
    protected String libDirectory = "."; // -lib: where token vocab files are found
    protected boolean debug = false; // -debug: generate a parser emitting debug events
    protected boolean trace = false; // -trace: generate a tracing parser
    protected boolean profile = false; // -profile: generate a profiling parser
    protected boolean report = false; // -report: print a grammar report after processing
    protected boolean printGrammar = false; // -print: echo grammar without actions
    protected boolean depend = false; // -depend: print file dependencies and stop
    protected boolean forceAllFilesToOutputDir = false; // -fo: ignore grammar subdirs

    // the internal options are for my use on the command line during dev

    public static boolean internalOption_PrintGrammarTree = false;
    public static boolean internalOption_PrintDFA = false;
    public static boolean internalOption_ShowNFConfigsInDFA = false;
    public static boolean internalOption_watchNFAConversion = false;
065:
066: public static void main(String[] args) {
067: ErrorManager.info("ANTLR Parser Generator Version " + VERSION
068: + " (August 13, 2007) 1989-2007");
069: Tool antlr = new Tool(args);
070: antlr.process();
071: System.exit(0);
072: }
073:
    /** Create a tool with no options set; configure via setters or processArgs(). */
    public Tool() {
    }

    /** Create a tool and immediately parse the given command-line arguments. */
    public Tool(String[] args) {
        processArgs(args);
    }
080:
081: public void processArgs(String[] args) {
082: if (args == null || args.length == 0) {
083: help();
084: return;
085: }
086: for (int i = 0; i < args.length; i++) {
087: if (args[i].equals("-o") || args[i].equals("-fo")) {
088: if (i + 1 >= args.length) {
089: System.err
090: .println("missing output directory with -fo/-o option; ignoring");
091: } else {
092: if (args[i].equals("-fo")) { // force output into dir
093: forceAllFilesToOutputDir = true;
094: }
095: i++;
096: outputDirectory = args[i];
097: if (outputDirectory.endsWith("/")
098: || outputDirectory.endsWith("\\")) {
099: outputDirectory = outputDirectory.substring(0,
100: outputDirectory.length() - 1);
101: }
102: File outDir = new File(outputDirectory);
103: if (outDir.exists() && !outDir.isDirectory()) {
104: ErrorManager.error(
105: ErrorManager.MSG_OUTPUT_DIR_IS_FILE,
106: outputDirectory);
107: libDirectory = ".";
108: }
109: }
110: } else if (args[i].equals("-lib")) {
111: if (i + 1 >= args.length) {
112: System.err
113: .println("missing library directory with -lib option; ignoring");
114: } else {
115: i++;
116: libDirectory = args[i];
117: if (libDirectory.endsWith("/")
118: || libDirectory.endsWith("\\")) {
119: libDirectory = libDirectory.substring(0,
120: libDirectory.length() - 1);
121: }
122: File outDir = new File(libDirectory);
123: if (!outDir.exists()) {
124: ErrorManager.error(
125: ErrorManager.MSG_DIR_NOT_FOUND,
126: libDirectory);
127: libDirectory = ".";
128: }
129: }
130: } else if (args[i].equals("-nfa")) {
131: generate_NFA_dot = true;
132: } else if (args[i].equals("-dfa")) {
133: generate_DFA_dot = true;
134: } else if (args[i].equals("-debug")) {
135: debug = true;
136: } else if (args[i].equals("-trace")) {
137: trace = true;
138: } else if (args[i].equals("-report")) {
139: report = true;
140: } else if (args[i].equals("-profile")) {
141: profile = true;
142: } else if (args[i].equals("-print")) {
143: printGrammar = true;
144: } else if (args[i].equals("-depend")) {
145: depend = true;
146: } else if (args[i].equals("-message-format")) {
147: if (i + 1 >= args.length) {
148: System.err
149: .println("missing output format with -message-format option; using default");
150: } else {
151: i++;
152: ErrorManager.setFormat(args[i]);
153: }
154: } else if (args[i].equals("-Xgrtree")) {
155: internalOption_PrintGrammarTree = true; // print grammar tree
156: } else if (args[i].equals("-Xdfa")) {
157: internalOption_PrintDFA = true;
158: } else if (args[i].equals("-Xnoprune")) {
159: DFAOptimizer.PRUNE_EBNF_EXIT_BRANCHES = false;
160: } else if (args[i].equals("-Xnocollapse")) {
161: DFAOptimizer.COLLAPSE_ALL_PARALLEL_EDGES = false;
162: } else if (args[i].equals("-Xdbgconversion")) {
163: NFAToDFAConverter.debug = true;
164: } else if (args[i].equals("-Xmultithreaded")) {
165: NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION = false;
166: } else if (args[i].equals("-Xnomergestopstates")) {
167: DFAOptimizer.MERGE_STOP_STATES = false;
168: } else if (args[i].equals("-Xdfaverbose")) {
169: internalOption_ShowNFConfigsInDFA = true;
170: } else if (args[i].equals("-Xwatchconversion")) {
171: internalOption_watchNFAConversion = true;
172: } else if (args[i].equals("-XdbgST")) {
173: CodeGenerator.EMIT_TEMPLATE_DELIMITERS = true;
174: } else if (args[i].equals("-Xnoinlinedfa")) {
175: CodeGenerator.GEN_ACYCLIC_DFA_INLINE = false;
176: } else if (args[i].equals("-Xm")) {
177: if (i + 1 >= args.length) {
178: System.err
179: .println("missing max recursion with -Xm option; ignoring");
180: } else {
181: i++;
182: NFAContext.MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK = Integer
183: .parseInt(args[i]);
184: }
185: } else if (args[i].equals("-Xmaxdfaedges")) {
186: if (i + 1 >= args.length) {
187: System.err
188: .println("missing max number of edges with -Xmaxdfaedges option; ignoring");
189: } else {
190: i++;
191: DFA.MAX_STATE_TRANSITIONS_FOR_TABLE = Integer
192: .parseInt(args[i]);
193: }
194: } else if (args[i].equals("-Xconversiontimeout")) {
195: if (i + 1 >= args.length) {
196: System.err
197: .println("missing max time in ms -Xconversiontimeout option; ignoring");
198: } else {
199: i++;
200: DFA.MAX_TIME_PER_DFA_CREATION = Integer
201: .parseInt(args[i]);
202: }
203: } else if (args[i].equals("-Xnfastates")) {
204: DecisionProbe.verbose = true;
205: } else if (args[i].equals("-X")) {
206: Xhelp();
207: } else {
208: if (args[i].charAt(0) != '-') {
209: // Must be the grammar file
210: grammarFileNames.add(args[i]);
211: }
212: }
213: }
214: }
215:
216: /*
217: protected void checkForInvalidArguments(String[] args, BitSet cmdLineArgValid) {
218: // check for invalid command line args
219: for (int a = 0; a < args.length; a++) {
220: if (!cmdLineArgValid.member(a)) {
221: System.err.println("invalid command-line argument: " + args[a] + "; ignored");
222: }
223: }
224: }
225: */
226:
227: public void process() {
228: int numFiles = grammarFileNames.size();
229: for (int i = 0; i < numFiles; i++) {
230: String grammarFileName = (String) grammarFileNames.get(i);
231: if (numFiles > 1 && !depend) {
232: System.out.println(grammarFileName);
233: }
234: try {
235: if (depend) {
236: BuildDependencyGenerator dep = new BuildDependencyGenerator(
237: this , grammarFileName);
238: List outputFiles = dep.getGeneratedFileList();
239: List dependents = dep.getDependenciesFileList();
240: //System.out.println("output: "+outputFiles);
241: //System.out.println("dependents: "+dependents);
242: System.out.println(dep.getDependencies());
243: continue;
244: }
245: Grammar grammar = getGrammar(grammarFileName);
246: processGrammar(grammar);
247:
248: if (printGrammar) {
249: grammar.printGrammar(System.out);
250: }
251:
252: if (generate_NFA_dot) {
253: generateNFAs(grammar);
254: }
255: if (generate_DFA_dot) {
256: generateDFAs(grammar);
257: }
258: if (report) {
259: GrammarReport report = new GrammarReport(grammar);
260: System.out.println(report.toString());
261: // print out a backtracking report too (that is not encoded into log)
262: System.out.println(report.getBacktrackingReport());
263: // same for aborted NFA->DFA conversions
264: System.out.println(report
265: .getEarlyTerminationReport());
266: }
267: if (profile) {
268: GrammarReport report = new GrammarReport(grammar);
269: Stats.writeReport(
270: GrammarReport.GRAMMAR_STATS_FILENAME,
271: report.toNotifyString());
272: }
273:
274: // now handle the lexer if one was created for a merged spec
275: String lexerGrammarStr = grammar.getLexerGrammar();
276: if (grammar.type == Grammar.COMBINED
277: && lexerGrammarStr != null) {
278: String lexerGrammarFileName = grammar
279: .getImplicitlyGeneratedLexerFileName();
280: Writer w = getOutputFile(grammar,
281: lexerGrammarFileName);
282: w.write(lexerGrammarStr);
283: w.close();
284: StringReader sr = new StringReader(lexerGrammarStr);
285: Grammar lexerGrammar = new Grammar();
286: lexerGrammar.setTool(this );
287: File lexerGrammarFullFile = new File(
288: getFileDirectory(lexerGrammarFileName),
289: lexerGrammarFileName);
290: lexerGrammar.setFileName(lexerGrammarFullFile
291: .toString());
292: lexerGrammar.importTokenVocabulary(grammar);
293: lexerGrammar.setGrammarContent(sr);
294: sr.close();
295: processGrammar(lexerGrammar);
296: }
297: } catch (IOException e) {
298: ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE,
299: grammarFileName);
300: } catch (Exception e) {
301: ErrorManager.error(ErrorManager.MSG_INTERNAL_ERROR,
302: grammarFileName, e);
303: }
304: }
305: }
306:
307: public Grammar getGrammar(String grammarFileName)
308: throws IOException, antlr.TokenStreamException,
309: antlr.RecognitionException {
310: //StringTemplate.setLintMode(true);
311: FileReader fr = null;
312: fr = new FileReader(grammarFileName);
313: BufferedReader br = new BufferedReader(fr);
314: Grammar grammar = new Grammar(this , grammarFileName, br);
315: grammar
316: .setWatchNFAConversion(internalOption_watchNFAConversion);
317: br.close();
318: fr.close();
319: return grammar;
320: }
321:
322: protected void processGrammar(Grammar grammar) {
323: String language = (String) grammar.getOption("language");
324: if (language != null) {
325: CodeGenerator generator = new CodeGenerator(this , grammar,
326: language);
327: grammar.setCodeGenerator(generator);
328: generator.setDebug(debug);
329: generator.setProfile(profile);
330: generator.setTrace(trace);
331: generator.genRecognizer();
332: }
333: }
334:
335: protected void generateDFAs(Grammar g) {
336: for (int d = 1; d <= g.getNumberOfDecisions(); d++) {
337: DFA dfa = g.getLookaheadDFA(d);
338: if (dfa == null) {
339: continue; // not there for some reason, ignore
340: }
341: DOTGenerator dotGenerator = new DOTGenerator(g);
342: String dot = dotGenerator.getDOT(dfa.startState);
343: String dotFileName = g.name + "_dec-" + d;
344: try {
345: writeDOTFile(g, dotFileName, dot);
346: } catch (IOException ioe) {
347: ErrorManager.error(
348: ErrorManager.MSG_CANNOT_GEN_DOT_FILE,
349: dotFileName, ioe);
350: }
351: }
352: }
353:
354: protected void generateNFAs(Grammar g) {
355: DOTGenerator dotGenerator = new DOTGenerator(g);
356: Collection rules = g.getRules();
357: for (Iterator itr = rules.iterator(); itr.hasNext();) {
358: Rule r = (Rule) itr.next();
359: String ruleName = r.name;
360: try {
361: writeDOTFile(g, ruleName, dotGenerator.getDOT(g
362: .getRuleStartState(ruleName)));
363: } catch (IOException ioe) {
364: ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE,
365: ioe);
366: }
367: }
368: }
369:
370: protected void writeDOTFile(Grammar g, String name, String dot)
371: throws IOException {
372: Writer fw = getOutputFile(g, name + ".dot");
373: fw.write(dot);
374: fw.close();
375: }
376:
377: private static void help() {
378: System.err
379: .println("usage: java org.antlr.Tool [args] file.g [file2.g file3.g ...]");
380: System.err
381: .println(" -o outputDir specify output directory where all output is generated");
382: System.err
383: .println(" -fo outputDir same as -o but force even files with relative paths to dir");
384: System.err
385: .println(" -lib dir specify location of token files");
386: System.err
387: .println(" -depend generate file dependencies");
388: System.err
389: .println(" -report print out a report about the grammar(s) processed");
390: System.err
391: .println(" -print print out the grammar without actions");
392: System.err
393: .println(" -debug generate a parser that emits debugging events");
394: System.err
395: .println(" -profile generate a parser that computes profiling information");
396: System.err
397: .println(" -nfa generate an NFA for each rule");
398: System.err
399: .println(" -dfa generate a DFA for each decision point");
400: System.err
401: .println(" -message-format name specify output style for messages");
402: System.err
403: .println(" -X display extended argument list");
404: }
405:
406: private static void Xhelp() {
407: System.err
408: .println(" -Xgrtree print the grammar AST");
409: System.err
410: .println(" -Xdfa print DFA as text ");
411: System.err
412: .println(" -Xnoprune test lookahead against EBNF block exit branches");
413: System.err
414: .println(" -Xnocollapse collapse incident edges into DFA states");
415: System.err
416: .println(" -Xdbgconversion dump lots of info during NFA conversion");
417: System.err
418: .println(" -Xmultithreaded run the analysis in 2 threads");
419: System.err
420: .println(" -Xnomergestopstates do not merge stop states");
421: System.err
422: .println(" -Xdfaverbose generate DFA states in DOT with NFA configs");
423: System.err
424: .println(" -Xwatchconversion print a message for each NFA before converting");
425: System.err
426: .println(" -XdbgST put tags at start/stop of all templates in output");
427: System.err
428: .println(" -Xm m max number of rule invocations during conversion");
429: System.err
430: .println(" -Xmaxdfaedges m max \"comfortable\" number of edges for single DFA state");
431: System.err
432: .println(" -Xconversiontimeout t set NFA conversion timeout for each decision");
433: System.err
434: .println(" -Xnoinlinedfa make all DFA with tables; no inline prediction with IFs");
435: System.err
436: .println(" -Xnfastates for nondeterminisms, list NFA states for each path");
437: }
438:
    /** Set the directory where generated files are written (same effect as -o). */
    public void setOutputDirectory(String outputDirectory) {
        this.outputDirectory = outputDirectory;
    }
442:
443: /** This method is used by all code generators to create new output
444: * files. If the outputDir set by -o is not present it will be created.
445: * The final filename is sensitive to the output directory and
446: * the directory where the grammar file was found. If -o is /tmp
447: * and the original grammar file was foo/t.g then output files
448: * go in /tmp/foo.
449: *
450: * The output dir -o spec takes precedence if it's absolute.
451: * E.g., if the grammar file dir is absolute the output dir is given
452: * precendence. "-o /tmp /usr/lib/t.g" results in "/tmp/T.java" as
453: * output (assuming t.g holds T.java).
454: *
455: * If no -o is specified, then just write to the directory where the
456: * grammar file was found.
457: *
458: * If outputDirectory==null then write a String.
459: */
460: public Writer getOutputFile(Grammar g, String fileName)
461: throws IOException {
462: if (outputDirectory == null) {
463: return new StringWriter();
464: }
465: // output directory is a function of where the grammar file lives
466: // for subdir/T.g, you get subdir here. Well, depends on -o etc...
467: File outputDir = getOutputDirectory(g.getFileName());
468: File outputFile = new File(outputDir, fileName);
469:
470: if (!outputDir.exists()) {
471: outputDir.mkdirs();
472: }
473: FileWriter fw = new FileWriter(outputFile);
474: return new BufferedWriter(fw);
475: }
476:
477: public File getOutputDirectory(String fileNameWithPath) {
478: File outputDir = new File(outputDirectory);
479: String fileDirectory = getFileDirectory(fileNameWithPath);
480: if (outputDirectory != UNINITIALIZED_DIR) {
481: // -o /tmp /var/lib/t.g => /tmp/T.java
482: // -o subdir/output /usr/lib/t.g => subdir/output/T.java
483: // -o . /usr/lib/t.g => ./T.java
484: if (fileDirectory != null
485: && (new File(fileDirectory).isAbsolute() || fileDirectory
486: .startsWith("~")) || // isAbsolute doesn't count this :(
487: forceAllFilesToOutputDir) {
488: // somebody set the dir, it takes precendence; write new file there
489: outputDir = new File(outputDirectory);
490: } else {
491: // -o /tmp subdir/t.g => /tmp/subdir/t.g
492: if (fileDirectory != null) {
493: outputDir = new File(outputDirectory, fileDirectory);
494: } else {
495: outputDir = new File(outputDirectory);
496: }
497: }
498: } else {
499: // they didn't specify a -o dir so just write to location
500: // where grammar is, absolute or relative
501: String dir = ".";
502: if (fileDirectory != null) {
503: dir = fileDirectory;
504: }
505: outputDir = new File(dir);
506: }
507: return outputDir;
508: }
509:
510: /** Open a file in the -lib dir. For now, it's just .tokens files */
511: public BufferedReader getLibraryFile(String fileName)
512: throws IOException {
513: String fullName = libDirectory + File.separator + fileName;
514: FileReader fr = new FileReader(fullName);
515: BufferedReader br = new BufferedReader(fr);
516: return br;
517: }
518:
    /** Return the -lib directory where token vocabulary files are sought. */
    public String getLibraryDirectory() {
        return libDirectory;
    }
522:
523: /** Return the directory containing the grammar file for this grammar.
524: * normally this is a relative path from current directory. People will
525: * often do "java org.antlr.Tool grammars/*.g3" So the file will be
526: * "grammars/foo.g3" etc... This method returns "grammars".
527: */
528: public String getFileDirectory(String fileName) {
529: File f = new File(fileName);
530: return f.getParent();
531: }
532:
    /** If the tool needs to panic/exit, how do we do that?
     * Throws an Error so the whole tool run unwinds; never returns.
     */
    public void panic() {
        throw new Error("ANTLR panic");
    }
537:
538: /** Return a time stamp string accurate to sec: yyyy-mm-dd hh:mm:ss */
539: public static String getCurrentTimeStamp() {
540: GregorianCalendar calendar = new java.util.GregorianCalendar();
541: int y = calendar.get(Calendar.YEAR);
542: int m = calendar.get(Calendar.MONTH) + 1; // zero-based for months
543: int d = calendar.get(Calendar.DAY_OF_MONTH);
544: int h = calendar.get(Calendar.HOUR_OF_DAY);
545: int min = calendar.get(Calendar.MINUTE);
546: int sec = calendar.get(Calendar.SECOND);
547: String sy = String.valueOf(y);
548: String sm = m < 10 ? "0" + m : String.valueOf(m);
549: String sd = d < 10 ? "0" + d : String.valueOf(d);
550: String sh = h < 10 ? "0" + h : String.valueOf(h);
551: String smin = min < 10 ? "0" + min : String.valueOf(min);
552: String ssec = sec < 10 ? "0" + sec : String.valueOf(sec);
553: return new StringBuffer().append(sy).append("-").append(sm)
554: .append("-").append(sd).append(" ").append(sh).append(
555: ":").append(smin).append(":").append(ssec)
556: .toString();
557: }
558:
559: }
|