001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.codegen;
029:
030: import org.antlr.Tool;
031: import org.antlr.analysis.Label;
032: import org.antlr.misc.Utils;
033: import org.antlr.stringtemplate.StringTemplate;
034: import org.antlr.tool.Grammar;
035:
036: import java.io.IOException;
037: import java.util.List;
038:
039: /** The code generator for ANTLR can usually be retargeted just by providing
040: * a new X.stg file for language X, however, sometimes the files that must
041: * be generated vary enough that some X-specific functionality is required.
042: * For example, in C, you must generate header files whereas in Java you do not.
043: * Other languages may want to keep DFA separate from the main
044: * generated recognizer file.
045: *
046: * The notion of a Code Generator target abstracts out the creation
047: * of the various files. As new language targets get added to the ANTLR
048: * system, this target class may have to be altered to handle more
049: * functionality. Eventually, just about all language generation issues
050: * will be expressible in terms of these methods.
051: *
052: * If org.antlr.codegen.XTarget class exists, it is used else
053: * Target base class is used. I am using a superclass rather than an
054: * interface for this target concept because I can add functionality
055: * later without breaking previously written targets (extra interface
056: * methods would force adding dummy functions to all code generator
057: * target classes).
058: *
059: */
060: public class Target {
061:
062: /** For pure strings of Java 16-bit unicode char, how can we display
063: * it in the target language as a literal. Useful for dumping
064: * predicates and such that may refer to chars that need to be escaped
065: * when represented as strings. Also, templates need to be escaped so
066: * that the target language can hold them as a string.
067: *
068: * I have defined (via the constructor) the set of typical escapes,
069: * but your Target subclass is free to alter the translated chars or
070: * add more definitions. This is nonstatic so each target can have
071: * a different set in memory at same time.
072: */
073: protected String[] targetCharValueEscape = new String[255];
074:
075: public Target() {
076: targetCharValueEscape['\n'] = "\\n";
077: targetCharValueEscape['\r'] = "\\r";
078: targetCharValueEscape['\t'] = "\\t";
079: targetCharValueEscape['\b'] = "\\b";
080: targetCharValueEscape['\f'] = "\\f";
081: targetCharValueEscape['\\'] = "\\\\";
082: targetCharValueEscape['\''] = "\\'";
083: targetCharValueEscape['"'] = "\\\"";
084: }
085:
086: protected void genRecognizerFile(Tool tool,
087: CodeGenerator generator, Grammar grammar,
088: StringTemplate outputFileST) throws IOException {
089: String fileName = generator.getRecognizerFileName(grammar.name,
090: grammar.type);
091: generator.write(outputFileST, fileName);
092: }
093:
094: protected void genRecognizerHeaderFile(Tool tool,
095: CodeGenerator generator, Grammar grammar,
096: StringTemplate headerFileST, String extName) // e.g., ".h"
097: throws IOException {
098: // no header file by default
099: }
100:
101: protected void performGrammarAnalysis(CodeGenerator generator,
102: Grammar grammar) {
103: // Build NFAs from the grammar AST
104: grammar.createNFAs();
105:
106: // Create the DFA predictors for each decision
107: grammar.createLookaheadDFAs();
108: }
109:
110: /** Is scope in @scope::name {action} valid for this kind of grammar?
111: * Targets like C++ may want to allow new scopes like headerfile or
112: * some such. The action names themselves are not policed at the
113: * moment so targets can add template actions w/o having to recompile
114: * ANTLR.
115: */
116: public boolean isValidActionScope(int grammarType, String scope) {
117: switch (grammarType) {
118: case Grammar.LEXER:
119: if (scope.equals("lexer")) {
120: return true;
121: }
122: break;
123: case Grammar.PARSER:
124: if (scope.equals("parser")) {
125: return true;
126: }
127: break;
128: case Grammar.COMBINED:
129: if (scope.equals("parser")) {
130: return true;
131: }
132: if (scope.equals("lexer")) {
133: return true;
134: }
135: break;
136: case Grammar.TREE_PARSER:
137: if (scope.equals("treeparser")) {
138: return true;
139: }
140: break;
141: }
142: return false;
143: }
144:
145: /** Target must be able to override the labels used for token types */
146: public String getTokenTypeAsTargetLabel(CodeGenerator generator,
147: int ttype) {
148: String name = generator.grammar.getTokenDisplayName(ttype);
149: // If name is a literal, return the token type instead
150: if (name.charAt(0) == '\'') {
151: return String.valueOf(ttype);
152: }
153: return name;
154: }
155:
156: /** Convert from an ANTLR char literal found in a grammar file to
157: * an equivalent char literal in the target language. For most
158: * languages, this means leaving 'x' as 'x'. Actually, we need
159: * to escape '
160: ' so that it doesn't get converted to \n by
161: * the compiler. Convert the literal to the char value and then
162: * to an appropriate target char literal.
163: *
164: * Expect single quotes around the incoming literal.
165: */
166: public String getTargetCharLiteralFromANTLRCharLiteral(
167: CodeGenerator generator, String literal) {
168: StringBuffer buf = new StringBuffer();
169: buf.append('\'');
170: int c = Grammar.getCharValueFromGrammarCharLiteral(literal);
171: if (c < Label.MIN_CHAR_VALUE) {
172: return "'\u0000'";
173: }
174: if (c < targetCharValueEscape.length
175: && targetCharValueEscape[c] != null) {
176: buf.append(targetCharValueEscape[c]);
177: } else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN
178: && !Character.isISOControl((char) c)) {
179: // normal char
180: buf.append((char) c);
181: } else {
182: // must be something unprintable...use \\uXXXX
183:// turn on the bit above max "\\uFFFF" value so that we pad with zeros
184: // then only take last 4 digits
185: String hex = Integer.toHexString(c | 0x10000).toUpperCase()
186: .substring(1, 5);
187: buf.append("\\u");
188: buf.append(hex);
189: }
190:
191: buf.append('\'');
192: return buf.toString();
193: }
194:
195: /** Convert from an ANTLR string literal found in a grammar file to
196: * an equivalent string literal in the target language. For Java, this
197: * is the translation 'a\n"' -> "a\n\"". Expect single quotes
198: * around the incoming literal. Just flip the quotes and replace
199: * double quotes with \"
200: */
201: public String getTargetStringLiteralFromANTLRStringLiteral(
202: CodeGenerator generator, String literal) {
203: literal = Utils.replace(literal, "\"", "\\\"");
204: StringBuffer buf = new StringBuffer(literal);
205: buf.setCharAt(0, '"');
206: buf.setCharAt(literal.length() - 1, '"');
207: return buf.toString();
208: }
209:
210: /** Given a random string of Java unicode chars, return a new string with
211: * optionally appropriate quote characters for target language and possibly
212: * with some escaped characters. For example, if the incoming string has
213: * actual newline characters, the output of this method would convert them
214: * to the two char sequence \n for Java, C, C++, ... The new string has
215: * double-quotes around it as well. Example String in memory:
216: *
217: * a"[newlinechar]b'c[carriagereturnchar]d[tab]e\f
218: *
219: * would be converted to the valid Java s:
220: *
221: * "a\"\nb'c\rd\te\\f"
222: *
223: * or
224: *
225: * a\"\nb'c\rd\te\\f
226: *
227: * depending on the quoted arg.
228: */
229: public String getTargetStringLiteralFromString(String s,
230: boolean quoted) {
231: if (s == null) {
232: return null;
233: }
234: StringBuffer buf = new StringBuffer();
235: if (quoted) {
236: buf.append('"');
237: }
238: for (int i = 0; i < s.length(); i++) {
239: int c = s.charAt(i);
240: if (c != '\''
241: && // don't escape single quotes in strings for java
242: c < targetCharValueEscape.length
243: && targetCharValueEscape[c] != null) {
244: buf.append(targetCharValueEscape[c]);
245: } else {
246: buf.append((char) c);
247: }
248: }
249: if (quoted) {
250: buf.append('"');
251: }
252: return buf.toString();
253: }
254:
255: public String getTargetStringLiteralFromString(String s) {
256: return getTargetStringLiteralFromString(s, false);
257: }
258:
259: /** Convert long to 0xNNNNNNNNNNNNNNNN by default for spitting out
260: * with bitsets. I.e., convert bytes to hex string.
261: */
262: public String getTarget64BitStringFromValue(long word) {
263: int numHexDigits = 8 * 2;
264: StringBuffer buf = new StringBuffer(numHexDigits + 2);
265: buf.append("0x");
266: String digits = Long.toHexString(word);
267: digits = digits.toUpperCase();
268: int padding = numHexDigits - digits.length();
269: // pad left with zeros
270: for (int i = 1; i <= padding; i++) {
271: buf.append('0');
272: }
273: buf.append(digits);
274: return buf.toString();
275: }
276:
277: /** Some targets only support ASCII or 8-bit chars/strings. For example,
278: * C++ will probably want to return 0xFF here.
279: */
280: public int getMaxCharValue(CodeGenerator generator) {
281: return Label.MAX_CHAR_VALUE;
282: }
283:
284: /** Give target a chance to do some postprocessing on actions.
285: * Python for example will have to fix the indention.
286: */
287: public List postProcessAction(List chunks, antlr.Token actionToken) {
288: return chunks;
289: }
290:
291: }
|