001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.codegen;
029:
030: import org.antlr.Tool;
031: import org.antlr.stringtemplate.StringTemplate;
032: import org.antlr.tool.Grammar;
033:
034: import java.io.IOException;
035: import java.util.ArrayList;
036:
037: public class CTarget extends Target {
038:
039: ArrayList strings = new ArrayList();
040:
041: protected void genRecognizerFile(Tool tool,
042: CodeGenerator generator, Grammar grammar,
043: StringTemplate outputFileST) throws IOException {
044: // Before we write this, and cause it to generate its string,
045: // we need to add all the string literals that we are going to match
046: //
047: outputFileST.setAttribute("literals", strings);
048: //System.out.println(outputFileST.toStructureString());
049: String fileName = generator.getRecognizerFileName(grammar.name,
050: grammar.type);
051: generator.write(outputFileST, fileName);
052: }
053:
054: protected void genRecognizerHeaderFile(Tool tool,
055: CodeGenerator generator, Grammar grammar,
056: StringTemplate headerFileST, String extName)
057: throws IOException {
058: generator.write(headerFileST, grammar.name
059: + Grammar.grammarTypeToFileNameSuffix[grammar.type]
060: + extName);
061: }
062:
063: protected StringTemplate chooseWhereCyclicDFAsGo(Tool tool,
064: CodeGenerator generator, Grammar grammar,
065: StringTemplate recognizerST, StringTemplate cyclicDFAST) {
066: return recognizerST;
067: }
068:
069: /** Is scope in @scope::name {action} valid for this kind of grammar?
070: * Targets like C++ may want to allow new scopes like headerfile or
071: * some such. The action names themselves are not policed at the
072: * moment so targets can add template actions w/o having to recompile
073: * ANTLR.
074: */
075: public boolean isValidActionScope(int grammarType, String scope) {
076: switch (grammarType) {
077: case Grammar.LEXER:
078: if (scope.equals("lexer")) {
079: return true;
080: }
081: if (scope.equals("header")) {
082: return true;
083: }
084: if (scope.equals("includes")) {
085: return true;
086: }
087: if (scope.equals("preincludes")) {
088: return true;
089: }
090: if (scope.equals("overrides")) {
091: return true;
092: }
093: break;
094: case Grammar.PARSER:
095: if (scope.equals("parser")) {
096: return true;
097: }
098: if (scope.equals("header")) {
099: return true;
100: }
101: if (scope.equals("includes")) {
102: return true;
103: }
104: if (scope.equals("preincludes")) {
105: return true;
106: }
107: if (scope.equals("overrides")) {
108: return true;
109: }
110: break;
111: case Grammar.COMBINED:
112: if (scope.equals("parser")) {
113: return true;
114: }
115: if (scope.equals("lexer")) {
116: return true;
117: }
118: if (scope.equals("header")) {
119: return true;
120: }
121: if (scope.equals("includes")) {
122: return true;
123: }
124: if (scope.equals("preincludes")) {
125: return true;
126: }
127: if (scope.equals("overrides")) {
128: return true;
129: }
130: break;
131: case Grammar.TREE_PARSER:
132: if (scope.equals("treeparser")) {
133: return true;
134: }
135: if (scope.equals("header")) {
136: return true;
137: }
138: if (scope.equals("includes")) {
139: return true;
140: }
141: if (scope.equals("preincludes")) {
142: return true;
143: }
144: if (scope.equals("overrides")) {
145: return true;
146: }
147: break;
148: }
149: return false;
150: }
151:
152: public String getTargetCharLiteralFromANTLRCharLiteral(
153: CodeGenerator generator, String literal) {
154:
155: if (literal.startsWith("'\\u")) {
156: literal = "0x" + literal.substring(3, 7);
157: } else {
158: int c = literal.charAt(1);
159:
160: if (c < 32 || c > 127) {
161: literal = "0x" + Integer.toHexString(c);
162: }
163: }
164:
165: return literal;
166: }
167:
168: /** Convert from an ANTLR string literal found in a grammar file to
169: * an equivalent string literal in the C target.
170: * Because we msut support Unicode character sets and have chosen
171: * to have the lexer match UTF32 characters, then we must encode
172: * string matches to use 32 bit character arrays. Here then we
173: * must produce the C array and cater for the case where the
174: * lexer has been eoncded with a string such as "xyz\n", which looks
175: * slightly incogrous to me but is not incorrect.
176: */
177: public String getTargetStringLiteralFromANTLRStringLiteral(
178: CodeGenerator generator, String literal) {
179: int index;
180: int outc;
181: String bytes;
182: StringBuffer buf = new StringBuffer();
183:
184: buf.append("{ ");
185:
186: // We need ot lose any escaped characters of the form \x and just
187: // replace them with their actual values as well as lose the surrounding
188: // quote marks.
189: //
190: for (int i = 1; i < literal.length() - 1; i++) {
191: buf.append("0x");
192:
193: if (literal.charAt(i) == '\\') {
194: i++; // Assume that there is a next character, this will just yield
195: // invalid strings if not, which is what the input would be of course - invalid
196: switch (literal.charAt(i)) {
197: case 'u':
198: case 'U':
199: buf.append(literal.substring(i + 1, i + 5)); // Already a hex string
200: i = i + 5; // Move to next string/char/escape
201: break;
202:
203: case 'n':
204: case 'N':
205:
206: buf.append("0A");
207: break;
208:
209: case 'r':
210: case 'R':
211:
212: buf.append("0D");
213: break;
214:
215: case 't':
216: case 'T':
217:
218: buf.append("09");
219: break;
220:
221: case 'b':
222: case 'B':
223:
224: buf.append("08");
225: break;
226:
227: case 'f':
228: case 'F':
229:
230: buf.append("0C");
231: break;
232:
233: default:
234:
235: // Anything else is what it is!
236: //
237: buf.append(Integer.toHexString(
238: (int) literal.charAt(i)).toUpperCase());
239: break;
240: }
241: } else {
242: buf.append(Integer.toHexString((int) literal.charAt(i))
243: .toUpperCase());
244: }
245: buf.append(", ");
246: }
247: buf.append(" ANTLR3_STRING_TERMINATOR}");
248:
249: bytes = buf.toString();
250: index = strings.indexOf(bytes);
251:
252: if (index == -1) {
253: strings.add(bytes);
254: index = strings.indexOf(bytes);
255: }
256:
257: String strref = "lit_" + String.valueOf(index + 1);
258:
259: return strref;
260: }
261:
262: }
|