001: /*
002: * FirstPassAnalyzer.java
003: *
004: * This work is free software; you can redistribute it and/or modify
005: * it under the terms of the GNU General Public License as published
006: * by the Free Software Foundation; either version 2 of the License,
007: * or (at your option) any later version.
008: *
009: * This work is distributed in the hope that it will be useful, but
010: * WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: * General Public License for more details.
013: *
014: * You should have received a copy of the GNU General Public License
015: * along with this program; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
017: * USA
018: *
019: * As a special exception, the copyright holders of this library give
020: * you permission to link this library with independent modules to
021: * produce an executable, regardless of the license terms of these
022: * independent modules, and to copy and distribute the resulting
023: * executable under terms of your choice, provided that you also meet,
024: * for each linked independent module, the terms and conditions of the
025: * license of that module. An independent module is a module which is
026: * not derived from or based on this library. If you modify this
027: * library, you may extend this exception to your version of the
028: * library, but you are not obligated to do so. If you do not wish to
029: * do so, delete this exception statement from your version.
030: *
031: * Copyright (c) 2003 Per Cederberg. All rights reserved.
032: */
033:
034: package net.percederberg.grammatica;
035:
036: import java.util.HashMap;
037:
038: import net.percederberg.grammatica.parser.Node;
039: import net.percederberg.grammatica.parser.ParseException;
040: import net.percederberg.grammatica.parser.Production;
041: import net.percederberg.grammatica.parser.ProductionPattern;
042: import net.percederberg.grammatica.parser.Token;
043: import net.percederberg.grammatica.parser.TokenPattern;
044:
045: /**
046: * A first pass grammar analyzer. This class processes the grammar
047: * parse tree and creates the token and production patterns. Both
048: * token and production patterns are added to the grammar, but the
049: * production patterns will all be empty. In order to analyze the
050: * production pattern rules, all the production pattern names and
051: * identifiers must be present in the grammar, so the pattern rules
052: * must be analyzed in a second pass. This analyzer also adds all
053: * header declarations to the grammar.
054: *
055: * @author Per Cederberg, <per at percederberg dot net>
056: * @version 1.0
057: */
058: class FirstPassAnalyzer extends GrammarAnalyzer {
059:
060: /**
061: * The grammar where objects are added.
062: */
063: private Grammar grammar;
064:
065: /**
066: * The token id to use.
067: */
068: private int nextTokenId = 1001;
069:
070: /**
071: * The production id to use.
072: */
073: private int nextProductionId = 2001;
074:
075: /**
076: * A map with all token and production names. This map is indexed
077: * by the upper-case names (without '_' characters), and maps
078: * these to the declared case-sensitive name.
079: */
080: private HashMap names = new HashMap();
081:
082: /**
083: * Creates a new grammar analyser.
084: *
085: * @param grammar the grammar where objects are added
086: */
087: public FirstPassAnalyzer(Grammar grammar) {
088: this .grammar = grammar;
089: }
090:
091: /**
092: * Sets the node value to the ignore message. If no message is
093: * set, no node value will be added.
094: *
095: * @param node the token node
096: *
097: * @return the token node
098: */
099: protected Node exitIgnore(Token node) {
100: String str = node.getImage();
101:
102: str = str.substring(7, str.length() - 1).trim();
103: if (!str.equals("")) {
104: node.addValue(str);
105: }
106: return node;
107: }
108:
109: /**
110: * Sets the node value to the error message. If no message is set,
111: * no node value will be added.
112: *
113: * @param node the token node
114: *
115: * @return the token node
116: */
117: protected Node exitError(Token node) {
118: String str = node.getImage();
119:
120: str = str.substring(6, str.length() - 1).trim();
121: if (!str.equals("")) {
122: node.addValue(str);
123: }
124: return node;
125: }
126:
127: /**
128: * Sets the node value to the identifier string.
129: *
130: * @param node the token node
131: *
132: * @return the token node
133: */
134: protected Node exitIdentifier(Token node) {
135: node.addValue(node.getImage());
136: return node;
137: }
138:
139: /**
140: * Sets the node value to the contents of the quoted string. The
141: * quotation marks will be removed, but any escaped character
142: * will be left intact.
143: *
144: * @param node the token node
145: *
146: * @return the token node
147: */
148: protected Node exitQuotedString(Token node) {
149: String str = node.getImage();
150:
151: node.addValue(str.substring(1, str.length() - 1));
152: return node;
153: }
154:
155: /**
156: * Sets the node value to the regular expression string. The
157: * quotation marks will be removed, and the "\<" and "\>" will be
158: * unescaped (replaced by the '<' and '>' characters). The rest of
159: * the expression is left intact.
160: *
161: * @param node the token node
162: *
163: * @return the token node
164: */
165: protected Node exitRegexp(Token node) {
166: String str = node.getImage();
167: StringBuffer buf = new StringBuffer();
168:
169: str = str.substring(2, str.length() - 2);
170: for (int i = 0; i < str.length(); i++) {
171: if (str.startsWith("\\<", i)) {
172: buf.append('<');
173: i++;
174: } else if (str.startsWith("\\>", i)) {
175: buf.append('>');
176: i++;
177: } else {
178: buf.append(str.charAt(i));
179: }
180: }
181: node.addValue(buf.toString());
182: return node;
183: }
184:
185: /**
186: * Removes the header part from the parse tree by returning null.
187: *
188: * @param node the production node
189: *
190: * @return the new production node
191: */
192: protected Node exitHeaderPart(Production node) {
193: return null;
194: }
195:
196: /**
197: * Adds the header declaration to the grammar. This method will
198: * also remove the header declaration from the parse tree by
199: * returning null.
200: *
201: * @param node the production node
202: *
203: * @return the new production node
204: *
205: * @throws ParseException if the node analysis discovered errors
206: */
207: protected Node exitHeaderDeclaration(Production node)
208: throws ParseException {
209:
210: String name;
211: String value;
212:
213: name = getStringValue(getChildAt(node, 0), 0);
214: value = getStringValue(getChildAt(node, 2), 0);
215: grammar.addDeclaration(name, value);
216: return null;
217: }
218:
219: /**
220: * Removes the token part from the parse tree by returning null.
221: *
222: * @param node the production node
223: *
224: * @return the new production node
225: */
226: protected Node exitTokenPart(Production node) {
227: return null;
228: }
229:
230: /**
231: * Adds a token pattern to the grammar. This method will also
232: * remove the token declaration from the parse tree by reutrning
233: * null.
234: *
235: * @param node the production node
236: *
237: * @return the new production node
238: *
239: * @throws ParseException if the node analysis discovered errors
240: */
241: protected Node exitTokenDeclaration(Production node)
242: throws ParseException {
243:
244: TokenPattern pattern;
245: String name;
246: int type;
247: String str;
248: Token token;
249: Node child;
250:
251: // Create token pattern
252: name = getIdentifier((Token) getChildAt(node, 0));
253: child = getChildAt(node, 2);
254: type = getIntValue(child, 0);
255: str = getStringValue(child, 1);
256: pattern = new TokenPattern(nextTokenId++, name, type, str);
257:
258: // Process optional ignore or error
259: if (node.getChildCount() == 4) {
260: child = getChildAt(node, 3);
261: token = (Token) getValue(child, 0);
262: str = null;
263: if (child.getValueCount() == 2) {
264: str = getStringValue(child, 1);
265: }
266: switch (token.getId()) {
267: case GrammarConstants.IGNORE:
268: if (str == null) {
269: pattern.setIgnore();
270: } else {
271: pattern.setIgnore(str);
272: }
273: break;
274: case GrammarConstants.ERROR:
275: if (str == null) {
276: pattern.setError();
277: } else {
278: pattern.setError(str);
279: }
280: break;
281: }
282: }
283:
284: // Add token to grammar
285: grammar.addToken(pattern, node.getStartLine(), node
286: .getEndLine());
287: return null;
288: }
289:
290: /**
291: * Sets the node values to the token pattern type and the token
292: * pattern string.
293: *
294: * @param node the production node
295: *
296: * @return the new production node
297: *
298: * @throws ParseException if the node analysis discovered errors
299: */
300: protected Node exitTokenValue(Production node)
301: throws ParseException {
302: switch (getChildAt(node, 0).getId()) {
303: case GrammarConstants.QUOTED_STRING:
304: node.addValue(new Integer(TokenPattern.STRING_TYPE));
305: break;
306: case GrammarConstants.REGEXP:
307: node.addValue(new Integer(TokenPattern.REGEXP_TYPE));
308: break;
309: }
310: node.addValue(getStringValue(getChildAt(node, 0), 0));
311: return node;
312: }
313:
314: /**
315: * Sets the node values to the error or ignore token. If present,
316: * the message string will also be added as a node value.
317: *
318: * @param node the production node
319: *
320: * @return the new production node
321: *
322: * @throws ParseException if the node analysis discovered errors
323: */
324: protected Node exitTokenHandling(Production node)
325: throws ParseException {
326:
327: Node child = getChildAt(node, 0);
328:
329: node.addValue(child);
330: if (child.getValueCount() > 0) {
331: node.addValue(getValue(child, 0));
332: }
333: return node;
334: }
335:
336: /**
337: * Adds an empty production pattern to the grammar. This metod
338: * will return the production node to make it available for the
339: * second pass analyzer.
340: *
341: * @param node the production node
342: *
343: * @return the new production node
344: *
345: * @throws ParseException if the node analysis discovered errors
346: */
347: protected Node exitProductionDeclaration(Production node)
348: throws ParseException {
349:
350: ProductionPattern production;
351: String name;
352:
353: name = getIdentifier((Token) getChildAt(node, 0));
354: production = new ProductionPattern(nextProductionId++, name);
355: grammar.addProduction(production, node.getStartLine(), node
356: .getEndLine());
357: return node;
358: }
359:
360: /**
361: * Returns a token identifier. This method should only be called
362: * with identifier tokens, otherwise an exception will be thrown.
363: * This method also checks that the identifier name found is
364: * globally unique in it's upper-case form, and throws an
365: * exception if it is not.
366: *
367: * @param token the identifier token
368: *
369: * @return the identifier name
370: *
371: * @throws ParseException if the identifier wasn't unique
372: */
373: private String getIdentifier(Token token) throws ParseException {
374: String name = token.getImage();
375: StringBuffer buf = new StringBuffer(name.toUpperCase());
376: char c;
377:
378: // Check for identifier token
379: if (token.getId() != GrammarConstants.IDENTIFIER) {
380: throw new ParseException(ParseException.INTERNAL_ERROR,
381: null, token.getStartLine(), token.getStartColumn());
382: }
383:
384: // Remove all non-identifier characters
385: for (int i = 0; i < buf.length(); i++) {
386: c = buf.charAt(i);
387: if (('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) {
388: // Do nothing
389: } else {
390: buf.deleteCharAt(i--);
391: }
392: }
393:
394: // Check for name collitions
395: if (names.containsKey(buf.toString())) {
396: throw new ParseException(
397: ParseException.ANALYSIS_ERROR,
398: "duplicate identifier '"
399: + name
400: + "' is similar or "
401: + "equal to previously defined identifier '"
402: + names.get(buf.toString()) + "'", token
403: .getStartLine(), token.getStartColumn());
404: } else {
405: names.put(buf.toString(), name);
406: }
407:
408: // Return the identifier
409: return name;
410: }
411: }
|