001: /*
002: * Grammar.java
003: *
004: * This work is free software; you can redistribute it and/or modify
005: * it under the terms of the GNU General Public License as published
006: * by the Free Software Foundation; either version 2 of the License,
007: * or (at your option) any later version.
008: *
009: * This work is distributed in the hope that it will be useful, but
010: * WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: * General Public License for more details.
013: *
014: * You should have received a copy of the GNU General Public License
015: * along with this program; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
017: * USA
018: *
019: * As a special exception, the copyright holders of this library give
020: * you permission to link this library with independent modules to
021: * produce an executable, regardless of the license terms of these
022: * independent modules, and to copy and distribute the resulting
023: * executable under terms of your choice, provided that you also meet,
024: * for each linked independent module, the terms and conditions of the
025: * license of that module. An independent module is a module which is
026: * not derived from or based on this library. If you modify this
027: * library, you may extend this exception to your version of the
028: * library, but you are not obligated to do so. If you do not wish to
029: * do so, delete this exception statement from your version.
030: *
031: * Copyright (c) 2003 Per Cederberg. All rights reserved.
032: */
033:
034: package net.percederberg.grammatica;
035:
036: import java.io.File;
037: import java.io.FileNotFoundException;
038: import java.io.FileReader;
039: import java.io.Reader;
040: import java.util.HashMap;
041: import java.util.Vector;
042:
043: import net.percederberg.grammatica.parser.Analyzer;
044: import net.percederberg.grammatica.parser.Parser;
045: import net.percederberg.grammatica.parser.ParserCreationException;
046: import net.percederberg.grammatica.parser.ParserLogException;
047: import net.percederberg.grammatica.parser.ProductionPattern;
048: import net.percederberg.grammatica.parser.RecursiveDescentParser;
049: import net.percederberg.grammatica.parser.TokenPattern;
050: import net.percederberg.grammatica.parser.Tokenizer;
051:
052: /**
053: * A grammar definition object. This object supports parsing a grammar
054: * file and create a lexical analyzer (tokenizer) for the grammar.
055: *
056: * @author Per Cederberg, <per at percederberg dot net>
057: * @version 1.2
058: */
059: public class Grammar extends Object {
060:
061: /**
062: * The author grammar declaration constant.
063: */
064: public static final String AUTHOR_DECLARATION = "AUTHOR";
065:
066: /**
067: * The copyright grammar declaration constant.
068: */
069: public static final String COPYRIGHT_DECLARATION = "COPYRIGHT";
070:
071: /**
072: * The date grammar declaration constant.
073: */
074: public static final String DATE_DECLARATION = "DATE";
075:
076: /**
077: * The description grammar declaration constant.
078: */
079: public static final String DESCRIPTION_DECLARATION = "DESCRIPTION";
080:
081: /**
082: * The grammar type grammar declaration constant.
083: */
084: public static final String GRAMMAR_TYPE_DECLARATION = "GRAMMARTYPE";
085:
086: /**
087: * The license grammar declaration constant.
088: */
089: public static final String LICENSE_DECLARATION = "LICENSE";
090:
091: /**
092: * The version grammar declaration constant.
093: */
094: public static final String VERSION_DECLARATION = "VERSION";
095:
096: /**
097: * The grammar file name.
098: */
099: private String fileName = "";
100:
101: /**
102: * The grammar declarations. This is a hash map with all the name
103: * value pairs in the header part of the grammar.
104: */
105: private HashMap declarations = new HashMap();
106:
107: /**
108: * The tokens found in the processing.
109: */
110: private Vector tokens = new Vector();
111:
112: /**
113: * The token id map. This is a map from the token pattern id to
114: * the token pattern.
115: */
116: private HashMap tokenIds = new HashMap();
117:
118: /**
119: * The token name map. This is map from the token pattern name to
120: * the token pattern.
121: */
122: private HashMap tokenNames = new HashMap();
123:
124: /**
125: * The token pattern map. This is map from the token pattern
126: * string to the token pattern object.
127: */
128: private HashMap tokenPatterns = new HashMap();
129:
130: /**
131: * The productions found in the processing.
132: */
133: private Vector productions = new Vector();
134:
135: /**
136: * The production id map. This is a map from the production
137: * pattern id to the production pattern.
138: */
139: private HashMap productionIds = new HashMap();
140:
141: /**
142: * The production name map. This is map from the production
143: * pattern name to the production pattern.
144: */
145: private HashMap productionNames = new HashMap();
146:
147: /**
148: * The map from token or production pattern name to a line range.
149: */
150: private HashMap lines = new HashMap();
151:
152: /**
153: * Creates a new grammar from the specified file.
154: *
155: * @param file the grammar file to read
156: *
157: * @throws FileNotFoundException if the grammar file could not be
158: * found
159: * @throws ParserLogException if the grammar file couldn't be
160: * parsed correctly
161: * @throws GrammarException if the grammar wasn't valid
162: */
163: public Grammar(File file) throws FileNotFoundException,
164: ParserLogException, GrammarException {
165:
166: GrammarParser parser;
167: FirstPassAnalyzer first = new FirstPassAnalyzer(this );
168: SecondPassAnalyzer second = new SecondPassAnalyzer(this );
169:
170: fileName = file.toString();
171: try {
172: parser = new GrammarParser(new FileReader(file), first);
173: second.analyze(parser.parse());
174: } catch (ParserCreationException e) {
175: throw new UnsupportedOperationException(
176: "internal error in grammar parser: "
177: + e.getMessage());
178: }
179: verify();
180: }
181:
182: /**
183: * Checks that the grammar is valid.
184: *
185: * @throws GrammarException if the grammar wasn't valid
186: */
187: private void verify() throws GrammarException {
188: String type;
189:
190: // Check grammar type
191: type = (String) declarations.get(GRAMMAR_TYPE_DECLARATION);
192: if (type == null) {
193: throw new GrammarException(fileName,
194: "grammar header missing "
195: + GRAMMAR_TYPE_DECLARATION + " declaration");
196: } else if (!type.equals("LL")) {
197: throw new GrammarException(fileName, "unrecognized "
198: + GRAMMAR_TYPE_DECLARATION + " value: '" + type
199: + "', currently only 'LL' is supported");
200: }
201:
202: // Check tokens and productions
203: if (productions.size() > 0) {
204: createParser(createTokenizer(null));
205: }
206: }
207:
208: /**
209: * Creates a tokenizer from this grammar.
210: *
211: * @param in the input stream to use
212: *
213: * @return the newly created tokenizer
214: *
215: * @throws GrammarException if the tokenizer couldn't be created
216: * or initialized correctly
217: */
218: public Tokenizer createTokenizer(Reader in) throws GrammarException {
219:
220: Tokenizer tokenizer;
221:
222: try {
223: tokenizer = new Tokenizer(in);
224: for (int i = 0; i < tokens.size(); i++) {
225: tokenizer.addPattern((TokenPattern) tokens.get(i));
226: }
227: } catch (ParserCreationException e) {
228: if (e.getName() == null) {
229: throw new GrammarException(fileName, e.getMessage());
230: } else {
231: LineRange range = (LineRange) lines.get(e.getName());
232: throw new GrammarException(fileName, e.getMessage(),
233: range.getStart(), range.getEnd());
234: }
235: }
236:
237: return tokenizer;
238: }
239:
240: /**
241: * Creates a parser from this grammar.
242: *
243: * @param tokenizer the tokenizer to use
244: *
245: * @return the newly created parser
246: *
247: * @throws GrammarException if the parser couldn't be created or
248: * initialized correctly
249: */
250: public Parser createParser(Tokenizer tokenizer)
251: throws GrammarException {
252:
253: return createParser(tokenizer, null);
254: }
255:
256: /**
257: * Creates a parser from this grammar.
258: *
259: * @param tokenizer the tokenizer to use
260: * @param analyzer the analyzer to use
261: *
262: * @return the newly created parser
263: *
264: * @throws GrammarException if the parser couldn't be created or
265: * initialized correctly
266: */
267: public Parser createParser(Tokenizer tokenizer, Analyzer analyzer)
268: throws GrammarException {
269:
270: Parser parser;
271:
272: try {
273: parser = new RecursiveDescentParser(tokenizer, analyzer);
274: for (int i = 0; i < productions.size(); i++) {
275: parser.addPattern((ProductionPattern) productions
276: .get(i));
277: }
278: parser.prepare();
279: } catch (ParserCreationException e) {
280: LineRange range = (LineRange) lines.get(e.getName());
281: if (range == null) {
282: throw new GrammarException(fileName, e.getMessage());
283: } else {
284: throw new GrammarException(fileName, e.getMessage(),
285: range.getStart(), range.getEnd());
286: }
287: }
288:
289: return parser;
290: }
291:
292: /**
293: * Returns the grammar file name and path.
294: *
295: * @return the grammar file name and path
296: */
297: public String getFileName() {
298: return fileName;
299: }
300:
301: /**
302: * Returns the declaration value for the specified name.
303: *
304: * @param name the declaration name
305: *
306: * @return the declaration value, or
307: * null if not specified in the grammar header
308: */
309: public String getDeclaration(String name) {
310: return (String) declarations.get(name);
311: }
312:
313: /**
314: * Returns the number of token patterns in the grammar.
315: *
316: * @return the number of token patterns
317: */
318: public int getTokenPatternCount() {
319: return tokens.size();
320: }
321:
322: /**
323: * Returns a specific token pattern.
324: *
325: * @param pos the pattern position, 0 <= pos < count
326: *
327: * @return the token pattern
328: */
329: public TokenPattern getTokenPattern(int pos) {
330: return (TokenPattern) tokens.get(pos);
331: }
332:
333: /**
334: * Returns a token pattern identified by its id.
335: *
336: * @param id the pattern id
337: *
338: * @return the token pattern, or null
339: */
340: public TokenPattern getTokenPatternById(int id) {
341: return (TokenPattern) tokenIds.get(new Integer(id));
342: }
343:
344: /**
345: * Returns a token pattern identified by its name.
346: *
347: * @param name the pattern name
348: *
349: * @return the token pattern, or null
350: */
351: public TokenPattern getTokenPatternByName(String name) {
352: return (TokenPattern) tokenNames.get(name);
353: }
354:
355: /**
356: * Returns a token pattern identified by its pattern string. This
357: * method will only return matches for patterns of string type.
358: *
359: * @param image the pattern string
360: *
361: * @return the token pattern, or null
362: */
363: TokenPattern getTokenPatternByImage(String image) {
364: return (TokenPattern) tokenPatterns.get(image);
365: }
366:
367: /**
368: * Returns the number of production patterns in the grammar.
369: *
370: * @return the number of production patterns
371: */
372: public int getProductionPatternCount() {
373: return productions.size();
374: }
375:
376: /**
377: * Returns a specific production pattern.
378: *
379: * @param pos the pattern position, 0 <= pos < count
380: *
381: * @return the production pattern
382: */
383: public ProductionPattern getProductionPattern(int pos) {
384: return (ProductionPattern) productions.get(pos);
385: }
386:
387: /**
388: * Returns a production pattern identified by its id.
389: *
390: * @param id the pattern id
391: *
392: * @return the production pattern, or null
393: */
394: public ProductionPattern getProductionPatternById(int id) {
395: return (ProductionPattern) productionIds.get(new Integer(id));
396: }
397:
398: /**
399: * Returns a production pattern identified by its name.
400: *
401: * @param name the pattern name
402: *
403: * @return the production pattern, or null
404: */
405: public ProductionPattern getProductionPatternByName(String name) {
406: return (ProductionPattern) productionNames.get(name);
407: }
408:
409: /**
410: * Adds a grammar declaration name-value pair.
411: *
412: * @param name the name part
413: * @param value the value part
414: */
415: void addDeclaration(String name, String value) {
416: declarations.put(name, value);
417: }
418:
419: /**
420: * Adds a token pattern to this grammar.
421: *
422: * @param token the token pattern to add
423: * @param start the starting line
424: * @param end the ending line
425: */
426: void addToken(TokenPattern token, int start, int end) {
427: tokens.add(token);
428: tokenIds.put(new Integer(token.getId()), token);
429: tokenNames.put(token.getName(), token);
430: if (token.getType() == TokenPattern.STRING_TYPE) {
431: tokenPatterns.put(token.getPattern(), token);
432: }
433: lines.put(token.getName(), new LineRange(start, end));
434: }
435:
436: /**
437: * Adds a production pattern to this grammar.
438: *
439: * @param production the production pattern to add
440: * @param start the starting line
441: * @param end the ending line
442: */
443: void addProduction(ProductionPattern production, int start, int end) {
444: productions.add(production);
445: productionIds.put(new Integer(production.getId()), production);
446: productionNames.put(production.getName(), production);
447: lines.put(production.getName(), new LineRange(start, end));
448: }
449:
450: /**
451: * A line number range.
452: */
453: private class LineRange {
454:
455: /**
456: * The first line number.
457: */
458: private int start;
459:
460: /**
461: * The last line number.
462: */
463: private int end;
464:
465: /**
466: * Creates a new line number range.
467: *
468: * @param start the first line number
469: * @param end the last line number
470: */
471: public LineRange(int start, int end) {
472: this .start = start;
473: this .end = end;
474: }
475:
476: /**
477: * Returns the first line number.
478: *
479: * @return the first line number
480: */
481: public int getStart() {
482: return start;
483: }
484:
485: /**
486: * Returns the last line number.
487: *
488: * @return the last line number
489: */
490: public int getEnd() {
491: return end;
492: }
493: }
494: }
|