001: package fri.patterns.interpreter.parsergenerator.syntax.builder;
002:
003: import java.util.*;
004: import java.io.IOException;
005: import fri.patterns.interpreter.parsergenerator.*;
006: import fri.patterns.interpreter.parsergenerator.lexer.*;
007: import fri.patterns.interpreter.parsergenerator.syntax.*;
008: import fri.patterns.interpreter.parsergenerator.parsertables.LALRParserTables;
009: import fri.patterns.interpreter.parsergenerator.parsertables.ParserBuildException;
010:
011: /**
012: Connects SyntaxSeparation and LexerBuilder.
013: SyntaxBuilder builds a <i>Syntax</i> object from a text input which can be
014: File, InputStream, Reader, String, StringBuffer. Mind that you DO NOT need
015: a SyntaxBuilder to create a Syntax from a String [][] or a List of rule Lists!
016: <p>
017: The following symbols can be used within the syntax specification text (spaces are ignored):
018: <pre>
019: a ::= b? ; // a derives to one or none b
020: a ::= b* ; // a derives to any number of b including zero
021: a ::= b+ ; // a derives to any number of b excluding zero
022: a ::= (b c)* d ; // grouping of b and c by parenthesis
023: a ::= b | c | ; // a derives to b or c or nothing
024: start ::= "BEGIN" ; // a fixed terminal string
025: letter ::= 'a' .. 'z' ; // character set a-z
026: newline ::= '\r' | '\n' | '\r' '\n' ; // newlines of all well-known platforms
027: positive ::= digit - '0' ; // digit but not zero
028: id ::= `identifier` ; // using the pre-built lexer rules for <i>identifier</i> (lexer ruleref)
029: source ::= char - comment ; // source is all characters, but without comments
030: </pre>
031: This EBNF-like language is case-sensitive and differs from EBNF only at these symbols:
032: . { } < > [ ]. The archetype was the notation used by the W3C.
033:
034: @author (c) 2002, Fritz Ritzberger
035: */
036:
037: public class SyntaxBuilder {
038: private Syntax syntax;
039: private Syntax lexerSyntax, parserSyntax;
040: private Lexer lexer;
041: private List tokenSymbols, ignoredSymbols;
042: private List initialNonterminals;
043:
044: /**
045: Parse a syntax specification text and process it to a <i>Syntax</i> object.
046: The syntax, a Lexer, a parserSyntax and a token-symbol list will be retrieveable after construction.
047: @param syntaxInput text to parse and build a syntax from, File, InputStream, Reader, String, StringBuffer.
048: If InputStream is used, no Reader will be wrapped around (raw byte input).
049: */
050: public SyntaxBuilder(Object syntaxInput) throws SyntaxException,
051: LexerException, ParserBuildException, IOException {
052: // build the hardcoded default BNF lexer
053:
054: SyntaxSeparation.DEBUG = false; // avoid output of syntax control messages
055: SyntaxSeparation separation = new SyntaxSeparation(new Syntax(
056: StandardLexerRules.lexerSyntax));
057: SyntaxSeparation.DEBUG = true;
058:
059: LexerBuilder builder = new LexerBuilder(separation
060: .getLexerSyntax(), separation.getIgnoredSymbols());
061: Lexer lexer = builder.getLexer();
062: lexer.setInput(syntaxInput);
063:
064: // build the (hardcoded) BNF parser
065:
066: //ParserTables parserTables = new LALRParserTables(new Syntax(SyntaxUtil.ruleArrayToList(SyntaxBuilderSemantic.syntax)));
067: // COMMENT OUT FOLLOWING LINE AND COMMENT IN PREVIOUS LINE TO BUILD NEW SyntaxBuilderParserTables AFTER HAVING CHANGED SYNTAX!
068: ParserTables parserTables = new SyntaxBuilderParserTables();
069:
070: // start the BNF parser with syntax input
071: Parser parser = new Parser(parserTables);
072: initialNonterminals = new ArrayList(64);
073: boolean ok = parser.parse(lexer, new SyntaxBuilderSemantic(
074: initialNonterminals));
075: if (ok == false)
076: throw new SyntaxException("Failed building Syntax from "
077: + syntaxInput);
078:
079: List result = (List) parser.getResult(); // must be a List, according to applied semantic
080: List rules = new ArrayList(); // can not predict size
081: ArtificialRule.resolveArtificialRules(result, rules);
082: this .syntax = new Syntax(rules);
083: //System.err.println("Built result syntax:\n"+this.syntax);
084: }
085:
086: private void ensureSeparation() throws SyntaxException {
087: if (tokenSymbols == null) {
088: SyntaxSeparation separation = new SyntaxSeparation(syntax);
089: this .tokenSymbols = separation.getTokenSymbols();
090: this .ignoredSymbols = separation.getIgnoredSymbols();
091: this .parserSyntax = separation.getParserSyntax();
092: this .lexerSyntax = separation.getLexerSyntax();
093: }
094: }
095:
096: /** Returns a Lexer for the built syntax. */
097: public Lexer getLexer() throws LexerException, SyntaxException {
098: if (lexer == null) {
099: ensureSeparation();
100: LexerBuilder builder = new LexerBuilder(lexerSyntax,
101: ignoredSymbols);
102: this .lexer = builder.getLexer();
103: }
104: return this .lexer;
105: }
106:
107: /** Returns only the ready-made parser syntax (to feed the parser tables). */
108: public Syntax getParserSyntax() throws SyntaxException {
109: ensureSeparation();
110: return this .parserSyntax;
111: }
112:
113: // /** Returns the list of lexer token symbols for <i>setTerminals()</i> call if the Lexer is used standalone (without Parser). */
114: // public List getTokenSymbols()
115: // throws SyntaxException
116: // {
117: // ensureSeparation();
118: // return this.tokenSymbols;
119: // }
120:
121: /** Returns the whole syntax (both parser and lexer syntax). */
122: public Syntax getSyntax() {
123: return syntax;
124: }
125:
126: /**
127: * Returns the list of initial nonterminals (before parenthesis and quantifiers get resolved).
128: * This is for internal use in SourceGenerator.
129: */
130: public List getInitialNonterminals() {
131: return initialNonterminals;
132: }
133:
134: /**
135: Resolves all singular rules (only one symbol on right side, only one occurence).
136: This must be called directly after construction to have an effect.
137: */
138: public Syntax resolveSingulars() {
139: getSyntax().resolveSingulars();
140: return getSyntax();
141: }
142:
143: /** Creates SyntaxBuilderParserTables.java (in this directory) from the rules defined in SyntaxBuilderSemantic. */
144: public static void main(String[] args) {
145: try {
146: new LALRParserTables(new Syntax(
147: SyntaxBuilderSemantic.syntax))
148: .toSourceFile("fri.patterns.interpreter.parsergenerator.syntax.builder.SyntaxBuilderParserTables");
149: } catch (Exception e) {
150: e.printStackTrace();
151: }
152: }
153:
154: }
|