001: package fri.patterns.interpreter.parsergenerator.syntax.builder;
002:
003: import java.util.*;
004: import fri.patterns.interpreter.parsergenerator.Semantic;
005: import fri.patterns.interpreter.parsergenerator.Token;
006: import fri.patterns.interpreter.parsergenerator.syntax.*;
007:
008: /**
009: A syntax specification similar to EBNF.
010: This semantic is used to build a Parser with Lexer from a textual EBNF syntax specification.
011: <p>
012: The semantics of ".." is the description of the set between leading and trailing character.
013: The leading must be the one with the lower UNICODE value.
014: <p>
015: The semantics of "-" is intersection (in effect a set difference). Specifying <i>chars - comment - stringdef</i>
016: means all <i>chars</i> except <i>comment</i> and <i>stringdef</i>, i.e. <i>stringdef</i> is not subtracted
017: from <i>comment</i> but from <i>chars</i>!
018: <p>
019: TODO: think over a repeat-count symbol: both parser AND lexer would need it.
020: Or is it better to express repetition by writing the symbols out, like "a ::= b b b b;"? But what to do with a hundred "b"?
021:
022: @author (c) 2002 Fritz Ritzberger
023: */
024:
025: public class SyntaxBuilderSemantic implements Semantic {
026: public static String[][] syntax = {
027: // START rule
028: { "syntax", "syntax", "rule" }, // the grammar consists of rules
029: { "syntax", "rule" },
030:
031: { "set", "`bnf_chardef`", "\"" + Token.UPTO + "\"",
032: "`bnf_chardef`" }, // set of characters
033:
034: { "intersectionstartunit", "set" }, // intersection of character sets
035: { "intersectionstartunit", "`identifier`" },
036: { "intersectionstartunit", "`ruleref`" },
037:
038: { "intersectionunit", "`bnf_chardef`" },
039: { "intersectionunit", "`stringdef`" },
040: { "intersectionunit", "intersectionstartunit" },
041:
042: { "intersectionsubtracts", "intersectionsubtracts",
043: "intersectionsubtract" },
044: { "intersectionsubtracts", "intersectionsubtract" },
045: { "intersectionsubtract", "'" + Token.BUTNOT + "'",
046: "intersectionunit" },
047:
048: { "intersection", "intersectionstartunit",
049: "intersectionsubtracts" },
050:
051: { "sequnit", "intersection" }, // unit of a sequence
052: { "sequnit", "intersectionunit" },
053: { "sequnit", "'('", "unionseq", "')'" },
054:
055: { "quantifiedsequnit", "sequnit", "`quantifier`" }, // unit can be quantified
056: { "quantifiedsequnit", "sequnit" },
057:
058: { "sequence", "sequence", "quantifiedsequnit" }, // sequence of units with significant order
059: { "sequence", "quantifiedsequnit" },
060:
061: { "sequence_opt", "sequence" }, // sequence is nullable
062: { "sequence_opt", /*nothing*/},
063:
064: { "unionseq", "unionseq", "'|'", "sequence_opt" }, // rule alternatives
065: { "unionseq", "sequence_opt" },
066:
067: { "rule", "`identifier`", "\"::=\"", "unionseq", "';'" }, // one rule of a grammar
068:
069: // specify what will be ignored. Using StandardLexerRules.lexerSyntax will include this automatically
070: //{ "ignored", "`comment`" },
071: //{ "ignored", "`spaces`" },
072: //{ "ignored", "`newlines`" },
073: };
074:
075: private List initialNonterminals;
076:
077: /**
078: Creates a syntax builder semantic that resolves parenthesis and quantifiers.
079: */
080: public SyntaxBuilderSemantic() {
081: this (null);
082: }
083:
084: /**
085: Creates a syntax builder semantic that resolves parenthesis and quantifiers.
086: All nonterminals read from the processed syntax will be collected into the passed List.
087: */
088: public SyntaxBuilderSemantic(List initialNonterminals) {
089: this .initialNonterminals = initialNonterminals;
090: }
091:
092: public Object doSemantic(Rule rule, List inputTokens, List ranges) {
093: String nt = rule.getNonterminal();
094:
095: if (nt.equals("set"))
096: return inputTokens;
097:
098: if (nt.equals("intersectionstartunit")
099: || nt.equals("intersectionunit"))
100: return inputTokens.get(0);
101:
102: if (nt.equals("intersectionsubtract"))
103: return inputTokens;
104:
105: if (nt.equals("intersectionsubtracts"))
106: if (inputTokens.size() == 2)
107: return appendAll((List) inputTokens.get(0),
108: (List) inputTokens.get(1));
109: else
110: return inputTokens.get(0);
111:
112: if (nt.equals("intersection"))
113: return insertAtStart(inputTokens.get(0), (List) inputTokens
114: .get(1));
115:
116: if (nt.equals("sequnit"))
117: if (inputTokens.size() == 3)
118: return sequnitInParenthesis(inputTokens.get(1));
119: else
120: return inputTokens.get(0);
121:
122: if (nt.equals("quantifiedsequnit"))
123: if (inputTokens.size() == 2)
124: return quantifiedsequnit(inputTokens.get(0),
125: inputTokens.get(1));
126: else
127: return inputTokens.get(0);
128:
129: if (nt.equals("sequence"))
130: if (inputTokens.size() == 2)
131: return append((List) inputTokens.get(0), inputTokens
132: .get(1));
133: else
134: return inputTokens;
135:
136: if (nt.equals("sequence_opt"))
137: return inputTokens;
138:
139: if (nt.equals("unionseq"))
140: if (inputTokens.size() == 3)
141: return append((List) inputTokens.get(0),
142: (List) inputTokens.get(2));
143: else
144: return inputTokens;
145:
146: if (nt.equals("rule"))
147: return rule((String) inputTokens.get(0), (List) inputTokens
148: .get(2));
149:
150: if (nt.equals("syntax"))
151: if (inputTokens.size() == 2)
152: return syntax((List) inputTokens.get(0),
153: (List) inputTokens.get(1));
154: else
155: return inputTokens.get(0);
156:
157: throw new IllegalArgumentException("Unknown rule: " + rule);
158: }
159:
160: private ArtificialRule sequnitInParenthesis(Object unionseq) {
161: return new ArtificialRule((List) unionseq, "OR");
162: }
163:
164: private ArtificialRule quantifiedsequnit(Object sequnit,
165: Object quantifier) {
166: return new ArtificialRule(sequnit, (String) quantifier);
167: }
168:
169: private List append(List list, Object element) {
170: list.add(element);
171: return list;
172: }
173:
174: private List appendAll(List list, List elements) {
175: for (int i = 0; i < elements.size(); i++)
176: list.add(elements.get(i));
177: return list;
178: }
179:
180: private List insertAtStart(Object intersectionStart,
181: List intersectionList) {
182: intersectionList.add(0, intersectionStart);
183: return intersectionList;
184: }
185:
186: private List rule(String identifier, List unionseq) {
187: if (initialNonterminals != null
188: && initialNonterminals.indexOf(identifier) < 0)
189: initialNonterminals.add(identifier);
190:
191: for (int i = 0; i < unionseq.size(); i++) {
192: List deep = (List) unionseq.get(i);
193: List flat = ArtificialRule.flattenLists(deep,
194: new ArrayList());
195: flat.add(0, identifier);
196: unionseq.set(i, flat);
197: }
198: return unionseq;
199: }
200:
201: private List syntax(List syntax, List rule) {
202: for (int i = 0; i < rule.size(); i++)
203: syntax.add((List) rule.get(i));
204: return syntax;
205: }
206:
207: }
|