001: package fri.patterns.interpreter.parsergenerator.syntax.builder;
002:
003: import java.util.*;
004: import fri.patterns.interpreter.parsergenerator.Token;
005: import fri.patterns.interpreter.parsergenerator.util.SymbolToName;
006:
007: /**
008: ArtificialRule is needed to
009: <ul>
010: <li>create nonterminals and rules for symbols within parenthesis: "(a b c)" -> "_a_b_c_"</li>
011: <li>create nonterminals and rules for symbols that were quantified with "*", "+", "?"</li>
012: </ul>
013: The nonterminal names get created from symbols that are converted to names.
014: Every created nonterminal starts with "_" (Token.ARTIFICIAL_RULE_START_CHARACTER).
015: <p>
016: Example:
017: <pre>
018: sentence1 ::= word* ;
019: sentence2 ::= word+ ;
020: sentence3 ::= word? ;
021: </pre>
022: will be converted to following rules
023: <pre>
024: _sentence1_OPTLIST ::= _sentence1_LIST;
025: _sentence1_OPTLIST ::= ;
026: _sentence1_LIST ::= _sentence1_LIST word;
027: _sentence1_LIST ::= word;
028:
029: _sentence2_LIST ::= _sentence2_LIST word;
030: _sentence2_LIST ::= word;
031:
032: _sentence3_OPT ::= word;
033: _sentence3_OPT ::= ;
034: </pre>
035:
036: @author (c) 2002 Fritz Ritzberger
037: */
038:
039: class ArtificialRule {
040: private List rules;
041: private String nonterminal;
042:
043: /**
044: Creates an artificial rule from an expression within parenthesis.
045: Argument sentencesInParenthesis holds a List of arbitrary deep Lists
046: that contain String or ArtificialRule at end.
047: The method getRules() will return as much rules as are contained on any
048: level in sentencesInParenthesis.
049: */
050: public ArtificialRule(List sentencesInParenthesis, String catSym) { // make "(b c)" to rule "_b_c_ ::= b c"
051: StringBuffer sb = new StringBuffer();
052: parenthesisContentsToString(sentencesInParenthesis, sb, catSym);
053: nonterminal = ensureUnderscore(sb.toString());
054: rules = createRule(nonterminal, sentencesInParenthesis);
055: }
056:
057: private void parenthesisContentsToString(List sentences,
058: StringBuffer sb, String catSym) {
059: for (int i = 0; i < sentences.size(); i++) {
060: Object o = sentences.get(i);
061:
062: if (o instanceof List) {
063: parenthesisContentsToString((List) o, sb, ""); // catenize symbol only on first level
064: } else {
065: if (o instanceof String)
066: o = SymbolToName.makeIdentifier((String) o, ""); // no "_" for quotes
067: sb.append(o.toString()); // ArtificialRule goes here
068: }
069:
070: if (i < sentences.size() - 1)
071: sb.append((catSym.length() > 0 ? "_" + catSym : "")
072: + "_");
073: }
074: }
075:
076: /**
077: Creates an artificial rule from with a quantifier like "*", "+", "?".
078: From this some rules are resulting that getRules() will return.
079: "token" is either String or ArtificialRule.
080: */
081: public ArtificialRule(Object token, String quantifier) { // make "a*", "a+", "a?" to rules
082: nonterminal = token.toString();
083:
084: String listNonterm = null; // needed for * and +
085:
086: if (quantifier.equals("+") || quantifier.equals("*")) {
087: List sentences = new ArrayList();
088:
089: listNonterm = ensureUnderscore(nonterminal) + "_LIST";
090:
091: // for now leave out the left side nonterminal of list
092: List sentence = new ArrayList(); // mandatory tokenlist expands to
093: sentence.add(listNonterm); // the list and
094: sentence.add(token.toString()); // the token
095: sentences.add(sentence);
096:
097: sentence = new ArrayList();
098: sentence.add(token.toString()); // or the token alone
099: sentences.add(sentence);
100:
101: rules = createRule(listNonterm, sentences); // adds the left side nonterminal of list
102:
103: if (quantifier.equals("+"))
104: nonterminal = listNonterm; // "_a_LIST" is the substitute for "a+"
105: }
106:
107: if (quantifier.equals("*") || quantifier.equals("?")) {
108: List sentences = new ArrayList();
109:
110: nonterminal = ensureUnderscore(nonterminal)
111: + (quantifier.equals("*") ? "_OPTLIST" : "_OPT");
112:
113: String nonterm = quantifier.equals("*") ? listNonterm
114: : token.toString();
115:
116: List sentence = new ArrayList(); // optional tokenlist expands to
117: sentence.add(nonterm); // the mandatory list when *, the token when ?
118: sentences.add(sentence);
119:
120: sentences.add(new ArrayList()); // or nothing
121:
122: List mandatoryList = quantifier.equals("*") ? rules : null;
123:
124: rules = createRule(nonterminal, sentences);
125:
126: if (mandatoryList != null)
127: rules.addAll(mandatoryList);
128: }
129:
130: if (token instanceof ArtificialRule) // could be '(' token ')'
131: rules.addAll(((ArtificialRule) token).getRules());
132: }
133:
134: private String ensureUnderscore(String nonterminal) {
135: if (nonterminal
136: .startsWith(Token.ARTIFICIAL_NONTERMINAL_START_CHARACTER) == false)
137: nonterminal = Token.ARTIFICIAL_NONTERMINAL_START_CHARACTER
138: + nonterminal;
139: return nonterminal;
140: }
141:
142: private List createRule(String nonterminal, List sentences) {
143: for (int i = 0; i < sentences.size(); i++) {
144: List deep = (List) sentences.get(i);
145: List flat = flattenLists(deep, new ArrayList());
146: flat.add(0, nonterminal);
147: sentences.set(i, flat);
148: }
149: return sentences;
150: }
151:
152: /**
153: The passed "deep" container contains Lists of arbitrary depth, that means every List
154: element can be a List. This method resolves them to a sequential List of Lists.
155: @param container List that contains Lists that contain Lists ...
156: @param result List containing only Lists, retrieved from the "tree" container.
157: @return the result flattened List (identical with passed "flat" List).
158: */
159: public static List flattenLists(List deep, List flat) {
160: for (int i = 0; i < deep.size(); i++) {
161: Object o = deep.get(i);
162:
163: if (o instanceof List)
164: flattenLists((List) o, flat);
165: else
166: flat.add(o);
167: }
168: return flat;
169: }
170:
171: /**
172: Returns the artificial name of this rule.
173: This is used to represent this rule within other rules.
174: */
175: public String toString() {
176: return nonterminal;
177: }
178:
179: /**
180: Returns all real rules of this artificial rule.
181: */
182: public List getRules() {
183: return rules;
184: }
185:
186: /**
187: Resolves a List of rules that contain ArtificialRules to a
188: to a list of real rules and stores those in resultSyntax.
189: @param rules List of rules to resolve, containing ArtificialRules on the right side.
190: @param resultSyntax List of rules that will not contain ArtificialRules. Will not be cleared when passed!
191: */
192: public static void resolveArtificialRules(List rules,
193: List resultSyntax) {
194: for (int j = 0; j < rules.size(); j++) {
195: List rule = (List) rules.get(j);
196:
197: resultSyntax.add(rule); // add it
198:
199: for (int k = 1; k < rule.size(); k++) { // optionally modify it
200: Object symbol = rule.get(k);
201:
202: if (symbol instanceof ArtificialRule) { // when containing rules
203: rule.set(k, symbol.toString()); // set the rule's nonterminal
204:
205: ArtificialRule ar = (ArtificialRule) symbol;
206: resolveArtificialRules(ar.getRules(), resultSyntax); // add the rule
207: }
208: }
209: }
210: }
211:
212: public static void main(String[] args) {
213: List ruleA = new ArrayList();
214: List ruleB = new ArrayList();
215: List parenthesisRule = new ArrayList();
216: ruleA.add("a1");
217: ruleA.add("a2");
218: ruleB.add("b1");
219: parenthesisRule.add(ruleA);
220: parenthesisRule.add(ruleB);
221: ArtificialRule afr = new ArtificialRule(parenthesisRule, "OR");
222: System.err.println(afr.toString() + " ::= " + afr.getRules());
223:
224: ArtificialRule afr2 = new ArtificialRule(afr, "*");
225: System.err.println(afr2.toString() + " ::= " + afr2.getRules());
226: }
227:
228: }
|