001: package fri.patterns.interpreter.parsergenerator.syntax;
002:
003: import java.io.Serializable;
004: import java.util.*;
005: import fri.patterns.interpreter.parsergenerator.Token;
006:
007: /**
008: A Syntax is a Rule list that can be converted to a parser table.
009: It provides several methods to check and simplify the grammar.
010: Syntax accepts no duplicate rules (ignores any addition except
011: the first by using a backing hashtable).
012:
013: @author (c) 2004 Fritz Ritzberger
014: */
015:
016: public class Syntax implements Serializable {
017: private List rules;
018: private Hashtable ruleHash;
019:
020: public Syntax() {
021: this (new ArrayList());
022: }
023:
024: public Syntax(int size) {
025: this (new ArrayList(size));
026: }
027:
028: public Syntax(String[][] rules) {
029: this (SyntaxUtil.ruleArrayToList(rules));
030: }
031:
032: public Syntax(String[][][] ruleSets) {
033: this (SyntaxUtil.catenizeRulesUnique(ruleSets));
034: }
035:
036: public Syntax(List rules) {
037: appendRules(rules);
038: }
039:
040: /** Append rules to this syntax. List elements can be Rule or List, second will be converted to Rule. */
041: public void appendRules(List rules) {
042: if (rules != null) {
043: if (rules.size() > 0) {
044: if (this .rules == null) {
045: allocateRules(rules);
046: }
047:
048: for (int i = 0; i < rules.size(); i++) {
049: Object o = rules.get(i);
050: if (o instanceof Rule)
051: addRule((Rule) o);
052: else
053: // must convert from List to Rule
054: addRule(new Rule((List) o));
055: }
056: } else {
057: if (this .rules == null) {
058: allocateRules(rules);
059: }
060: }
061: }
062: }
063:
064: /** Returns the number of rules of this Syntax. */
065: public int size() {
066: return rules != null ? rules.size() : 0;
067: }
068:
069: /** Returns the rule at the given position. */
070: public Rule getRule(int i) {
071: return (Rule) rules.get(i);
072: }
073:
074: /** Appends a new rule to this Syntax. */
075: public void addRule(Rule rule) {
076: insertRule(size(), rule);
077: }
078:
079: /** Inserts a new rule into this Syntax (needed to set artificial START rule at position 0). */
080: public void insertRule(int i, Rule rule) {
081: if (rules == null) {
082: allocateRules(null);
083: }
084:
085: if (ruleHash.get(rule) != null)
086: return; // already contained
087:
088: ruleHash.put(rule, rule);
089: rules.add(i, rule);
090: }
091:
092: /** Removes the rule at passed index from this Syntax. */
093: public void removeRule(int index) {
094: rules.remove(index);
095: }
096:
097: private void allocateRules(List rules) {
098: int size = rules == null || rules.size() <= 0 ? 64 : rules
099: .size();
100: this .rules = new ArrayList(size);
101: this .ruleHash = new Hashtable(size);
102: }
103:
104: /** Returns a List of top level Rules (that do not appear on right side of any contained rule). */
105: public List findStartRules() {
106: List roots = new ArrayList(1);
107:
108: for (int i = 0; i < size(); i++) {
109: Rule rule = getRule(i);
110: String nonterm = rule.getNonterminal();
111:
112: // search this left side nonterminal on right sides of all rules
113: boolean found = nonterm.equals(Token.TOKEN)
114: || nonterm.equals(Token.IGNORED);
115: for (int j = 0; found == false && j < size(); j++) {
116: Rule r = getRule(j);
117: if (j != i
118: && r.getNonterminal().equals(nonterm) == false) // start rule is allowed to be recursive
119: for (int k = 0; found == false && k < r.rightSize(); k++)
120: if (r.getRightSymbol(k).equals(nonterm)
121: && false == r.getNonterminal().equals(
122: Token.TOKEN)
123: && false == r.getNonterminal().equals(
124: Token.IGNORED))
125: found = true;
126: }
127:
128: if (found == false) // must be a top level rule
129: roots.add(rule);
130: }
131:
132: return roots;
133: }
134:
135: /**
136: Simplify the syntax. Search rules that have a nonterminal that is defined only once
137: and contains exactly one symbol on right side. Such a rule gets deleted and its references
138: get substituted by the symbol on right side of that rule.
139: <p>
140: This method is called by default from SerializedObject (base class for all builders).
141: <p>
142: CAUTION: Such symbols will not be called within Semantic!
143: */
144: public void resolveSingulars() {
145: Rule singular;
146: while ((singular = removeSingular()) != null) {
147: String substitute = singular.getNonterminal();
148:
149: for (int i = 0; i < size(); i++) { // substitute rules containing the singular
150: Rule rule = getRule(i);
151:
152: for (int j = 0; j < rule.rightSize(); j++)
153: if (rule.getRightSymbol(j).equals(substitute))
154: rule.setRightSymbol(singular.getRightSymbol(0),
155: j);
156: }
157: }
158: }
159:
160: private Rule removeSingular() {
161: for (int i = size() - 1; i >= 0; i--) {
162: Rule rule = getRule(i);
163: String nonterminal = rule.getNonterminal();
164:
165: // check if rule has only one symbol on right side and nonterminal is artificial symbol
166: boolean singular = rule.rightSize() == 1
167: && nonterminal
168: .startsWith(Token.ARTIFICIAL_NONTERMINAL_START_CHARACTER)
169: && nonterminal.equals(Token.TOKEN) == false
170: && nonterminal.equals(Token.IGNORED) == false;
171:
172: // check if defined only once on any left side
173: for (int j = 0; singular && j < size(); j++)
174: if (j != i
175: && getRule(j).getNonterminal().equals(
176: nonterminal))
177: singular = false; // nonterm has been found once more on left side: rule has alternative rules
178:
179: // check if this is not a semantic splitting of a nonterminal in different contexts:
180: // is right symbol defined only once on any right side with rightSize() == 1 ?
181: String rightSymbol = singular ? rule.getRightSymbol(0)
182: : null;
183: for (int j = 0; singular && j < size(); j++)
184: if (j != i
185: && getRule(j).rightSize() == 1
186: && getRule(j).getRightSymbol(0).equals(
187: rightSymbol))
188: singular = false; // nonterm has been found once more on a right side with exactly one symbol
189:
190: if (singular) {
191: if (rule.indexOnRightSide(nonterminal) >= 0) // check if recursive rule
192: System.err
193: .println("WARNING: removing recursive singular rule: "
194: + rule);
195:
196: this .rules.remove(i);
197: return rule;
198: }
199: }
200: return null;
201: }
202:
203: /**
204: Resolves undefined rules in this syntax from another syntax.
205: @param resolvingSyntax the syntax to copy rules from
206: */
207: public void resolveFrom(Syntax resolvingSyntax) {
208: Set unresolved = getUnresolvedNonterminals();
209: Map done = new Hashtable();
210: for (Iterator it = unresolved.iterator(); it.hasNext();) {
211: getRulesUnderSymbol((String) it.next(), resolvingSyntax,
212: done);
213: }
214: }
215:
216: /**
217: Returns a set of nonterminals that have no rule within this syntax.
218: */
219: public Set getUnresolvedNonterminals() {
220: Set unresolved = new HashSet();
221: for (int j = 0; j < size(); j++) {
222: Rule rule = getRule(j);
223: for (int i = 0; i < rule.rightSize(); i++) {
224: String sym = rule.getRightSymbol(i);
225: if (Token.isTerminal(sym) == false
226: && sym.equals(Token.BUTNOT) == false
227: && sym.equals(Token.UPTO) == false
228: && hasRule(sym) == false)
229: unresolved.add(sym);
230: }
231: }
232: return unresolved;
233: }
234:
235: private void getRulesUnderSymbol(String nonterminal,
236: Syntax sourceSyntax, Map done) {
237: if (done.get(nonterminal) != null)
238: return;
239: done.put(nonterminal, nonterminal);
240:
241: for (int i = 0; i < sourceSyntax.size(); i++) {
242: Rule rule = sourceSyntax.getRule(i);
243: if (rule.getNonterminal().equals(nonterminal)) {
244: addRule(rule);
245: for (int j = 0; j < rule.rightSize(); j++) { // cascade to other rules
246: String sym = rule.getRightSymbol(j);
247: if (Token.isTerminal(sym) == false
248: && sym.equals(Token.BUTNOT) == false
249: && sym.equals(Token.UPTO) == false)
250: getRulesUnderSymbol(sym, sourceSyntax, done);
251: }
252: }
253: }
254: }
255:
256: /**
257: Returns true if the passed nonterminal is on the left side of at least one contained rule.
258: */
259: public boolean hasRule(String nonterminal) {
260: for (int i = 0; i < size(); i++)
261: if (getRule(i).getNonterminal().equals(nonterminal))
262: return true;
263: return false;
264: }
265:
266: /** Returns the syntax as a human readable multiline text. */
267: public String toString() {
268: StringBuffer sb = new StringBuffer();
269: for (int i = 0; i < size(); i++) {
270: sb.append(getRule(i).toString());
271: sb.append("\n");
272: }
273: return sb.toString();
274: }
275:
276: }
|