001: //##header
002: /*
003: *******************************************************************************
004: * Copyright (C) 2002-2006, International Business Machines Corporation and *
005: * others. All Rights Reserved. *
006: *******************************************************************************
007: */
008: //#ifndef FOUNDATION
009: package com.ibm.icu.dev.test.util;
010:
011: import java.util.ArrayList;
012: import java.util.Map;
013: import java.util.Set;
014: import java.util.HashMap;
015: import java.util.HashSet;
016: import java.util.Iterator;
017:
018: import com.ibm.icu.text.UnicodeSet;
019: import java.util.Random;
020:
021: public class BNF {
022: private Map map = new HashMap();
023: private Set variables = new HashSet();
024: private Pick pick = null;
025: private Pick.Target target = null;
026: private Tokenizer t;
027: private Quoter quoter;
028: private Random random;
029:
030: public String next() {
031: return target.next();
032: }
033:
034: public String getInternal() {
035: return pick.getInternal(0, new HashSet());
036: }
037:
038: /*
039: + "weight = integer '%';"
040: + "range = '{' integer (',' integer?)? '}' weight*;"
041: + "quote = '@';"
042: + "star = '*' weight*;"
043: + "plus = '+' weight*;"
044: + "maybe = '?' weight?;"
045: + "quantifier = range | star | maybe | plus;"
046: + "core = string | unicodeSet | '(' alternation ')';"
047: + "sequence = (core quantifier*)+;"
048: + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
049: + "rule = string '=' alternation;";
050:
051:
052: * Match 0 or more times
053: + Match 1 or more times
054: ? Match 1 or 0 times
055: {n} Match exactly n times
056: {n,} Match at least n times
057: {n,m} Match at least n but not more than m times
058:
059:
060:
061: */
062:
063: public BNF(Random random, Quoter quoter) {
064: this .random = random;
065: this .quoter = quoter;
066: t = new Tokenizer();
067: }
068:
069: public BNF addRules(String rules) {
070: t.setSource(rules);
071: while (addRule()) {
072: }
073: return this ; // for chaining
074: }
075:
076: public BNF complete() {
077: // check that the rules match the variables, except for $root in rules
078: Set ruleSet = map.keySet();
079: // add also
080: variables.add("$root");
081: variables.addAll(t.getLookedUpItems());
082: if (!ruleSet.equals(variables)) {
083: String msg = showDiff(variables, ruleSet);
084: if (msg.length() != 0)
085: msg = "Error: Missing definitions for: " + msg;
086: String temp = showDiff(ruleSet, variables);
087: if (temp.length() != 0)
088: temp = "Warning: Defined but not used: " + temp;
089: if (msg.length() == 0)
090: msg = temp;
091: else if (temp.length() != 0) {
092: msg = msg + "; " + temp;
093: }
094: error(msg);
095: }
096:
097: if (!ruleSet.equals(variables)) {
098: String msg = showDiff(variables, ruleSet);
099: if (msg.length() != 0)
100: msg = "Missing definitions for: " + msg;
101: String temp = showDiff(ruleSet, variables);
102: if (temp.length() != 0)
103: temp = "Defined but not used: " + temp;
104: if (msg.length() == 0)
105: msg = temp;
106: else if (temp.length() != 0) {
107: msg = msg + "; " + temp;
108: }
109: error(msg);
110: }
111:
112: // replace variables by definitions
113: Iterator it = ruleSet.iterator();
114: while (it.hasNext()) {
115: String key = (String) it.next();
116: Pick expression = (Pick) map.get(key);
117: Iterator it2 = ruleSet.iterator();
118: if (false && key.equals("$crlf")) {
119: System.out.println("debug");
120: }
121: while (it2.hasNext()) {
122: Object key2 = it2.next();
123: if (key.equals(key2))
124: continue;
125: Pick expression2 = (Pick) map.get(key2);
126: expression2.replace(key, expression);
127: }
128: }
129: pick = (Pick) map.get("$root");
130: target = Pick.Target.make(pick, random, quoter);
131: // TODO remove temp collections
132: return this ;
133: }
134:
135: String showDiff(Set a, Set b) {
136: Set temp = new HashSet();
137: temp.addAll(a);
138: temp.removeAll(b);
139: if (temp.size() == 0)
140: return "";
141: StringBuffer buffer = new StringBuffer();
142: Iterator it = temp.iterator();
143: while (it.hasNext()) {
144: if (buffer.length() != 0)
145: buffer.append(", ");
146: buffer.append(it.next().toString());
147: }
148: return buffer.toString();
149: }
150:
151: void error(String msg) {
152: throw new IllegalArgumentException(msg + "\r\n" + t.toString());
153: }
154:
155: private boolean addRule() {
156: int type = t.next();
157: if (type == Tokenizer.DONE)
158: return false;
159: if (type != Tokenizer.STRING)
160: error("missing weight");
161: String s = t.getString();
162: if (s.length() == 0 || s.charAt(0) != '$')
163: error("missing $ in variable");
164: if (t.next() != '=')
165: error("missing =");
166: int startBody = t.index;
167: Pick rule = getAlternation();
168: if (rule == null)
169: error("missing expression");
170: t.addSymbol(s, t.getSource(), startBody, t.index);
171: if (t.next() != ';')
172: error("missing ;");
173: return addPick(s, rule);
174: }
175:
176: protected boolean addPick(String s, Pick rule) {
177: Object temp = map.get(s);
178: if (temp != null)
179: error("duplicate variable");
180: if (rule.name == null)
181: rule.name(s);
182: map.put(s, rule);
183: return true;
184: }
185:
186: public BNF addSet(String variable, UnicodeSet set) {
187: if (set != null) {
188: String body = set.toString();
189: t.addSymbol(variable, body, 0, body.length());
190: addPick(variable, Pick.codePoint(set));
191: }
192: return this ;
193: }
194:
195: int maxRepeat = 99;
196:
197: Pick qualify(Pick item) {
198: int[] weights;
199: int type = t.next();
200: switch (type) {
201: case '@':
202: return new Pick.Quote(item);
203: case '~':
204: return new Pick.Morph(item);
205: case '?':
206: int weight = getWeight();
207: if (weight == NO_WEIGHT)
208: weight = 50;
209: weights = new int[] { 100 - weight, weight };
210: return Pick.repeat(0, 1, weights, item);
211: case '*':
212: weights = getWeights();
213: return Pick.repeat(1, maxRepeat, weights, item);
214: case '+':
215: weights = getWeights();
216: return Pick.repeat(1, maxRepeat, weights, item);
217: case '{':
218: if (t.next() != Tokenizer.NUMBER)
219: error("missing number");
220: int start = (int) t.getNumber();
221: int end = start;
222: type = t.next();
223: if (type == ',') {
224: end = maxRepeat;
225: type = t.next();
226: if (type == Tokenizer.NUMBER) {
227: end = (int) t.getNumber();
228: type = t.next();
229: }
230: }
231: if (type != '}')
232: error("missing }");
233: weights = getWeights();
234: return Pick.repeat(start, end, weights, item);
235: }
236: t.backup();
237: return item;
238: }
239:
240: Pick getCore() {
241: int token = t.next();
242: if (token == Tokenizer.STRING) {
243: String s = t.getString();
244: if (s.charAt(0) == '$')
245: variables.add(s);
246: return Pick.string(s);
247: }
248: if (token == Tokenizer.UNICODESET) {
249: return Pick.codePoint(t.getUnicodeSet());
250: }
251: if (token != '(') {
252: t.backup();
253: return null;
254: }
255: Pick temp = getAlternation();
256: token = t.next();
257: if (token != ')')
258: error("missing )");
259: return temp;
260: }
261:
262: Pick getSequence() {
263: Pick.Sequence result = null;
264: Pick last = null;
265: while (true) {
266: Pick item = getCore();
267: if (item == null) {
268: if (result != null)
269: return result;
270: if (last != null)
271: return last;
272: error("missing item");
273: }
274: // qualify it as many times as possible
275: Pick oldItem;
276: do {
277: oldItem = item;
278: item = qualify(item);
279: } while (item != oldItem);
280: // add it in
281: if (last == null) {
282: last = item;
283: } else {
284: if (result == null)
285: result = Pick.makeSequence().and2(last);
286: result = result.and2(item);
287: }
288: }
289: }
290:
291: // for simplicity, we just use recursive descent
292: Pick getAlternation() {
293: Pick.Alternation result = null;
294: Pick last = null;
295: int lastWeight = NO_WEIGHT;
296: while (true) {
297: Pick temp = getSequence();
298: if (temp == null)
299: error("empty alternation");
300: int weight = getWeight();
301: if (weight == NO_WEIGHT)
302: weight = 1;
303: if (last == null) {
304: last = temp;
305: lastWeight = weight;
306: } else {
307: if (result == null)
308: result = Pick.makeAlternation().or2(lastWeight,
309: last);
310: result = result.or2(weight, temp);
311: }
312: int token = t.next();
313: if (token != '|') {
314: t.backup();
315: if (result != null)
316: return result;
317: if (last != null)
318: return last;
319: }
320: }
321: }
322:
323: private static final int NO_WEIGHT = Integer.MIN_VALUE;
324:
325: int getWeight() {
326: int weight;
327: int token = t.next();
328: if (token != Tokenizer.NUMBER) {
329: t.backup();
330: return NO_WEIGHT;
331: }
332: weight = (int) t.getNumber();
333: token = t.next();
334: if (token != '%')
335: error("missing %");
336: return weight;
337: }
338:
339: int[] getWeights() {
340: ArrayList list = new ArrayList();
341: while (true) {
342: int weight = getWeight();
343: if (weight == NO_WEIGHT)
344: break;
345: list.add(new Integer(weight));
346: }
347: if (list.size() == 0)
348: return null;
349: int[] result = new int[list.size()];
350: for (int i = 0; i < list.size(); ++i) {
351: result[i] = ((Integer) list.get(i)).intValue();
352: }
353: return result;
354: }
355: }
356: //#endif
|