001: //##header
002: /*
003: *******************************************************************************
004: * Copyright (C) 2002-2006, International Business Machines Corporation and *
005: * others. All Rights Reserved. *
006: *******************************************************************************
007: */
008: //#ifndef FOUNDATION
009: package com.ibm.icu.dev.test.util;
010:
011: import java.util.Random;
012:
013: //TODO integrate this into the test framework
014:
015: import com.ibm.icu.text.UnicodeSet;
016:
017: public class TestBNF {
018:
019: static final String[] testRules = {
020: "$root = [ab]{3};",
021:
022: "$root = [ab]{3,};",
023:
024: "$root = [ab]{3,5};",
025:
026: "$root = [ab]*;",
027:
028: "$root = [ab]?;",
029:
030: "$root = [ab]+;",
031:
032: "$us = [a-z];" + "$root = [0-9$us];",
033:
034: "$root = a $foo b? 25% end 30% | $foo 50%;\r\n"
035: + "$foo = c{1,5} 20%;",
036:
037: "$root = [a-z]{1,5}~;",
038:
039: "$root = [a-z]{5}~;",
040:
041: "$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n"
042: + "$hex = [0-9A-Fa-f];", };
043:
044: static String unicodeSetBNF = ""
045: + "$root = $leaf | '[' $s $root2 $s ']' ;\r\n"
046: + "$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n"
047: + "$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n"
048: + "$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n"
049: + "$op = (('&' | '-') $s)? 70%;"
050: + "$leaf = '[' $s $list $s ']' | $prop;\r\n"
051: + "$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n"
052: + "$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n"
053: + "$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n"
054: + "$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n"
055: + "$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n"
056: + "$hex = [0-9A-Fa-f];\r\n"
057: + "$s = ' '? 20%;\r\n"
058: + "$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n"
059: + "$category = ((general | gc) $s '=' $s)? $catvalue;\r\n"
060: + "$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
061:
062: public static void main(String[] args) {
063: testTokenizer();
064: for (int i = 0; i < testRules.length; ++i) {
065: testBNF(testRules[i], null, 20);
066: }
067:
068: testBNF(unicodeSetBNF, null, 20);
069: //testParser();
070: }
071:
072: static void testBNF(String rules, UnicodeSet chars, int count) {
073: BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
074: .addSet("$chars", chars).addRules(rules).complete();
075:
076: System.out.println("====================================");
077: System.out.println("BNF");
078: System.out.println(rules);
079: System.out.println(bnf.getInternal());
080: for (int i = 0; i < count; ++i) {
081: System.out.println(i + ": " + bnf.next());
082: }
083: }
084:
085: /*
086: public static testManual() {
087: Pick p = Pick.maybe(75,Pick.unquoted("a"));
088: testOr(p, 1);
089: p = Pick.or(new String[]{"", "a", "bb", "ccc"});
090: testOr(p, 3);
091: p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
092: testOr(p, 5);
093: p = Pick.codePoint("[a-ce]");
094: testCodePoints(p);
095: p = Pick.codePoint("[a-ce]");
096: testCodePoints(p);
097: p = Pick.string(2, 8, p);
098: testOr(p,10);
099:
100: p = Pick.or(new String[]{"", "a", "bb", "ccc"});
101: p = Pick.and(p).and2(p).and2("&");
102: testMatch(p, "abb&");
103: testMatch(p, "bba");
104:
105: // testEnglish();
106: }
107: */
108:
109: static void testMatch(Pick p, String source) {
110: Pick.Position pp = new Pick.Position();
111: boolean value = p.match(source, pp);
112: System.out.println("Match: " + value + ", " + pp);
113: }
114:
115: /*
116: static void testParser() {
117: try {
118: Pick.Target target = new Pick.Target();
119: for (int i = 0; i < rules.length; ++i) {
120: target.addRule(rules[i]);
121: }
122: } catch (ParseException e) {
123: // TODO Auto-generated catch block
124: e.printStackTrace();
125: }
126: }
127: */
128:
129: static class Counts {
130: int[] counts;
131:
132: Counts(int max) {
133: counts = new int[max + 1];
134: }
135:
136: void inc(int index) {
137: counts[index]++;
138: }
139:
140: void show() {
141: System.out.println("Printing Counts");
142: for (int i = 0; i < counts.length; ++i) {
143: if (counts[i] == 0)
144: continue;
145: System.out.println(i + ": " + counts[i]);
146: }
147: System.out.println();
148: }
149: }
150:
151: static final String[] rules = {
152: "$s = ' ';",
153: "$noun = dog | house | government | wall | street | zebra;",
154: "$adjective = red | glorious | simple | nasty | heavy | clean;",
155: "$article = quickly | oddly | silently | boldly;",
156: "$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
157: "$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
158: "$verb = goes | fishes | walks | sleeps;",
159: "$tverb = carries | lifts | overturns | hits | jumps on;",
160: "$copula = is 30% | seems 10%;",
161: "$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
162: "$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
163: "$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
164: "$conj = but | and | or;",
165: "$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
166: "$sentence = $sentence4 '.';" };
167:
168: /*
169: private static void testEnglish() {
170: Pick s = Pick.unquoted(" ");
171: Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
172: Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
173: Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
174: Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
175: Pick articles = Pick.or(new String[]{"the", "a"});
176: Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
177: Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
178: Pick nounPhrase = Pick.and(articles).and2(s)
179: .and2(0.3, Pick.and(adjectivePhrase).and2(s))
180: .and2(nouns);
181: Pick copula = Pick.or(new String[]{"is", "seems"});
182: Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
183: .and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
184: Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
185: .and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
186: Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
187: Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
188: Pick forward = Pick.unquoted("forward");
189: Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
190: Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
191: sentenceBase.replace(forward, sentenceBase);
192: Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
193: Pick.Target target = Pick.Target.make(sentence);
194: for (int i = 0; i < 50; ++i) {
195: System.out.println(i + ": " + target.next());
196: }
197: }
198: private static void testOr(Pick p, int count) {
199: Pick.Target target = Pick.Target.make(p);
200: Counts counts = new Counts(count + 10);
201: for (int i = 0; i < 1000; ++i) {
202: String s = target.next();
203: counts.inc(s.length());
204: }
205: counts.show();
206: }
207: private static void testCodePoints(Pick p) {
208: Pick.Target target = Pick.Target.make(p);
209: Counts counts = new Counts(128);
210: for (int i = 0; i < 10000; ++i) {
211: String s = target.next();
212: counts.inc(s.charAt(0));
213: }
214: counts.show();
215: }
216: */
217: public static void printRandoms() {
218: BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
219: .addRules("[a-z]{2,5}").complete();
220: System.out.println("Start");
221: for (int i = 0; i < 100; ++i) {
222: String temp = bnf.next();
223: System.out.println(i + ")\t" + temp);
224: }
225: }
226:
227: public static void testTokenizer() {
228: Tokenizer t = new Tokenizer();
229:
230: String[] samples = { "a'b'c d #abc\r e", "'a '123 321", "\\\\",
231: "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b" };
232: for (int i = 0; i < samples.length; ++i) {
233: t.setSource(samples[i]);
234: System.out.println();
235: System.out.println("Input: " + t.getSource());
236: int type = 0;
237: while (type != Tokenizer.DONE) {
238: type = t.next();
239: System.out.println(t.toString(type, false));
240: }
241: }
242: }
243:
244: }
245:
246: //#endif
|