001: /*
002: * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
003: * (http://h2database.com/html/license.html).
004: * Initial Developer: H2 Group
005: */
006: package org.h2.bnf;
007:
008: import java.io.BufferedReader;
009: import java.io.ByteArrayInputStream;
010: import java.io.InputStreamReader;
011: import java.io.Reader;
012: import java.sql.ResultSet;
013: import java.util.ArrayList;
014: import java.util.HashMap;
015: import java.util.Iterator;
016: import java.util.Random;
017: import java.util.StringTokenizer;
018:
019: import org.h2.server.web.DbContextRule;
020: import org.h2.tools.Csv;
021: import org.h2.util.Resources;
022: import org.h2.util.StringCache;
023: import org.h2.util.StringUtils;
024:
025: /**
026: * This class can read a file that is similar to BNF (Backus-Naur form).
027: * It is made specially to support SQL grammar.
028: */
029: public class Bnf {
030:
031: static final boolean COMBINE_KEYWORDS = false;
032:
033: private static final String SEPARATORS = " [](){}|.,\r\n<>:-+*/=<\">!'";
034: private static final long MAX_PARSE_TIME = 100;
035:
036: private final Random random = new Random();
037: private final HashMap ruleMap = new HashMap();
038: private String syntax;
039: private String currentToken;
040: private String[] tokens;
041: private char firstChar;
042: private int index;
043: private Rule lastRepeat;
044: private ArrayList statements;
045: private String currentTopic;
046:
047: /**
048: * Create an instance using the grammar specified in the CSV file.
049: *
050: * @param csv if not specified, the help.csv is used
051: * @return a new instance
052: */
053: public static Bnf getInstance(Reader csv) throws Exception {
054: Bnf bnf = new Bnf();
055: if (csv == null) {
056: byte[] data = Resources.get("/org/h2/res/help.csv");
057: csv = new InputStreamReader(new ByteArrayInputStream(data));
058: }
059: bnf.parse(csv);
060: return bnf;
061: }
062:
063: Bnf() {
064: random.setSeed(1);
065: }
066:
067: void addFixedRule(String name, int fixedType) {
068: Rule rule = new RuleFixed(fixedType);
069: addRule(name, "Fixed", rule);
070: }
071:
072: RuleHead addRule(String topic, String section, Rule rule) {
073: RuleHead head = new RuleHead(section, topic, rule);
074: if (ruleMap.get(StringUtils.toLowerEnglish(topic)) != null) {
075: throw new Error("already exists: " + topic);
076: }
077: ruleMap.put(StringUtils.toLowerEnglish(topic), head);
078: return head;
079: }
080:
081: public Random getRandom() {
082: return random;
083: }
084:
085: private void parse(Reader csv) throws Exception {
086: csv = new BufferedReader(csv);
087: Rule functions = null;
088: statements = new ArrayList();
089: ResultSet rs = Csv.getInstance().read(csv, null);
090: for (int id = 0; rs.next(); id++) {
091: String section = rs.getString("SECTION").trim();
092: if (section.startsWith("System")) {
093: continue;
094: }
095: String topic = StringUtils.toLowerEnglish(rs.getString(
096: "TOPIC").trim());
097: topic = StringUtils.replaceAll(topic, " ", "");
098: topic = StringUtils.replaceAll(topic, "_", "");
099: syntax = rs.getString("SYNTAX").trim();
100: currentTopic = section;
101: if (section.startsWith("Function")) {
102: int end = syntax.indexOf(':');
103: syntax = syntax.substring(0, end);
104: }
105: tokens = tokenize();
106: index = 0;
107: Rule rule = parseRule();
108: if (section.startsWith("Command")) {
109: rule = new RuleList(rule, new RuleElement(";\n\n",
110: currentTopic), false);
111: }
112: RuleHead head = addRule(topic, section, rule);
113: if (section.startsWith("Function")) {
114: if (functions == null) {
115: functions = rule;
116: } else {
117: functions = new RuleList(rule, functions, true);
118: }
119: } else if (section.startsWith("Commands")) {
120: statements.add(head);
121: }
122: }
123: addRule("@func@", "Function", functions);
124: addFixedRule("@ymd@", RuleFixed.YMD);
125: addFixedRule("@hms@", RuleFixed.HMS);
126: addFixedRule("@nanos@", RuleFixed.NANOS);
127: addFixedRule("anythingExceptSingleQuote",
128: RuleFixed.ANY_EXCEPT_SINGLE_QUOTE);
129: addFixedRule("anythingExceptDoubleQuote",
130: RuleFixed.ANY_EXCEPT_DOUBLE_QUOTE);
131: addFixedRule("anythingUntilEndOfLine", RuleFixed.ANY_UNTIL_EOL);
132: addFixedRule("anythingUntilEndComment", RuleFixed.ANY_UNTIL_END);
133: addFixedRule("anything", RuleFixed.ANY_WORD);
134: addFixedRule("@hexStart@", RuleFixed.HEX_START);
135: addFixedRule("@concat@", RuleFixed.CONCAT);
136: addFixedRule("@az_@", RuleFixed.AZ_UNDERLINE);
137: addFixedRule("@af@", RuleFixed.AF);
138: addFixedRule("@digit@", RuleFixed.DIGIT);
139: }
140:
141: /**
142: * Get the HTML documentation for a given syntax.
143: *
144: * @param rule the rule (topic)
145: * @param syntax the BNF syntax
146: * @return the HTML formatted text
147: */
148: public String getSyntaxHtml(String rule, String syntax) {
149: StringTokenizer tokenizer = new StringTokenizer(syntax,
150: SEPARATORS, true);
151: StringBuffer buff = new StringBuffer();
152: while (tokenizer.hasMoreTokens()) {
153: String s = tokenizer.nextToken();
154: if (s.length() == 1
155: || StringUtils.toUpperEnglish(s).equals(s)) {
156: buff.append(s);
157: continue;
158: }
159: RuleHead found = null;
160: for (int i = 0; i < s.length(); i++) {
161: String test = StringUtils
162: .toLowerEnglish(s.substring(i));
163: RuleHead r = (RuleHead) ruleMap.get(test);
164: if (r != null) {
165: found = r;
166: break;
167: }
168: }
169: if (found == null || found.rule instanceof RuleFixed) {
170: buff.append(s);
171: continue;
172: }
173: String page = "grammar.html";
174: if (found.section.startsWith("Data Types")) {
175: page = "datatypes.html";
176: } else if (found.section.startsWith("Functions")) {
177: page = "functions.html";
178: }
179: String link = StringUtils.urlEncode(found.getTopic()
180: .toLowerCase());
181: buff.append("<a href=\"" + page + "#" + link + "\">");
182: buff.append(s);
183: buff.append("</a>");
184: }
185: return buff.toString();
186: }
187:
188: private Rule parseRule() {
189: read();
190: return parseOr();
191: }
192:
193: private Rule parseOr() {
194: Rule r = parseList();
195: if (firstChar == '|') {
196: read();
197: r = new RuleList(r, parseOr(), true);
198: }
199: lastRepeat = r;
200: return r;
201: }
202:
203: private Rule parseList() {
204: Rule r = parseToken();
205: if (firstChar != '|' && firstChar != ']' && firstChar != '}'
206: && firstChar != 0) {
207: r = new RuleList(r, parseList(), false);
208: }
209: lastRepeat = r;
210: return r;
211: }
212:
213: private Rule parseToken() {
214: Rule r;
215: if ((firstChar >= 'A' && firstChar <= 'Z')
216: || (firstChar >= 'a' && firstChar <= 'z')) {
217: // r = new RuleElement(currentToken+ " syntax:" + syntax);
218: r = new RuleElement(currentToken, currentTopic);
219: } else if (firstChar == '[') {
220: read();
221: Rule r2 = parseOr();
222: boolean repeat = false;
223: if (r2.last() instanceof RuleRepeat) {
224: repeat = true;
225: }
226: r = new RuleOptional(r2, repeat);
227: if (firstChar != ']') {
228: throw new Error("expected ], got " + currentToken
229: + " syntax:" + syntax);
230: }
231: } else if (firstChar == '{') {
232: read();
233: r = parseOr();
234: if (firstChar != '}') {
235: throw new Error("expected }, got " + currentToken
236: + " syntax:" + syntax);
237: }
238: } else if ("@commaDots@".equals(currentToken)) {
239: r = new RuleList(new RuleElement(",", currentTopic),
240: lastRepeat, false);
241: r = new RuleRepeat(r);
242: } else if ("@dots@".equals(currentToken)) {
243: r = new RuleRepeat(lastRepeat);
244: } else {
245: r = new RuleElement(currentToken, currentTopic);
246: }
247: lastRepeat = r;
248: read();
249: return r;
250: }
251:
252: private void read() {
253: if (index < tokens.length) {
254: currentToken = tokens[index++];
255: firstChar = currentToken.charAt(0);
256: } else {
257: currentToken = "";
258: firstChar = 0;
259: }
260: }
261:
262: private String[] tokenize() {
263: ArrayList list = new ArrayList();
264: syntax = StringUtils.replaceAll(syntax, "yyyy-MM-dd", "@ymd@");
265: syntax = StringUtils.replaceAll(syntax, "hh:mm:ss", "@hms@");
266: syntax = StringUtils.replaceAll(syntax, "nnnnnnnnn", "@nanos@");
267: syntax = StringUtils.replaceAll(syntax, "function", "@func@");
268: syntax = StringUtils.replaceAll(syntax, "0x", "@hexStart@");
269: syntax = StringUtils.replaceAll(syntax, ",...", "@commaDots@");
270: syntax = StringUtils.replaceAll(syntax, "...", "@dots@");
271: syntax = StringUtils.replaceAll(syntax, "||", "@concat@");
272: syntax = StringUtils.replaceAll(syntax, "a-z|_", "@az_@");
273: syntax = StringUtils.replaceAll(syntax, "A-Z|_", "@az_@");
274: syntax = StringUtils.replaceAll(syntax, "a-f", "@af@");
275: syntax = StringUtils.replaceAll(syntax, "A-F", "@af@");
276: syntax = StringUtils.replaceAll(syntax, "0-9", "@digit@");
277: StringTokenizer tokenizer = new StringTokenizer(syntax,
278: SEPARATORS, true);
279: while (tokenizer.hasMoreTokens()) {
280: String s = tokenizer.nextToken();
281: // avoid duplicate strings
282: s = StringCache.get(s);
283: if (s.length() == 1) {
284: if (" \r\n".indexOf(s.charAt(0)) >= 0) {
285: continue;
286: }
287: }
288: list.add(s);
289: }
290: return (String[]) list.toArray(new String[0]);
291: }
292:
293: /**
294: * Get the list of tokens that can follow.
295: * This is the main autocomplete method.
296: * The returned map for the query 'S' may look like this:
297: * <pre>
298: * key: 1#SELECT, value: ELECT
299: * key: 1#SET, value: ET
300: * </pre>
301: *
302: * @param query the start of the statement
303: * @return the map of possible token types / tokens
304: */
305: public HashMap getNextTokenList(String query) {
306: HashMap next = new HashMap();
307: Sentence sentence = new Sentence();
308: sentence.next = next;
309: sentence.text = query;
310: for (int i = 0; i < statements.size(); i++) {
311: RuleHead head = (RuleHead) statements.get(i);
312: if (!head.section.startsWith("Commands")) {
313: continue;
314: }
315: sentence.max = System.currentTimeMillis() + MAX_PARSE_TIME;
316: head.getRule().addNextTokenList(query, sentence);
317: }
318: return next;
319: }
320:
321: /**
322: * Cross-link all statements with each other.
323: * This method is called after updating the topics.
324: */
325: public void linkStatements() {
326: for (Iterator it = ruleMap.values().iterator(); it.hasNext();) {
327: RuleHead r = (RuleHead) it.next();
328: r.getRule().setLinks(ruleMap);
329: }
330: }
331:
332: /**
333: * Update a topic with a context specific rule.
334: * This is used for autocomplete support.
335: *
336: * @param topic the topic
337: * @param rule the database context rule
338: */
339: public void updateTopic(String topic, DbContextRule rule) {
340: topic = StringUtils.toLowerEnglish(topic);
341: RuleHead head = (RuleHead) ruleMap.get(topic);
342: if (head == null) {
343: head = new RuleHead("db", topic, rule);
344: ruleMap.put(topic, head);
345: statements.add(head);
346: } else {
347: head.rule = rule;
348: }
349: }
350:
351: /**
352: * Get the list of possible statements.
353: *
354: * @return the list of statements
355: */
356: public ArrayList getStatements() {
357: return statements;
358: }
359:
360: }
|