001: package edu.indiana.lib.twinpeaks.search.singlesearch;
002:
003: import javax.xml.parsers.ParserConfigurationException;
004: import javax.xml.parsers.SAXParser;
005: import javax.xml.parsers.SAXParserFactory;
006:
007: import org.xml.sax.Attributes;
008: import org.xml.sax.SAXException;
009:
010: import edu.indiana.lib.twinpeaks.util.*;
011:
012: public class CqlParser extends org.xml.sax.helpers.DefaultHandler {
013:
014: private static org.apache.commons.logging.Log _log = LogUtils
015: .getLog(CqlParser.class);
016:
017: // index mappings (CQL -> Sirsi)
018: private static final java.util.Map INDEX_MAP = new java.util.HashMap();
019: static {
020: INDEX_MAP.put("keyword", " ");
021: INDEX_MAP.put("title", ":TITLE");
022: INDEX_MAP.put("author", ":CREATOR");
023: INDEX_MAP.put("subject", ":SUBJECT");
024: INDEX_MAP.put("year", ":DATE");
025: }
026:
027: // boolean mappings (CQL -> Sirsi)
028: private static final java.util.Map BOOL_RELATION_MAP = new java.util.HashMap();
029: static {
030: BOOL_RELATION_MAP.put("and", " AND ");
031: BOOL_RELATION_MAP.put("or", " OR ");
032: }
033:
034: // for SAX Parsing
035: SAXParser saxParser;
036: StringBuffer textBuffer;
037: StringBuffer searchClause;
038: boolean inSearchClause;
039: java.util.Stack cqlStack;
040:
041: public CqlParser() {
042: // initialize stack
043: cqlStack = new java.util.Stack();
044:
045: // initialize SAX Parser
046: SAXParserFactory factory;
047:
048: factory = SAXParserFactory.newInstance();
049: factory.setNamespaceAware(true);
050: try {
051: saxParser = factory.newSAXParser();
052: } catch (org.xml.sax.SAXException e) {
053: e.printStackTrace();
054: } catch (ParserConfigurationException e) {
055: e.printStackTrace();
056: }
057: }
058:
059: /**
060: * Converts a CQL-formatted search query into a format that the X-Server
061: * can understand. Uses org.z3950.zing.cql.CQLNode.toXCQL() and SAX Parsing
062: * to convert the cqlSearchQuery into an X-Server find_command.
063: *
064: * @param cqlSearchQuery CQL-formatted search query.
065: * @return X-Server find_command or null if cqlSearchQuery is null or empty.
066: * @see org.z3950.zing.cql.CQLNode.toXCQL()
067: */
068: public String doCQL2MetasearchCommand(String cqlSearchQuery) {
069:
070: if (cqlSearchQuery == null || cqlSearchQuery.equals("")) {
071: return null;
072: }
073:
074: org.z3950.zing.cql.CQLParser parser = new org.z3950.zing.cql.CQLParser();
075: org.z3950.zing.cql.CQLNode root = null;
076:
077: try {
078: // parse the criteria
079: root = parser.parse(cqlSearchQuery);
080: } catch (java.io.IOException ioe) {
081: ioe.printStackTrace();
082: } catch (org.z3950.zing.cql.CQLParseException e) {
083: e.printStackTrace();
084: }
085: String cqlXml = root.toXCQL(0);
086:
087: _log.debug("CQL XML:");
088: _log.debug(cqlXml);
089:
090: // get cqlXml as a stream
091: java.io.ByteArrayInputStream byteInputStream = null;
092: try {
093: byteInputStream = new java.io.ByteArrayInputStream(cqlXml
094: .getBytes("UTF8"));
095: } catch (java.io.UnsupportedEncodingException uee) {
096: uee.printStackTrace();
097: }
098:
099: // clear the stack
100: cqlStack.removeAllElements();
101:
102: // run the parser
103: try {
104: saxParser.parse(byteInputStream, this );
105: byteInputStream.close();
106: } catch (java.io.IOException ioe) {
107: ioe.printStackTrace();
108: } catch (org.xml.sax.SAXException spe) {
109: spe.printStackTrace();
110: }
111:
112: String cqlResult = (String) cqlStack.pop();
113: return cqlResult.trim();
114: }
115:
116: //----------------------------------
117: // DEFAULT HANDLER IMPLEMENTATIONS -
118: //----------------------------------
119:
120: /**
121: * Receive notification of the beginning of an element.
122: *
123: * @see org.xml.sax.helpers.DefaultHandler
124: */
125: public void startElement(String namespaceURI, String sName,
126: String qName, Attributes attrs) throws SAXException {
127: // set flags to avoid overwriting duplicate tag data
128: if (qName.equals("searchClause")) {
129: inSearchClause = true;
130: }
131: }
132:
133: /**
134: * Receive notification of the end of an element.
135: *
136: * @see org.xml.sax.helpers.DefaultHandler
137: */
138: public void endElement(String namespaceURI, String sName,
139: String qName) throws SAXException {
140: // extract data
141: extractDataFromText(qName);
142:
143: // clear flags
144: if (qName.equals("searchClause")) {
145: inSearchClause = false;
146: }
147: }
148:
149: /**
150: * Receive notification of character data inside an element.
151: *
152: * @see org.xml.sax.helpers.DefaultHandler
153: */
154: public void characters(char[] buf, int offset, int len)
155: throws SAXException {
156: // store character data
157: String text = new String(buf, offset, len);
158:
159: if (textBuffer == null) {
160: textBuffer = new StringBuffer(text);
161: } else {
162: textBuffer.append(text);
163: }
164: }
165:
166: //-------------------------
167: // PRIVATE HELPER METHODS -
168: //-------------------------
169:
170: private void extractDataFromText(String element) {
171: if (textBuffer == null) {
172: return;
173: }
174:
175: String text = textBuffer.toString().trim();
176: if (text.equals("") && !element.equals("triple")) {
177: return;
178: }
179: //
180: // check for a boolean relation value
181: //
182: if (!inSearchClause && element.equals("value")) {
183: cqlStack.push(text);
184: }
185: //
186: // Construct a search clause
187: //
188: if (inSearchClause) {
189: if (searchClause == null) {
190: searchClause = new StringBuffer();
191: }
192: //
193: // General syntax: title=macbeth
194: //
195: // (title is the index, = is the value, macbeth is the term)
196: //
197: if (element.equals("index")) {
198: searchClause.append(translateIndex(text));
199:
200: } else if (element.equals("value")) {
201: //
202: // The relation value is always supplied as '='.
203: // We don't need it. Just use a space for Web2
204: //
205: searchClause.append(' ');
206:
207: } else if (element.equals("term")) {
208: // Discard '+' encoding for embedded spaces (should we url decode?).
209: //
210: // Unless our caller provides enclosing quotes (\"), this will
211: // produce a series of keywords, not a phrase.
212: //
213: searchClause.append(text.replaceAll("\\+", " ").trim());
214: cqlStack.push(searchClause.toString().trim());
215: searchClause = null;
216: }
217: }
218: //
219: // evaluate expression so far if we hit a </triple>
220: //
221: if (element.equals("triple")) {
222: String rightOperand = (String) cqlStack.pop();
223: String leftOperand = (String) cqlStack.pop();
224: String booleanRelation = (String) cqlStack.pop();
225:
226: cqlStack.push(leftOperand
227: + translateBooleanRelation(booleanRelation)
228: + rightOperand);
229: }
230:
231: textBuffer = null;
232: }
233:
234: private String translateIndex(String cqlIndex) {
235: String xserverIndex = (String) INDEX_MAP.get(cqlIndex);
236:
237: if (xserverIndex == null || xserverIndex.equals("")) {
238: _log.error("\nERROR (CQL2XServerFindCommand."
239: + "translateIndex()): bad index");
240: // default to keyword
241: xserverIndex = " ";
242: }
243:
244: return xserverIndex;
245: }
246:
247: private String translateBooleanRelation(String booleanRelation) {
248: String xserverBoolean = (String) BOOL_RELATION_MAP
249: .get(booleanRelation);
250:
251: if (xserverBoolean == null || xserverBoolean.equals("")) {
252: _log
253: .error("\nERROR (CQL2XServerFindCommand."
254: + "translateBooleanRelation()): bad boolean relation");
255: // default to and
256: xserverBoolean = " AND ";
257: }
258:
259: return xserverBoolean;
260: }
261:
262: public static void main(String[] args) {
263: CqlParser parser = new CqlParser();
264: String query;
265:
266: query = "title=\"My Title\"";
267: System.out.println();
268: System.out.println("CQL: " + query);
269: System.out.println("Sirsi: "
270: + parser.doCQL2MetasearchCommand(query));
271:
272: query = "title=\"\\\"My Title\\\"\"";
273: System.out.println();
274: System.out.println("CQL: " + query);
275: System.out.println("Sirsi: "
276: + parser.doCQL2MetasearchCommand(query));
277:
278: query = "title=\"My Title\" and keyword=\"some keywords\"";
279: System.out.println();
280: System.out.println("CQL: " + query);
281: System.out.println("Sirsi: "
282: + parser.doCQL2MetasearchCommand(query));
283:
284: query = "title=\"My Title\" and keyword=\"some keywords\" and year=\"1999\"";
285: System.out.println();
286: System.out.println("CQL: " + query);
287: System.out.println("Sirsi: "
288: + parser.doCQL2MetasearchCommand(query));
289: }
290: }
|